summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/ndisc.c6
-rw-r--r--net/802/garp.c2
-rw-r--r--net/802/mrp.c2
-rw-r--r--net/8021q/vlan_dev.c12
-rw-r--r--net/8021q/vlan_netlink.c6
-rw-r--r--net/8021q/vlanproc.c4
-rw-r--r--net/9p/Kconfig8
-rw-r--r--net/9p/Makefile4
-rw-r--r--net/9p/client.c12
-rw-r--r--net/9p/trans_usbg.c956
-rw-r--r--net/9p/trans_xen.c9
-rw-r--r--net/Kconfig9
-rw-r--r--net/Kconfig.debug15
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/Makefile2
-rw-r--r--net/appletalk/dev.c46
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c8
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/soft-interface.c5
-rw-r--r--net/batman-adv/translation-table.c154
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/af_bluetooth.c25
-rw-r--r--net/bluetooth/bnep/core.c5
-rw-r--r--net/bluetooth/cmtp/Kconfig4
-rw-r--r--net/bluetooth/cmtp/capi.c32
-rw-r--r--net/bluetooth/coredump.c2
-rw-r--r--net/bluetooth/eir.h2
-rw-r--r--net/bluetooth/hci_conn.c246
-rw-r--r--net/bluetooth/hci_core.c62
-rw-r--r--net/bluetooth/hci_event.c99
-rw-r--r--net/bluetooth/hci_sock.c16
-rw-r--r--net/bluetooth/hci_sync.c86
-rw-r--r--net/bluetooth/hci_sysfs.c15
-rw-r--r--net/bluetooth/iso.c220
-rw-r--r--net/bluetooth/l2cap_core.c20
-rw-r--r--net/bluetooth/l2cap_sock.c21
-rw-r--r--net/bluetooth/leds.c2
-rw-r--r--net/bluetooth/mgmt.c280
-rw-r--r--net/bluetooth/mgmt_util.c2
-rw-r--r--net/bluetooth/rfcomm/core.c8
-rw-r--r--net/bluetooth/rfcomm/sock.c31
-rw-r--r--net/bluetooth/sco.c169
-rw-r--r--net/bluetooth/smp.c7
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c2
-rw-r--r--net/bpf/test_run.c1
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_fdb.c53
-rw-r--r--net/bridge/br_mdb.c2
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/bridge/br_netlink.c12
-rw-r--r--net/bridge/br_private.h9
-rw-r--r--net/bridge/br_stp_bpdu.c2
-rw-r--r--net/bridge/br_vlan.c19
-rw-r--r--net/bridge/netfilter/Kconfig8
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c9
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c3
-rw-r--r--net/caif/cfpkt_skbuff.c6
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/caif/cfsrvl.c6
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/can/af_can.c1
-rw-r--r--net/can/bcm.c6
-rw-r--r--net/can/gw.c29
-rw-r--r--net/can/j1939/transport.c10
-rw-r--r--net/can/raw.c2
-rw-r--r--net/ceph/crypto.c12
-rw-r--r--net/ceph/crypto.h1
-rw-r--r--net/ceph/messenger.c2
-rw-r--r--net/ceph/osd_client.c34
-rw-r--r--net/ceph/pagelist.c38
-rw-r--r--net/ceph/pagevec.c52
-rw-r--r--net/core/Makefile4
-rw-r--r--net/core/bpf_sk_storage.c6
-rw-r--r--net/core/datagram.c6
-rw-r--r--net/core/dev.c268
-rw-r--r--net/core/dev.h123
-rw-r--r--net/core/dev_addr_lists.c6
-rw-r--r--net/core/dev_ioctl.c15
-rw-r--r--net/core/devmem.c389
-rw-r--r--net/core/devmem.h180
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/dst.c17
-rw-r--r--net/core/fib_notifier.c2
-rw-r--r--net/core/fib_rules.c43
-rw-r--r--net/core/filter.c295
-rw-r--r--net/core/gro.c14
-rw-r--r--net/core/link_watch.c7
-rw-r--r--net/core/lwt_bpf.c12
-rw-r--r--net/core/mp_dmabuf_devmem.h44
-rw-r--r--net/core/neighbour.c363
-rw-r--r--net/core/net-sysfs.c19
-rw-r--r--net/core/net-traces.c2
-rw-r--r--net/core/net_namespace.c144
-rw-r--r--net/core/netdev-genl-gen.c46
-rw-r--r--net/core/netdev-genl-gen.h7
-rw-r--r--net/core/netdev-genl.c222
-rw-r--r--net/core/netdev_rx_queue.c81
-rw-r--r--net/core/netmem_priv.h31
-rw-r--r--net/core/netpoll.c95
-rw-r--r--net/core/page_pool.c121
-rw-r--r--net/core/page_pool_priv.h46
-rw-r--r--net/core/page_pool_user.c32
-rw-r--r--net/core/pktgen.c12
-rw-r--r--net/core/rtnetlink.c1079
-rw-r--r--net/core/rtnl_net_debug.c125
-rw-r--r--net/core/skb_fault_injection.c106
-rw-r--r--net/core/skbuff.c144
-rw-r--r--net/core/skmsg.c6
-rw-r--r--net/core/sock.c118
-rw-r--r--net/core/sock_map.c42
-rw-r--r--net/core/sock_reuseport.c5
-rw-r--r--net/core/sysctl_net_core.c56
-rw-r--r--net/core/tso.c2
-rw-r--r--net/core/utils.c2
-rw-r--r--net/dcb/dcbnl.c8
-rw-r--r--net/dccp/ccids/ccid3.c2
-rw-r--r--net/dccp/feat.c6
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/devlink/dev.c18
-rw-r--r--net/devlink/devl_internal.h7
-rw-r--r--net/devlink/dpipe.c18
-rw-r--r--net/devlink/health.c25
-rw-r--r--net/devlink/rate.c8
-rw-r--r--net/devlink/region.c15
-rw-r--r--net/devlink/resource.c101
-rw-r--r--net/devlink/trap.c34
-rw-r--r--net/dsa/devlink.c23
-rw-r--r--net/dsa/dsa.c15
-rw-r--r--net/dsa/port.c40
-rw-r--r--net/dsa/tag_ksz.c11
-rw-r--r--net/dsa/tag_ocelot_8021q.c2
-rw-r--r--net/dsa/user.c102
-rw-r--r--net/ethtool/Makefile3
-rw-r--r--net/ethtool/bitset.c48
-rw-r--r--net/ethtool/cabletest.c57
-rw-r--r--net/ethtool/channels.c20
-rw-r--r--net/ethtool/cmis.h17
-rw-r--r--net/ethtool/cmis_cdb.c108
-rw-r--r--net/ethtool/cmis_fw_update.c108
-rw-r--r--net/ethtool/common.c151
-rw-r--r--net/ethtool/common.h8
-rw-r--r--net/ethtool/ioctl.c58
-rw-r--r--net/ethtool/linkinfo.c2
-rw-r--r--net/ethtool/linkmodes.c2
-rw-r--r--net/ethtool/netlink.c68
-rw-r--r--net/ethtool/netlink.h37
-rw-r--r--net/ethtool/phy.c306
-rw-r--r--net/ethtool/plca.c30
-rw-r--r--net/ethtool/pse-pd.c38
-rw-r--r--net/ethtool/rss.c233
-rw-r--r--net/ethtool/strset.c27
-rw-r--r--net/handshake/handshake-test.c2
-rw-r--r--net/handshake/netlink.c4
-rw-r--r--net/handshake/request.c1
-rw-r--r--net/hsr/hsr_device.c125
-rw-r--r--net/hsr/hsr_forward.c21
-rw-r--r--net/hsr/hsr_main.h1
-rw-r--r--net/hsr/hsr_netlink.c11
-rw-r--r--net/hsr/hsr_slave.c11
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ieee802154/core.c10
-rw-r--r--net/ieee802154/nl-mac.c15
-rw-r--r--net/ieee802154/nl802154.c26
-rw-r--r--net/ieee802154/socket.c12
-rw-r--r--net/ipv4/Kconfig3
-rw-r--r--net/ipv4/af_inet.c22
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/bpf_tcp_ca.c26
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/datagram.c8
-rw-r--r--net/ipv4/devinet.c367
-rw-r--r--net/ipv4/esp4.c3
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c46
-rw-r--r--net/ipv4/fib_notifier.c10
-rw-r--r--net/ipv4/fib_rules.c56
-rw-r--r--net/ipv4/fib_semantics.c94
-rw-r--r--net/ipv4/fib_trie.c11
-rw-r--r--net/ipv4/fou_core.c29
-rw-r--r--net/ipv4/fou_nl.c4
-rw-r--r--net/ipv4/icmp.c137
-rw-r--r--net/ipv4/igmp.c26
-rw-r--r--net/ipv4/inet_connection_sock.c34
-rw-r--r--net/ipv4/inet_diag.c14
-rw-r--r--net/ipv4/inet_hashtables.c12
-rw-r--r--net/ipv4/inetpeer.c9
-rw-r--r--net/ipv4/ip_fragment.c11
-rw-r--r--net/ipv4/ip_gre.c13
-rw-r--r--net/ipv4/ip_input.c26
-rw-r--r--net/ipv4/ip_options.c5
-rw-r--r--net/ipv4/ip_output.c31
-rw-r--r--net/ipv4/ip_tunnel.c17
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c106
-rw-r--r--net/ipv4/ipmr_base.c3
-rw-r--r--net/ipv4/netfilter.c3
-rw-r--r--net/ipv4/netfilter/Kconfig16
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c3
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c10
-rw-r--r--net/ipv4/nexthop.c99
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c265
-rw-r--r--net/ipv4/sysctl_net_ipv4.c32
-rw-r--r--net/ipv4/tcp.c300
-rw-r--r--net/ipv4/tcp_ao.c42
-rw-r--r--net/ipv4/tcp_bpf.c24
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_input.c61
-rw-r--r--net/ipv4/tcp_ipv4.c40
-rw-r--r--net/ipv4/tcp_metrics.c10
-rw-r--r--net/ipv4/tcp_minisocks.c37
-rw-r--r--net/ipv4/tcp_offload.c10
-rw-r--r--net/ipv4/tcp_output.c32
-rw-r--r--net/ipv4/tcp_timer.c26
-rw-r--r--net/ipv4/udp.c274
-rw-r--r--net/ipv4/udp_offload.c22
-rw-r--r--net/ipv4/udp_tunnel_core.c3
-rw-r--r--net/ipv4/xfrm4_input.c2
-rw-r--r--net/ipv4/xfrm4_policy.c41
-rw-r--r--net/ipv4/xfrm4_protocol.c2
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c153
-rw-r--r--net/ipv6/addrlabel.c28
-rw-r--r--net/ipv6/af_inet6.c23
-rw-r--r--net/ipv6/anycast.c5
-rw-r--r--net/ipv6/calipso.c2
-rw-r--r--net/ipv6/esp6.c3
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/fib6_notifier.c2
-rw-r--r--net/ipv6/fib6_rules.c45
-rw-r--r--net/ipv6/icmp.c28
-rw-r--r--net/ipv6/ila/ila.h1
-rw-r--r--net/ipv6/ila/ila_main.c6
-rw-r--r--net/ipv6/ila/ila_xlat.c26
-rw-r--r--net/ipv6/inet6_hashtables.c15
-rw-r--r--net/ipv6/ioam6.c14
-rw-r--r--net/ipv6/ioam6_iptunnel.c92
-rw-r--r--net/ipv6/ip6_fib.c41
-rw-r--r--net/ipv6/ip6_gre.c7
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_output.c24
-rw-r--r--net/ipv6/ip6_tunnel.c15
-rw-r--r--net/ipv6/ip6mr.c84
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/ndisc.c6
-rw-r--r--net/ipv6/netfilter/Kconfig9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c34
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c5
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c88
-rw-r--r--net/ipv6/rpl_iptunnel.c12
-rw-r--r--net/ipv6/seg6_local.c14
-rw-r--r--net/ipv6/sit.c11
-rw-r--r--net/ipv6/tcp_ipv6.c26
-rw-r--r--net/ipv6/tcpv6_offload.c10
-rw-r--r--net/ipv6/udp.c129
-rw-r--r--net/ipv6/xfrm6_policy.c31
-rw-r--r--net/iucv/af_iucv.c26
-rw-r--r--net/kcm/kcmsock.c10
-rw-r--r--net/key/af_key.c7
-rw-r--r--net/l2tp/l2tp_core.c398
-rw-r--r--net/l2tp/l2tp_core.h25
-rw-r--r--net/l2tp/l2tp_debugfs.c24
-rw-r--r--net/l2tp/l2tp_eth.c44
-rw-r--r--net/l2tp/l2tp_ip.c125
-rw-r--r--net/l2tp/l2tp_ip6.c123
-rw-r--r--net/l2tp/l2tp_netlink.c80
-rw-r--r--net/l2tp/l2tp_ppp.c154
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/Kconfig2
-rw-r--r--net/mac80211/agg-rx.c109
-rw-r--r--net/mac80211/agg-tx.c46
-rw-r--r--net/mac80211/airtime.c140
-rw-r--r--net/mac80211/cfg.c277
-rw-r--r--net/mac80211/chan.c70
-rw-r--r--net/mac80211/debugfs.c28
-rw-r--r--net/mac80211/debugfs_key.c9
-rw-r--r--net/mac80211/debugfs_netdev.c3
-rw-r--r--net/mac80211/debugfs_sta.c9
-rw-r--r--net/mac80211/driver-ops.c16
-rw-r--r--net/mac80211/driver-ops.h18
-rw-r--r--net/mac80211/eht.c21
-rw-r--r--net/mac80211/ht.c17
-rw-r--r--net/mac80211/ibss.c7
-rw-r--r--net/mac80211/ieee80211_i.h113
-rw-r--r--net/mac80211/iface.c82
-rw-r--r--net/mac80211/key.c44
-rw-r--r--net/mac80211/link.c73
-rw-r--r--net/mac80211/main.c8
-rw-r--r--net/mac80211/mesh.c10
-rw-r--r--net/mac80211/mesh_hwmp.c8
-rw-r--r--net/mac80211/mesh_pathtbl.c12
-rw-r--r--net/mac80211/mesh_plink.c7
-rw-r--r--net/mac80211/mesh_sync.c2
-rw-r--r--net/mac80211/michael.c2
-rw-r--r--net/mac80211/mlme.c167
-rw-r--r--net/mac80211/ocb.c6
-rw-r--r--net/mac80211/offchannel.c1
-rw-r--r--net/mac80211/pm.c2
-rw-r--r--net/mac80211/rate.c37
-rw-r--r--net/mac80211/rate.h10
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c2
-rw-r--r--net/mac80211/rx.c77
-rw-r--r--net/mac80211/scan.c38
-rw-r--r--net/mac80211/spectmgmt.c9
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c8
-rw-r--r--net/mac80211/tdls.c3
-rw-r--r--net/mac80211/tests/elems.c2
-rw-r--r--net/mac80211/tests/mfp.c2
-rw-r--r--net/mac80211/tests/tpe.c2
-rw-r--r--net/mac80211/tkip.c4
-rw-r--r--net/mac80211/trace.h34
-rw-r--r--net/mac80211/tx.c12
-rw-r--r--net/mac80211/util.c146
-rw-r--r--net/mac80211/vht.c29
-rw-r--r--net/mac80211/wep.c2
-rw-r--r--net/mac80211/wpa.c5
-rw-r--r--net/mac802154/rx.c2
-rw-r--r--net/mac802154/scan.c4
-rw-r--r--net/mac802154/tx.c2
-rw-r--r--net/mctp/af_mctp.c9
-rw-r--r--net/mctp/device.c48
-rw-r--r--net/mctp/neigh.c31
-rw-r--r--net/mctp/route.c33
-rw-r--r--net/mpls/af_mpls.c47
-rw-r--r--net/mpls/mpls_iptunnel.c2
-rw-r--r--net/mptcp/crypto.c2
-rw-r--r--net/mptcp/ctrl.c133
-rw-r--r--net/mptcp/diag.c2
-rw-r--r--net/mptcp/mib.c10
-rw-r--r--net/mptcp/mib.h10
-rw-r--r--net/mptcp/mptcp_pm_gen.c3
-rw-r--r--net/mptcp/options.c4
-rw-r--r--net/mptcp/pm.c14
-rw-r--r--net/mptcp/pm_netlink.c147
-rw-r--r--net/mptcp/pm_userspace.c58
-rw-r--r--net/mptcp/protocol.c75
-rw-r--r--net/mptcp/protocol.h36
-rw-r--r--net/mptcp/sched.c2
-rw-r--r--net/mptcp/subflow.c88
-rw-r--r--net/ncsi/ncsi-manage.c2
-rw-r--r--net/netfilter/core.c4
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c7
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/nf_bpf_link.c9
-rw-r--r--net/netfilter/nf_conncount.c15
-rw-r--r--net/netfilter/nf_conntrack_core.c143
-rw-r--r--net/netfilter/nf_conntrack_netlink.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c6
-rw-r--r--net/netfilter/nf_flow_table_inet.c2
-rw-r--r--net/netfilter/nf_nat_core.c129
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c697
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netfilter/nfnetlink.c16
-rw-r--r--net/netfilter/nfnetlink_queue.c12
-rw-r--r--net/netfilter/nft_bitwise.c170
-rw-r--r--net/netfilter/nft_byteorder.c4
-rw-r--r--net/netfilter/nft_cmp.c6
-rw-r--r--net/netfilter/nft_compat.c12
-rw-r--r--net/netfilter/nft_counter.c90
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_dup_netdev.c2
-rw-r--r--net/netfilter/nft_dynset.c22
-rw-r--r--net/netfilter/nft_exthdr.c4
-rw-r--r--net/netfilter/nft_fib.c3
-rw-r--r--net/netfilter/nft_flow_offload.c12
-rw-r--r--net/netfilter/nft_fwd_netdev.c9
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_inner.c57
-rw-r--r--net/netfilter/nft_log.c2
-rw-r--r--net/netfilter/nft_lookup.c5
-rw-r--r--net/netfilter/nft_masq.c7
-rw-r--r--net/netfilter/nft_meta.c10
-rw-r--r--net/netfilter/nft_nat.c11
-rw-r--r--net/netfilter/nft_numgen.c2
-rw-r--r--net/netfilter/nft_objref.c2
-rw-r--r--net/netfilter/nft_osf.c3
-rw-r--r--net/netfilter/nft_payload.c5
-rw-r--r--net/netfilter/nft_queue.c5
-rw-r--r--net/netfilter/nft_range.c2
-rw-r--r--net/netfilter/nft_redir.c7
-rw-r--r--net/netfilter/nft_reject.c3
-rw-r--r--net/netfilter/nft_reject_inet.c3
-rw-r--r--net/netfilter/nft_reject_netdev.c3
-rw-r--r--net/netfilter/nft_rt.c3
-rw-r--r--net/netfilter/nft_set_bitmap.c10
-rw-r--r--net/netfilter/nft_set_hash.c19
-rw-r--r--net/netfilter/nft_set_pipapo.c13
-rw-r--r--net/netfilter/nft_socket.c51
-rw-r--r--net/netfilter/nft_synproxy.c3
-rw-r--r--net/netfilter/nft_tproxy.c7
-rw-r--r--net/netfilter/nft_tunnel.c10
-rw-r--r--net/netfilter/nft_xfrm.c3
-rw-r--r--net/netfilter/x_tables.c2
-rw-r--r--net/netfilter/xt_CHECKSUM.c33
-rw-r--r--net/netfilter/xt_CLASSIFY.c16
-rw-r--r--net/netfilter/xt_CONNSECMARK.c36
-rw-r--r--net/netfilter/xt_CT.c106
-rw-r--r--net/netfilter/xt_IDLETIMER.c115
-rw-r--r--net/netfilter/xt_LED.c43
-rw-r--r--net/netfilter/xt_NFLOG.c36
-rw-r--r--net/netfilter/xt_RATEEST.c39
-rw-r--r--net/netfilter/xt_SECMARK.c27
-rw-r--r--net/netfilter/xt_TRACE.c36
-rw-r--r--net/netfilter/xt_addrtype.c15
-rw-r--r--net/netfilter/xt_cluster.c33
-rw-r--r--net/netfilter/xt_connbytes.c4
-rw-r--r--net/netfilter/xt_connlimit.c54
-rw-r--r--net/netfilter/xt_connmark.c28
-rw-r--r--net/netfilter/xt_mark.c42
-rw-r--r--net/netlabel/netlabel_mgmt.c13
-rw-r--r--net/netlabel/netlabel_unlabeled.c2
-rw-r--r--net/netlabel/netlabel_user.c7
-rw-r--r--net/netlabel/netlabel_user.h2
-rw-r--r--net/netlink/af_netlink.c74
-rw-r--r--net/netlink/af_netlink.h3
-rw-r--r--net/netlink/genetlink.c32
-rw-r--r--net/netrom/nr_route.c4
-rw-r--r--net/nfc/nci/core.c13
-rw-r--r--net/nfc/nci/ntf.c32
-rw-r--r--net/nfc/netlink.c5
-rw-r--r--net/openvswitch/actions.c8
-rw-r--r--net/openvswitch/conntrack.c35
-rw-r--r--net/openvswitch/datapath.c10
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/openvswitch/flow_netlink.c4
-rw-r--r--net/openvswitch/vport-internal_dev.c12
-rw-r--r--net/packet/af_packet.c31
-rw-r--r--net/phonet/af_phonet.c2
-rw-r--r--net/phonet/pn_dev.c74
-rw-r--r--net/phonet/pn_netlink.c141
-rw-r--r--net/qrtr/af_qrtr.c2
-rw-r--r--net/rds/Kconfig9
-rw-r--r--net/rds/Makefile5
-rw-r--r--net/rds/ib.h4
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rfkill/core.c9
-rw-r--r--net/rfkill/rfkill-gpio.c28
-rw-r--r--net/rxrpc/af_rxrpc.c7
-rw-r--r--net/rxrpc/ar-internal.h4
-rw-r--r--net/rxrpc/conn_client.c4
-rw-r--r--net/rxrpc/conn_object.c4
-rw-r--r--net/rxrpc/io_thread.c10
-rw-r--r--net/rxrpc/local_object.c6
-rw-r--r--net/rxrpc/sendmsg.c11
-rw-r--r--net/sched/act_api.c125
-rw-r--r--net/sched/act_ct.c14
-rw-r--r--net/sched/act_ctinfo.c8
-rw-r--r--net/sched/act_gate.c11
-rw-r--r--net/sched/act_mpls.c18
-rw-r--r--net/sched/act_police.c6
-rw-r--r--net/sched/act_vlan.c1
-rw-r--r--net/sched/cls_api.c73
-rw-r--r--net/sched/cls_flower.c5
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sched/em_cmp.c2
-rw-r--r--net/sched/sch_api.c29
-rw-r--r--net/sched/sch_cake.c66
-rw-r--r--net/sched/sch_cbs.c2
-rw-r--r--net/sched/sch_choke.c4
-rw-r--r--net/sched/sch_fq.c42
-rw-r--r--net/sched/sch_generic.c8
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_netem.c32
-rw-r--r--net/sched/sch_qfq.c5
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfq.c39
-rw-r--r--net/sched/sch_taprio.c27
-rw-r--r--net/sched/sch_tbf.c18
-rw-r--r--net/sctp/ipv6.c21
-rw-r--r--net/sctp/protocol.c19
-rw-r--r--net/sctp/sm_statefuns.c2
-rw-r--r--net/sctp/socket.c20
-rw-r--r--net/shaper/Makefile8
-rw-r--r--net/shaper/shaper.c1438
-rw-r--r--net/shaper/shaper_nl_gen.c154
-rw-r--r--net/shaper/shaper_nl_gen.h44
-rw-r--r--net/smc/af_smc.c18
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_clc.h6
-rw-r--r--net/smc/smc_core.c74
-rw-r--r--net/smc/smc_core.h6
-rw-r--r--net/smc/smc_ib.c8
-rw-r--r--net/smc/smc_inet.c19
-rw-r--r--net/smc/smc_loopback.h1
-rw-r--r--net/smc/smc_pnet.c9
-rw-r--r--net/smc/smc_stats.c6
-rw-r--r--net/smc/smc_stats.h28
-rw-r--r--net/smc/smc_sysctl.c11
-rw-r--r--net/smc/smc_wr.c6
-rw-r--r--net/socket.c329
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_test.c2
-rw-r--r--net/sunrpc/cache.c18
-rw-r--r--net/sunrpc/clnt.c13
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--net/sunrpc/sunrpc.h4
-rw-r--r--net/sunrpc/svc.c201
-rw-r--r--net/sunrpc/svc_xprt.c11
-rw-r--r--net/sunrpc/svcauth.c29
-rw-r--r--net/sunrpc/svcauth_unix.c3
-rw-r--r--net/sunrpc/svcsock.c11
-rw-r--r--net/sunrpc/xprtrdma/ib_client.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c10
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c18
-rw-r--r--net/sunrpc/xprtsock.c19
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c10
-rw-r--r--net/tipc/monitor.c2
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/tls/trace.h2
-rw-r--r--net/unix/af_unix.c81
-rw-r--r--net/unix/garbage.c16
-rw-r--r--net/vmw_vsock/af_vsock.c132
-rw-r--r--net/vmw_vsock/hyperv_transport.c1
-rw-r--r--net/vmw_vsock/virtio_transport.c148
-rw-r--r--net/vmw_vsock/virtio_transport_common.c59
-rw-r--r--net/vmw_vsock/vsock_bpf.c8
-rw-r--r--net/vmw_vsock/vsock_loopback.c6
-rw-r--r--net/wireless/Kconfig45
-rw-r--r--net/wireless/Makefile5
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/core.c84
-rw-r--r--net/wireless/core.h9
-rw-r--r--net/wireless/ibss.c2
-rw-r--r--net/wireless/lib80211.c257
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c448
-rw-r--r--net/wireless/lib80211_crypt_tkip.c738
-rw-r--r--net/wireless/lib80211_crypt_wep.c256
-rw-r--r--net/wireless/mesh.c2
-rw-r--r--net/wireless/mlme.c26
-rw-r--r--net/wireless/nl80211.c248
-rw-r--r--net/wireless/radiotap.c4
-rw-r--r--net/wireless/rdev-ops.h18
-rw-r--r--net/wireless/reg.c21
-rw-r--r--net/wireless/scan.c61
-rw-r--r--net/wireless/sme.c4
-rw-r--r--net/wireless/tests/chan.c2
-rw-r--r--net/wireless/tests/scan.c2
-rw-r--r--net/wireless/trace.h50
-rw-r--r--net/wireless/util.c48
-rw-r--r--net/wireless/wext-compat.c13
-rw-r--r--net/wireless/wext-compat.h6
-rw-r--r--net/wireless/wext-core.c2
-rw-r--r--net/wireless/wext-spy.c232
-rw-r--r--net/xdp/xsk.c72
-rw-r--r--net/xdp/xsk_buff_pool.c99
-rw-r--r--net/xdp/xsk_queue.h7
-rw-r--r--net/xdp/xskmap.c2
-rw-r--r--net/xfrm/xfrm_compat.c6
-rw-r--r--net/xfrm/xfrm_device.c17
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_interface_core.c2
-rw-r--r--net/xfrm/xfrm_policy.c302
-rw-r--r--net/xfrm/xfrm_state.c171
-rw-r--r--net/xfrm/xfrm_user.c95
584 files changed, 17132 insertions, 9210 deletions
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index 16be8f8b2f8c..c40b98f7743c 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -11,11 +11,6 @@
#include "6lowpan_i.h"
-static int lowpan_ndisc_is_useropt(u8 nd_opt_type)
-{
- return nd_opt_type == ND_OPT_6CO;
-}
-
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
#define NDISC_802154_SHORT_ADDR_LENGTH 1
static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
@@ -222,7 +217,6 @@ static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
#endif
const struct ndisc_ops lowpan_ndisc_ops = {
- .is_useropt = lowpan_ndisc_is_useropt,
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
.parse_options = lowpan_ndisc_parse_options,
.update = lowpan_ndisc_update,
diff --git a/net/802/garp.c b/net/802/garp.c
index 6a743d004301..27f0ab146026 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -16,7 +16,7 @@
#include <net/llc.h>
#include <net/llc_pdu.h>
#include <net/garp.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
static unsigned int garp_join_time __read_mostly = 200;
module_param(garp_join_time, uint, 0644);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 3154d7409493..e0c96d0da8d5 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -16,7 +16,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <net/mrp.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
static unsigned int mrp_join_time __read_mostly = 200;
module_param(mrp_join_time, uint, 0644);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 217be32426b5..91d134961357 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -564,17 +564,20 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
NETIF_F_GSO_ENCAP_ALL |
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
- NETIF_F_ALL_FCOE;
+ NETIF_F_FCOE_CRC | NETIF_F_FSO;
if (real_dev->vlan_features & NETIF_F_HW_MACSEC)
dev->hw_features |= NETIF_F_HW_MACSEC;
- dev->features |= dev->hw_features | NETIF_F_LLTX;
+ dev->features |= dev->hw_features;
+ dev->lltx = true;
+ dev->fcoe_mtu = true;
netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
- dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
+ dev->vlan_features = real_dev->vlan_features &
+ ~(NETIF_F_FCOE_CRC | NETIF_F_FSO);
dev->hw_enc_features = vlan_tnl_features(real_dev);
dev->mpls_features = real_dev->mpls_features;
@@ -655,7 +658,6 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
lower_features |= NETIF_F_HW_CSUM;
features = netdev_intersect_features(features, lower_features);
features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
- features |= NETIF_F_LLTX;
return features;
}
@@ -723,7 +725,7 @@ static void vlan_dev_poll_controller(struct net_device *dev)
return;
}
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index cf5219df7903..134419667d59 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -161,10 +161,8 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
return -ENODEV;
}
- if (data[IFLA_VLAN_PROTOCOL])
- proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
- else
- proto = htons(ETH_P_8021Q);
+ proto = nla_get_be16_default(data[IFLA_VLAN_PROTOCOL],
+ htons(ETH_P_8021Q));
vlan->vlan_proto = proto;
vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 87b959da00cd..fa67374bda49 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -238,9 +238,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
stats = dev_get_stats(vlandev, &temp);
seq_printf(seq,
- "%s VID: %d REORDER_HDR: %i dev->priv_flags: %llx\n",
+ "%s VID: %d REORDER_HDR: %i dev->priv_flags: %x\n",
vlandev->name, vlan->vlan_id,
- (int)(vlan->flags & 1), vlandev->priv_flags);
+ (int)(vlan->flags & 1), (u32)vlandev->priv_flags);
seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index bcdab9c23b40..22f8c167845d 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -40,6 +40,14 @@ config NET_9P_XEN
This builds support for a transport for 9pfs between
two Xen domains.
+config NET_9P_USBG
+ tristate "9P USB Gadget Transport"
+ depends on USB_GADGET
+ select CONFIGFS_FS
+ select USB_LIBCOMPOSITE
+ help
+ This builds support for a transport for 9pfs over
+ usb gadget.
config NET_9P_RDMA
depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 1df9b344c30b..22794a451c3f 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
+obj-$(CONFIG_NET_9P_USBG) += 9pnet_usbg.o
9pnet-objs := \
mod.o \
@@ -23,3 +24,6 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
9pnet_rdma-objs := \
trans_rdma.o \
+
+9pnet_usbg-objs := \
+ trans_usbg.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index 5cd94721d974..09f8ced9f8bb 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -977,8 +977,10 @@ error:
struct p9_client *p9_client_create(const char *dev_name, char *options)
{
int err;
+ static atomic_t seqno = ATOMIC_INIT(0);
struct p9_client *clnt;
char *client_id;
+ char *cache_name;
clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
@@ -1035,15 +1037,23 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto close_trans;
+ cache_name = kasprintf(GFP_KERNEL,
+ "9p-fcall-cache-%u", atomic_inc_return(&seqno));
+ if (!cache_name) {
+ err = -ENOMEM;
+ goto close_trans;
+ }
+
/* P9_HDRSZ + 4 is the smallest packet header we can have that is
* followed by data accessed from userspace by read
*/
clnt->fcall_cache =
- kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
+ kmem_cache_create_usercopy(cache_name, clnt->msize,
0, 0, P9_HDRSZ + 4,
clnt->msize - (P9_HDRSZ + 4),
NULL);
+ kfree(cache_name);
return clnt;
close_trans:
diff --git a/net/9p/trans_usbg.c b/net/9p/trans_usbg.c
new file mode 100644
index 000000000000..6b694f117aef
--- /dev/null
+++ b/net/9p/trans_usbg.c
@@ -0,0 +1,956 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * trans_usbg.c - USB peripheral usb9pfs configuration driver and transport.
+ *
+ * Copyright (C) 2024 Michael Grzeschik <m.grzeschik@pengutronix.de>
+ */
+
+/* Gadget usb9pfs only needs two bulk endpoints, and will use the usb9pfs
+ * transport to mount host exported filesystem via usb gadget.
+ */
+
+/* +--------------------------+ | +--------------------------+
+ * | 9PFS mounting client | | | 9PFS exporting server |
+ * SW | | | | |
+ * | (this:trans_usbg) | | |(e.g. diod or nfs-ganesha)|
+ * +-------------^------------+ | +-------------^------------+
+ * | | |
+ * ------------------|------------------------------------|-------------
+ * | | |
+ * +-------------v------------+ | +-------------v------------+
+ * | | | | |
+ * HW | USB Device Controller <---------> USB Host Controller |
+ * | | | | |
+ * +--------------------------+ | +--------------------------+
+ */
+
+#include <linux/cleanup.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/usb/composite.h>
+#include <linux/usb/func_utils.h>
+
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <net/9p/transport.h>
+
+#define DEFAULT_BUFLEN 16384
+
+struct f_usb9pfs {
+ struct p9_client *client;
+
+ /* 9p request lock for en/dequeue */
+ spinlock_t lock;
+
+ struct usb_request *in_req;
+ struct usb_request *out_req;
+
+ struct usb_ep *in_ep;
+ struct usb_ep *out_ep;
+
+ struct completion send;
+ struct completion received;
+
+ unsigned int buflen;
+
+ struct usb_function function;
+};
+
+static inline struct f_usb9pfs *func_to_usb9pfs(struct usb_function *f)
+{
+ return container_of(f, struct f_usb9pfs, function);
+}
+
+struct f_usb9pfs_opts {
+ struct usb_function_instance func_inst;
+ unsigned int buflen;
+
+ struct f_usb9pfs_dev *dev;
+
+ /* Read/write access to configfs attributes is handled by configfs.
+ *
+ * This is to protect the data from concurrent access by read/write
+ * and create symlink/remove symlink.
+ */
+ struct mutex lock;
+ int refcnt;
+};
+
+struct f_usb9pfs_dev {
+ struct f_usb9pfs *usb9pfs;
+ struct f_usb9pfs_opts *opts;
+ char tag[41];
+ bool inuse;
+
+ struct list_head usb9pfs_instance;
+};
+
+static DEFINE_MUTEX(usb9pfs_lock);
+static struct list_head usbg_instance_list;
+
+static int usb9pfs_queue_tx(struct f_usb9pfs *usb9pfs, struct p9_req_t *p9_tx_req,
+ gfp_t gfp_flags)
+{
+ struct usb_composite_dev *cdev = usb9pfs->function.config->cdev;
+ struct usb_request *req = usb9pfs->in_req;
+ int ret;
+
+ if (!(p9_tx_req->tc.size % usb9pfs->in_ep->maxpacket))
+ req->zero = 1;
+
+ req->buf = p9_tx_req->tc.sdata;
+ req->length = p9_tx_req->tc.size;
+ req->context = p9_tx_req;
+
+ dev_dbg(&cdev->gadget->dev, "%s usb9pfs send --> %d/%d, zero: %d\n",
+ usb9pfs->in_ep->name, req->actual, req->length, req->zero);
+
+ ret = usb_ep_queue(usb9pfs->in_ep, req, gfp_flags);
+ if (ret)
+ req->context = NULL;
+
+ dev_dbg(&cdev->gadget->dev, "tx submit --> %d\n", ret);
+
+ return ret;
+}
+
+static int usb9pfs_queue_rx(struct f_usb9pfs *usb9pfs, struct usb_request *req,
+ gfp_t gfp_flags)
+{
+ struct usb_composite_dev *cdev = usb9pfs->function.config->cdev;
+ int ret;
+
+ ret = usb_ep_queue(usb9pfs->out_ep, req, gfp_flags);
+
+ dev_dbg(&cdev->gadget->dev, "rx submit --> %d\n", ret);
+
+ return ret;
+}
+
+static int usb9pfs_transmit(struct f_usb9pfs *usb9pfs, struct p9_req_t *p9_req)
+{
+ int ret = 0;
+
+ guard(spinlock_irqsave)(&usb9pfs->lock);
+
+ ret = usb9pfs_queue_tx(usb9pfs, p9_req, GFP_ATOMIC);
+ if (ret)
+ return ret;
+
+ list_del(&p9_req->req_list);
+
+ p9_req_get(p9_req);
+
+ return ret;
+}
+
+static void usb9pfs_tx_complete(struct usb_ep *ep, struct usb_request *req)
+{
+ struct f_usb9pfs *usb9pfs = ep->driver_data;
+ struct usb_composite_dev *cdev = usb9pfs->function.config->cdev;
+ struct p9_req_t *p9_tx_req = req->context;
+ unsigned long flags;
+
+ /* reset zero packages */
+ req->zero = 0;
+
+ if (req->status) {
+ dev_err(&cdev->gadget->dev, "%s usb9pfs complete --> %d, %d/%d\n",
+ ep->name, req->status, req->actual, req->length);
+ return;
+ }
+
+ dev_dbg(&cdev->gadget->dev, "%s usb9pfs complete --> %d, %d/%d\n",
+ ep->name, req->status, req->actual, req->length);
+
+ spin_lock_irqsave(&usb9pfs->lock, flags);
+ WRITE_ONCE(p9_tx_req->status, REQ_STATUS_SENT);
+
+ p9_req_put(usb9pfs->client, p9_tx_req);
+
+ req->context = NULL;
+
+ spin_unlock_irqrestore(&usb9pfs->lock, flags);
+
+ complete(&usb9pfs->send);
+}
+
+static struct p9_req_t *usb9pfs_rx_header(struct f_usb9pfs *usb9pfs, void *buf)
+{
+ struct p9_req_t *p9_rx_req;
+ struct p9_fcall rc;
+ int ret;
+
+ /* start by reading header */
+ rc.sdata = buf;
+ rc.offset = 0;
+ rc.capacity = P9_HDRSZ;
+ rc.size = P9_HDRSZ;
+
+ p9_debug(P9_DEBUG_TRANS, "mux %p got %zu bytes\n", usb9pfs,
+ rc.capacity - rc.offset);
+
+ ret = p9_parse_header(&rc, &rc.size, NULL, NULL, 0);
+ if (ret) {
+ p9_debug(P9_DEBUG_ERROR,
+ "error parsing header: %d\n", ret);
+ return NULL;
+ }
+
+ p9_debug(P9_DEBUG_TRANS,
+ "mux %p pkt: size: %d bytes tag: %d\n",
+ usb9pfs, rc.size, rc.tag);
+
+ p9_rx_req = p9_tag_lookup(usb9pfs->client, rc.tag);
+ if (!p9_rx_req || p9_rx_req->status != REQ_STATUS_SENT) {
+ p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", rc.tag);
+ return NULL;
+ }
+
+ if (rc.size > p9_rx_req->rc.capacity) {
+ p9_debug(P9_DEBUG_ERROR,
+ "requested packet size too big: %d for tag %d with capacity %zd\n",
+ rc.size, rc.tag, p9_rx_req->rc.capacity);
+ p9_req_put(usb9pfs->client, p9_rx_req);
+ return NULL;
+ }
+
+ if (!p9_rx_req->rc.sdata) {
+ p9_debug(P9_DEBUG_ERROR,
+ "No recv fcall for tag %d (req %p), disconnecting!\n",
+ rc.tag, p9_rx_req);
+ p9_req_put(usb9pfs->client, p9_rx_req);
+ return NULL;
+ }
+
+ return p9_rx_req;
+}
+
+static void usb9pfs_rx_complete(struct usb_ep *ep, struct usb_request *req)
+{
+ struct f_usb9pfs *usb9pfs = ep->driver_data;
+ struct usb_composite_dev *cdev = usb9pfs->function.config->cdev;
+ struct p9_req_t *p9_rx_req;
+
+ if (req->status) {
+ dev_err(&cdev->gadget->dev, "%s usb9pfs complete --> %d, %d/%d\n",
+ ep->name, req->status, req->actual, req->length);
+ return;
+ }
+
+ p9_rx_req = usb9pfs_rx_header(usb9pfs, req->buf);
+ if (!p9_rx_req)
+ return;
+
+ memcpy(p9_rx_req->rc.sdata, req->buf, req->actual);
+
+ p9_rx_req->rc.size = req->actual;
+
+ p9_client_cb(usb9pfs->client, p9_rx_req, REQ_STATUS_RCVD);
+ p9_req_put(usb9pfs->client, p9_rx_req);
+
+ complete(&usb9pfs->received);
+}
+
+static void disable_ep(struct usb_composite_dev *cdev, struct usb_ep *ep)
+{
+ int value;
+
+ value = usb_ep_disable(ep);
+ if (value < 0)
+ dev_info(&cdev->gadget->dev,
+ "disable %s --> %d\n", ep->name, value);
+}
+
+static void disable_usb9pfs(struct f_usb9pfs *usb9pfs)
+{
+ struct usb_composite_dev *cdev =
+ usb9pfs->function.config->cdev;
+
+ if (usb9pfs->in_req) {
+ usb_ep_free_request(usb9pfs->in_ep, usb9pfs->in_req);
+ usb9pfs->in_req = NULL;
+ }
+
+ if (usb9pfs->out_req) {
+ usb_ep_free_request(usb9pfs->out_ep, usb9pfs->out_req);
+ usb9pfs->out_req = NULL;
+ }
+
+ disable_ep(cdev, usb9pfs->in_ep);
+ disable_ep(cdev, usb9pfs->out_ep);
+ dev_dbg(&cdev->gadget->dev, "%s disabled\n",
+ usb9pfs->function.name);
+}
+
+static int alloc_requests(struct usb_composite_dev *cdev,
+ struct f_usb9pfs *usb9pfs)
+{
+ int ret;
+
+ usb9pfs->in_req = usb_ep_alloc_request(usb9pfs->in_ep, GFP_ATOMIC);
+ if (!usb9pfs->in_req) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+ usb9pfs->out_req = alloc_ep_req(usb9pfs->out_ep, usb9pfs->buflen);
+ if (!usb9pfs->out_req) {
+ ret = -ENOENT;
+ goto fail_in;
+ }
+
+ usb9pfs->in_req->complete = usb9pfs_tx_complete;
+ usb9pfs->out_req->complete = usb9pfs_rx_complete;
+
+ /* length will be set in complete routine */
+ usb9pfs->in_req->context = usb9pfs;
+ usb9pfs->out_req->context = usb9pfs;
+
+ return 0;
+
+fail_in:
+ usb_ep_free_request(usb9pfs->in_ep, usb9pfs->in_req);
+fail:
+ return ret;
+}
+
+static int enable_endpoint(struct usb_composite_dev *cdev,
+ struct f_usb9pfs *usb9pfs, struct usb_ep *ep)
+{
+ int ret;
+
+ ret = config_ep_by_speed(cdev->gadget, &usb9pfs->function, ep);
+ if (ret)
+ return ret;
+
+ ret = usb_ep_enable(ep);
+ if (ret < 0)
+ return ret;
+
+ ep->driver_data = usb9pfs;
+
+ return 0;
+}
+
+static int
+enable_usb9pfs(struct usb_composite_dev *cdev, struct f_usb9pfs *usb9pfs)
+{
+ struct p9_client *client;
+ int ret = 0;
+
+ ret = enable_endpoint(cdev, usb9pfs, usb9pfs->in_ep);
+ if (ret)
+ goto out;
+
+ ret = enable_endpoint(cdev, usb9pfs, usb9pfs->out_ep);
+ if (ret)
+ goto disable_in;
+
+ ret = alloc_requests(cdev, usb9pfs);
+ if (ret)
+ goto disable_out;
+
+ client = usb9pfs->client;
+ if (client)
+ client->status = Connected;
+
+ dev_dbg(&cdev->gadget->dev, "%s enabled\n", usb9pfs->function.name);
+ return 0;
+
+disable_out:
+ usb_ep_disable(usb9pfs->out_ep);
+disable_in:
+ usb_ep_disable(usb9pfs->in_ep);
+out:
+ return ret;
+}
+
+static int p9_usbg_create(struct p9_client *client, const char *devname, char *args)
+{
+ struct f_usb9pfs_dev *dev;
+ struct f_usb9pfs *usb9pfs;
+ int ret = -ENOENT;
+ int found = 0;
+
+ if (!devname)
+ return -EINVAL;
+
+ guard(mutex)(&usb9pfs_lock);
+
+ list_for_each_entry(dev, &usbg_instance_list, usb9pfs_instance) {
+ if (!strncmp(devname, dev->tag, strlen(devname))) {
+ if (!dev->inuse) {
+ dev->inuse = true;
+ found = 1;
+ break;
+ }
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_err("no channels available for device %s\n", devname);
+ return ret;
+ }
+
+ usb9pfs = dev->usb9pfs;
+ if (!usb9pfs)
+ return -EINVAL;
+
+ client->trans = (void *)usb9pfs;
+ if (!usb9pfs->in_req)
+ client->status = Disconnected;
+ else
+ client->status = Connected;
+ usb9pfs->client = client;
+
+ client->trans_mod->maxsize = usb9pfs->buflen;
+
+ complete(&usb9pfs->received);
+
+ return 0;
+}
+
+static void usb9pfs_clear_tx(struct f_usb9pfs *usb9pfs)
+{
+ struct p9_req_t *req;
+
+ guard(spinlock_irqsave)(&usb9pfs->lock);
+
+ req = usb9pfs->in_req->context;
+ if (!req)
+ return;
+
+ if (!req->t_err)
+ req->t_err = -ECONNRESET;
+
+ p9_client_cb(usb9pfs->client, req, REQ_STATUS_ERROR);
+}
+
+static void p9_usbg_close(struct p9_client *client)
+{
+ struct f_usb9pfs *usb9pfs;
+ struct f_usb9pfs_dev *dev;
+ struct f_usb9pfs_opts *opts;
+
+ if (!client)
+ return;
+
+ usb9pfs = client->trans;
+ if (!usb9pfs)
+ return;
+
+ client->status = Disconnected;
+
+ usb9pfs_clear_tx(usb9pfs);
+
+ opts = container_of(usb9pfs->function.fi,
+ struct f_usb9pfs_opts, func_inst);
+
+ dev = opts->dev;
+
+ mutex_lock(&usb9pfs_lock);
+ dev->inuse = false;
+ mutex_unlock(&usb9pfs_lock);
+}
+
+static int p9_usbg_request(struct p9_client *client, struct p9_req_t *p9_req)
+{
+ struct f_usb9pfs *usb9pfs = client->trans;
+ int ret;
+
+ if (client->status != Connected)
+ return -EBUSY;
+
+ ret = wait_for_completion_killable(&usb9pfs->received);
+ if (ret)
+ return ret;
+
+ ret = usb9pfs_transmit(usb9pfs, p9_req);
+ if (ret)
+ return ret;
+
+ ret = wait_for_completion_killable(&usb9pfs->send);
+ if (ret)
+ return ret;
+
+ return usb9pfs_queue_rx(usb9pfs, usb9pfs->out_req, GFP_ATOMIC);
+}
+
+static int p9_usbg_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+ struct f_usb9pfs *usb9pfs = client->trans;
+ int ret = 1;
+
+ p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+
+ guard(spinlock_irqsave)(&usb9pfs->lock);
+
+ if (req->status == REQ_STATUS_UNSENT) {
+ list_del(&req->req_list);
+ WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
+ p9_req_put(client, req);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static struct p9_trans_module p9_usbg_trans = {
+ .name = "usbg",
+ .create = p9_usbg_create,
+ .close = p9_usbg_close,
+ .request = p9_usbg_request,
+ .cancel = p9_usbg_cancel,
+ .owner = THIS_MODULE,
+};
+
+/*-------------------------------------------------------------------------*/
+
+#define USB_PROTOCOL_9PFS 0x09
+
+static struct usb_interface_descriptor usb9pfs_intf = {
+ .bLength = sizeof(usb9pfs_intf),
+ .bDescriptorType = USB_DT_INTERFACE,
+
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .bInterfaceSubClass = USB_SUBCLASS_VENDOR_SPEC,
+ .bInterfaceProtocol = USB_PROTOCOL_9PFS,
+
+ /* .iInterface = DYNAMIC */
+};
+
+/* full speed support: */
+
+static struct usb_endpoint_descriptor fs_usb9pfs_source_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor fs_usb9pfs_sink_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bEndpointAddress = USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_descriptor_header *fs_usb9pfs_descs[] = {
+ (struct usb_descriptor_header *)&usb9pfs_intf,
+ (struct usb_descriptor_header *)&fs_usb9pfs_sink_desc,
+ (struct usb_descriptor_header *)&fs_usb9pfs_source_desc,
+ NULL,
+};
+
+/* high speed support: */
+
+static struct usb_endpoint_descriptor hs_usb9pfs_source_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor hs_usb9pfs_sink_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(512),
+};
+
+static struct usb_descriptor_header *hs_usb9pfs_descs[] = {
+ (struct usb_descriptor_header *)&usb9pfs_intf,
+ (struct usb_descriptor_header *)&hs_usb9pfs_source_desc,
+ (struct usb_descriptor_header *)&hs_usb9pfs_sink_desc,
+ NULL,
+};
+
+/* super speed support: */
+
+static struct usb_endpoint_descriptor ss_usb9pfs_source_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor ss_usb9pfs_source_comp_desc = {
+ .bLength = USB_DT_SS_EP_COMP_SIZE,
+ .bDescriptorType = USB_DT_SS_ENDPOINT_COMP,
+ .bMaxBurst = 0,
+ .bmAttributes = 0,
+ .wBytesPerInterval = 0,
+};
+
+static struct usb_endpoint_descriptor ss_usb9pfs_sink_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor ss_usb9pfs_sink_comp_desc = {
+ .bLength = USB_DT_SS_EP_COMP_SIZE,
+ .bDescriptorType = USB_DT_SS_ENDPOINT_COMP,
+ .bMaxBurst = 0,
+ .bmAttributes = 0,
+ .wBytesPerInterval = 0,
+};
+
+static struct usb_descriptor_header *ss_usb9pfs_descs[] = {
+ (struct usb_descriptor_header *)&usb9pfs_intf,
+ (struct usb_descriptor_header *)&ss_usb9pfs_source_desc,
+ (struct usb_descriptor_header *)&ss_usb9pfs_source_comp_desc,
+ (struct usb_descriptor_header *)&ss_usb9pfs_sink_desc,
+ (struct usb_descriptor_header *)&ss_usb9pfs_sink_comp_desc,
+ NULL,
+};
+
+/* function-specific strings: */
+static struct usb_string strings_usb9pfs[] = {
+ [0].s = "usb9pfs input to output",
+ { } /* end of list */
+};
+
+static struct usb_gadget_strings stringtab_usb9pfs = {
+ .language = 0x0409, /* en-us */
+ .strings = strings_usb9pfs,
+};
+
+static struct usb_gadget_strings *usb9pfs_strings[] = {
+ &stringtab_usb9pfs,
+ NULL,
+};
+
+/*-------------------------------------------------------------------------*/
+
+static int usb9pfs_func_bind(struct usb_configuration *c,
+ struct usb_function *f)
+{
+ struct f_usb9pfs *usb9pfs = func_to_usb9pfs(f);
+ struct f_usb9pfs_opts *opts;
+ struct usb_composite_dev *cdev = c->cdev;
+ int ret;
+ int id;
+
+ /* allocate interface ID(s) */
+ id = usb_interface_id(c, f);
+ if (id < 0)
+ return id;
+ usb9pfs_intf.bInterfaceNumber = id;
+
+ id = usb_string_id(cdev);
+ if (id < 0)
+ return id;
+ strings_usb9pfs[0].id = id;
+ usb9pfs_intf.iInterface = id;
+
+ /* allocate endpoints */
+ usb9pfs->in_ep = usb_ep_autoconfig(cdev->gadget,
+ &fs_usb9pfs_source_desc);
+ if (!usb9pfs->in_ep)
+ goto autoconf_fail;
+
+ usb9pfs->out_ep = usb_ep_autoconfig(cdev->gadget,
+ &fs_usb9pfs_sink_desc);
+ if (!usb9pfs->out_ep)
+ goto autoconf_fail;
+
+ /* support high speed hardware */
+ hs_usb9pfs_source_desc.bEndpointAddress =
+ fs_usb9pfs_source_desc.bEndpointAddress;
+ hs_usb9pfs_sink_desc.bEndpointAddress =
+ fs_usb9pfs_sink_desc.bEndpointAddress;
+
+ /* support super speed hardware */
+ ss_usb9pfs_source_desc.bEndpointAddress =
+ fs_usb9pfs_source_desc.bEndpointAddress;
+ ss_usb9pfs_sink_desc.bEndpointAddress =
+ fs_usb9pfs_sink_desc.bEndpointAddress;
+
+ ret = usb_assign_descriptors(f, fs_usb9pfs_descs, hs_usb9pfs_descs,
+ ss_usb9pfs_descs, ss_usb9pfs_descs);
+ if (ret)
+ return ret;
+
+ opts = container_of(f->fi, struct f_usb9pfs_opts, func_inst);
+ opts->dev->usb9pfs = usb9pfs;
+
+ dev_dbg(&cdev->gadget->dev, "%s speed %s: IN/%s, OUT/%s\n",
+ (gadget_is_superspeed(c->cdev->gadget) ? "super" :
+ (gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full")),
+ f->name, usb9pfs->in_ep->name, usb9pfs->out_ep->name);
+
+ return 0;
+
+autoconf_fail:
+ ERROR(cdev, "%s: can't autoconfigure on %s\n",
+ f->name, cdev->gadget->name);
+ return -ENODEV;
+}
+
+static void usb9pfs_func_unbind(struct usb_configuration *c,
+ struct usb_function *f)
+{
+ struct f_usb9pfs *usb9pfs = func_to_usb9pfs(f);
+
+ disable_usb9pfs(usb9pfs);
+}
+
+static void usb9pfs_free_func(struct usb_function *f)
+{
+ struct f_usb9pfs *usb9pfs = func_to_usb9pfs(f);
+ struct f_usb9pfs_opts *opts;
+
+ kfree(usb9pfs);
+
+ opts = container_of(f->fi, struct f_usb9pfs_opts, func_inst);
+
+ mutex_lock(&opts->lock);
+ opts->refcnt--;
+ mutex_unlock(&opts->lock);
+
+ usb_free_all_descriptors(f);
+}
+
+static int usb9pfs_set_alt(struct usb_function *f,
+ unsigned int intf, unsigned int alt)
+{
+ struct f_usb9pfs *usb9pfs = func_to_usb9pfs(f);
+ struct usb_composite_dev *cdev = f->config->cdev;
+
+ return enable_usb9pfs(cdev, usb9pfs);
+}
+
+static void usb9pfs_disable(struct usb_function *f)
+{
+ struct f_usb9pfs *usb9pfs = func_to_usb9pfs(f);
+
+ usb9pfs_clear_tx(usb9pfs);
+}
+
+static struct usb_function *usb9pfs_alloc(struct usb_function_instance *fi)
+{
+ struct f_usb9pfs_opts *usb9pfs_opts;
+ struct f_usb9pfs *usb9pfs;
+
+ usb9pfs = kzalloc(sizeof(*usb9pfs), GFP_KERNEL);
+ if (!usb9pfs)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&usb9pfs->lock);
+
+ init_completion(&usb9pfs->send);
+ init_completion(&usb9pfs->received);
+
+ usb9pfs_opts = container_of(fi, struct f_usb9pfs_opts, func_inst);
+
+ mutex_lock(&usb9pfs_opts->lock);
+ usb9pfs_opts->refcnt++;
+ mutex_unlock(&usb9pfs_opts->lock);
+
+ usb9pfs->buflen = usb9pfs_opts->buflen;
+
+ usb9pfs->function.name = "usb9pfs";
+ usb9pfs->function.bind = usb9pfs_func_bind;
+ usb9pfs->function.unbind = usb9pfs_func_unbind;
+ usb9pfs->function.set_alt = usb9pfs_set_alt;
+ usb9pfs->function.disable = usb9pfs_disable;
+ usb9pfs->function.strings = usb9pfs_strings;
+
+ usb9pfs->function.free_func = usb9pfs_free_func;
+
+ return &usb9pfs->function;
+}
+
+static inline struct f_usb9pfs_opts *to_f_usb9pfs_opts(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct f_usb9pfs_opts,
+ func_inst.group);
+}
+
+static inline struct f_usb9pfs_opts *fi_to_f_usb9pfs_opts(struct usb_function_instance *fi)
+{
+ return container_of(fi, struct f_usb9pfs_opts, func_inst);
+}
+
+static void usb9pfs_attr_release(struct config_item *item)
+{
+ struct f_usb9pfs_opts *usb9pfs_opts = to_f_usb9pfs_opts(item);
+
+ usb_put_function_instance(&usb9pfs_opts->func_inst);
+}
+
+static struct configfs_item_operations usb9pfs_item_ops = {
+ .release = usb9pfs_attr_release,
+};
+
+static ssize_t f_usb9pfs_opts_buflen_show(struct config_item *item, char *page)
+{
+ struct f_usb9pfs_opts *opts = to_f_usb9pfs_opts(item);
+ int ret;
+
+ mutex_lock(&opts->lock);
+ ret = sysfs_emit(page, "%d\n", opts->buflen);
+ mutex_unlock(&opts->lock);
+
+ return ret;
+}
+
+static ssize_t f_usb9pfs_opts_buflen_store(struct config_item *item,
+ const char *page, size_t len)
+{
+ struct f_usb9pfs_opts *opts = to_f_usb9pfs_opts(item);
+ int ret;
+ u32 num;
+
+ guard(mutex)(&opts->lock);
+
+ if (opts->refcnt)
+ return -EBUSY;
+
+ ret = kstrtou32(page, 0, &num);
+ if (ret)
+ return ret;
+
+ opts->buflen = num;
+
+ return len;
+}
+
+CONFIGFS_ATTR(f_usb9pfs_opts_, buflen);
+
+static struct configfs_attribute *usb9pfs_attrs[] = {
+ &f_usb9pfs_opts_attr_buflen,
+ NULL,
+};
+
+static const struct config_item_type usb9pfs_func_type = {
+ .ct_item_ops = &usb9pfs_item_ops,
+ .ct_attrs = usb9pfs_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct f_usb9pfs_dev *_usb9pfs_do_find_dev(const char *tag)
+{
+ struct f_usb9pfs_dev *usb9pfs_dev;
+
+ if (!tag)
+ return NULL;
+
+ list_for_each_entry(usb9pfs_dev, &usbg_instance_list, usb9pfs_instance) {
+ if (strcmp(usb9pfs_dev->tag, tag) == 0)
+ return usb9pfs_dev;
+ }
+
+ return NULL;
+}
+
+static int usb9pfs_tag_instance(struct f_usb9pfs_dev *dev, const char *tag)
+{
+ struct f_usb9pfs_dev *existing;
+ int ret = 0;
+
+ guard(mutex)(&usb9pfs_lock);
+
+ existing = _usb9pfs_do_find_dev(tag);
+ if (!existing)
+ strscpy(dev->tag, tag, ARRAY_SIZE(dev->tag));
+ else if (existing != dev)
+ ret = -EBUSY;
+
+ return ret;
+}
+
+static int usb9pfs_set_inst_tag(struct usb_function_instance *fi, const char *tag)
+{
+ if (strlen(tag) >= sizeof_field(struct f_usb9pfs_dev, tag))
+ return -ENAMETOOLONG;
+ return usb9pfs_tag_instance(fi_to_f_usb9pfs_opts(fi)->dev, tag);
+}
+
+static void usb9pfs_free_instance(struct usb_function_instance *fi)
+{
+ struct f_usb9pfs_opts *usb9pfs_opts =
+ container_of(fi, struct f_usb9pfs_opts, func_inst);
+ struct f_usb9pfs_dev *dev = usb9pfs_opts->dev;
+
+ mutex_lock(&usb9pfs_lock);
+ list_del(&dev->usb9pfs_instance);
+ mutex_unlock(&usb9pfs_lock);
+
+ kfree(usb9pfs_opts);
+}
+
+static struct usb_function_instance *usb9pfs_alloc_instance(void)
+{
+ struct f_usb9pfs_opts *usb9pfs_opts;
+ struct f_usb9pfs_dev *dev;
+
+ usb9pfs_opts = kzalloc(sizeof(*usb9pfs_opts), GFP_KERNEL);
+ if (!usb9pfs_opts)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&usb9pfs_opts->lock);
+
+ usb9pfs_opts->func_inst.set_inst_name = usb9pfs_set_inst_tag;
+ usb9pfs_opts->func_inst.free_func_inst = usb9pfs_free_instance;
+
+ usb9pfs_opts->buflen = DEFAULT_BUFLEN;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ kfree(usb9pfs_opts);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ usb9pfs_opts->dev = dev;
+ dev->opts = usb9pfs_opts;
+
+ config_group_init_type_name(&usb9pfs_opts->func_inst.group, "",
+ &usb9pfs_func_type);
+
+ mutex_lock(&usb9pfs_lock);
+ list_add_tail(&dev->usb9pfs_instance, &usbg_instance_list);
+ mutex_unlock(&usb9pfs_lock);
+
+ return &usb9pfs_opts->func_inst;
+}
+DECLARE_USB_FUNCTION(usb9pfs, usb9pfs_alloc_instance, usb9pfs_alloc);
+
+static int __init usb9pfs_modinit(void)
+{
+ int ret;
+
+ INIT_LIST_HEAD(&usbg_instance_list);
+
+ ret = usb_function_register(&usb9pfsusb_func);
+ if (!ret)
+ v9fs_register_trans(&p9_usbg_trans);
+
+ return ret;
+}
+
+static void __exit usb9pfs_modexit(void)
+{
+ usb_function_unregister(&usb9pfsusb_func);
+ v9fs_unregister_trans(&p9_usbg_trans);
+}
+
+module_init(usb9pfs_modinit);
+module_exit(usb9pfs_modexit);
+
+MODULE_ALIAS_9P("usbg");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("USB gadget 9pfs transport");
+MODULE_AUTHOR("Michael Grzeschik");
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index dfdbe1ca5338..b9ff69c7522a 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -286,7 +286,7 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
if (!priv->rings[i].intf)
break;
if (priv->rings[i].irq > 0)
- unbind_from_irqhandler(priv->rings[i].irq, priv->dev);
+ unbind_from_irqhandler(priv->rings[i].irq, ring);
if (priv->rings[i].data.in) {
for (j = 0;
j < (1 << priv->rings[i].intf->ring_order);
@@ -465,6 +465,7 @@ static int xen_9pfs_front_init(struct xenbus_device *dev)
goto error;
}
+ xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
error_xenbus:
@@ -512,8 +513,10 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
break;
case XenbusStateInitWait:
- if (!xen_9pfs_front_init(dev))
- xenbus_switch_state(dev, XenbusStateInitialised);
+ if (dev->state != XenbusStateInitialising)
+ break;
+
+ xen_9pfs_front_init(dev);
break;
case XenbusStateConnected:
diff --git a/net/Kconfig b/net/Kconfig
index d27d0deac0bf..c3fca69a7c83 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -66,6 +66,15 @@ config SKB_DECRYPTED
config SKB_EXTENSIONS
bool
+config NET_DEVMEM
+ def_bool y
+ depends on DMA_SHARED_BUFFER
+ depends on GENERIC_ALLOCATOR
+ depends on PAGE_POOL
+
+config NET_SHAPER
+ bool
+
menu "Networking options"
source "net/packet/Kconfig"
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index 5e3fffe707dd..277fab8c4d77 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -24,3 +24,18 @@ config DEBUG_NET
help
Enable extra sanity checks in networking.
This is mostly used by fuzzers, but is safe to select.
+
+config DEBUG_NET_SMALL_RTNL
+ bool "Add extra per-netns mutex inside RTNL"
+ depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT
+ select PROVE_LOCKING
+ default n
+ help
+ rtnl_lock() is being replaced with rtnl_net_lock() that
+ acquires the global RTNL and a small per-netns RTNL mutex.
+
+ During the conversion, rtnl_net_lock() just adds an extra
+ mutex in every RTNL scope and slows down the operations.
+
+ Once the conversion completes, rtnl_lock() will be removed
+ and rtnetlink will gain per-netns scalability.
diff --git a/net/Makefile b/net/Makefile
index 65bb8c72a35e..60ed5190eda8 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -79,3 +79,4 @@ obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
obj-$(CONFIG_MCTP) += mctp/
obj-$(CONFIG_NET_HANDSHAKE) += handshake/
+obj-$(CONFIG_NET_SHAPER) += shaper/
diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile
index 33164d972d37..152312a15180 100644
--- a/net/appletalk/Makefile
+++ b/net/appletalk/Makefile
@@ -5,6 +5,6 @@
obj-$(CONFIG_ATALK) += appletalk.o
-appletalk-y := aarp.o ddp.o dev.o
+appletalk-y := aarp.o ddp.o
appletalk-$(CONFIG_PROC_FS) += atalk_proc.o
appletalk-$(CONFIG_SYSCTL) += sysctl_net_atalk.o
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
deleted file mode 100644
index 284c8e585533..000000000000
--- a/net/appletalk/dev.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Moved here from drivers/net/net_init.c, which is:
- * Written 1993,1994,1995 by Donald Becker.
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/if_ltalk.h>
-
-static void ltalk_setup(struct net_device *dev)
-{
- /* Fill in the fields of the device structure with localtalk-generic values. */
-
- dev->type = ARPHRD_LOCALTLK;
- dev->hard_header_len = LTALK_HLEN;
- dev->mtu = LTALK_MTU;
- dev->addr_len = LTALK_ALEN;
- dev->tx_queue_len = 10;
-
- dev->broadcast[0] = 0xFF;
-
- dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
-}
-
-/**
- * alloc_ltalkdev - Allocates and sets up an localtalk device
- * @sizeof_priv: Size of additional driver-private structure to be allocated
- * for this localtalk device
- *
- * Fill in the fields of the device structure with localtalk-generic
- * values. Basically does everything except registering the device.
- *
- * Constructs a new net device, complete with a private data area of
- * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
- * this private data area.
- */
-
-struct net_device *alloc_ltalkdev(int sizeof_priv)
-{
- return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN,
- ltalk_setup);
-}
-EXPORT_SYMBOL(alloc_ltalkdev);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 74b49c35ddc1..07ae5dd1f150 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -36,6 +36,7 @@
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
@@ -371,8 +372,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
batadv_ogm_packet->orig,
ntohl(batadv_ogm_packet->seqno),
batadv_ogm_packet->tq, batadv_ogm_packet->ttl,
- ((batadv_ogm_packet->flags & BATADV_DIRECTLINK) ?
- "on" : "off"),
+ str_on_off(batadv_ogm_packet->flags & BATADV_DIRECTLINK),
hard_iface->net_dev->name,
hard_iface->net_dev->dev_addr);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5f46ca3d4bb8..449faf5a5487 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -33,6 +33,7 @@
#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/workqueue.h>
#include <net/arp.h>
#include <net/genetlink.h>
@@ -1946,16 +1947,15 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
claim = batadv_claim_hash_find(bat_priv, &search_claim);
if (!claim) {
+ bool local = batadv_is_my_client(bat_priv, ethhdr->h_source, vid);
+
/* possible optimization: race for a claim */
/* No claim exists yet, claim it for us!
*/
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"%s(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
- __func__, ethhdr->h_source,
- batadv_is_my_client(bat_priv,
- ethhdr->h_source, vid) ?
- "yes" : "no");
+ __func__, ethhdr->h_source, str_yes_no(local));
batadv_handle_claim(bat_priv, primary_if,
primary_if->net_dev->dev_addr,
ethhdr->h_source, vid);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 4c7e85534324..801eff8a40e5 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -7,7 +7,7 @@
#include "distributed-arp-table.h"
#include "main.h"
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 3d4c36ae2e1a..97ea71a052f8 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2024.2"
+#define BATADV_SOURCE_VERSION "2024.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 30ecbc2ef1fd..2758aba47a2f 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1020,9 +1020,10 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = batadv_softif_free;
- dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
- dev->features |= NETIF_F_LLTX;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->lltx = true;
+ dev->netns_local = true;
/* can't call min_mtu, because the needed variables
* have not been initialized yet
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 2243cec18ecc..760d51fdbdf6 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -28,6 +28,7 @@
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
+#include <linux/overflow.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
@@ -209,20 +210,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
}
/**
- * batadv_tt_local_entry_free_rcu() - free the tt_local_entry
- * @rcu: rcu pointer of the tt_local_entry
- */
-static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_local_entry *tt_local_entry;
-
- tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
- common.rcu);
-
- kmem_cache_free(batadv_tl_cache, tt_local_entry);
-}
-
-/**
* batadv_tt_local_entry_release() - release tt_local_entry from lists and queue
* for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -236,7 +223,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
batadv_softif_vlan_put(tt_local_entry->vlan);
- call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
+ kfree_rcu(tt_local_entry, common.rcu);
}
/**
@@ -255,20 +242,6 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
}
/**
- * batadv_tt_global_entry_free_rcu() - free the tt_global_entry
- * @rcu: rcu pointer of the tt_global_entry
- */
-static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_global_entry *tt_global_entry;
-
- tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
- common.rcu);
-
- kmem_cache_free(batadv_tg_cache, tt_global_entry);
-}
-
-/**
* batadv_tt_global_entry_release() - release tt_global_entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -282,7 +255,7 @@ void batadv_tt_global_entry_release(struct kref *ref)
batadv_tt_global_del_orig_list(tt_global_entry);
- call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
+ kfree_rcu(tt_global_entry, common.rcu);
}
/**
@@ -408,19 +381,6 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
}
/**
- * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry
- * @rcu: rcu pointer of the orig_entry
- */
-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_orig_list_entry *orig_entry;
-
- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
-
- kmem_cache_free(batadv_tt_orig_cache, orig_entry);
-}
-
-/**
* batadv_tt_orig_list_entry_release() - release tt orig entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the tt orig entry
@@ -433,7 +393,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
refcount);
batadv_orig_node_put(orig_entry->orig_node);
- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
+ kfree_rcu(orig_entry, rcu);
}
/**
@@ -856,8 +816,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
num_entries += atomic_read(&vlan->tt.num_entries);
}
- change_offset = sizeof(**tt_data);
- change_offset += num_vlan * sizeof(*tt_vlan);
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
@@ -876,7 +835,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
(*tt_data)->ttvn = atomic_read(&orig_node->last_ttvn);
(*tt_data)->num_vlan = htons(num_vlan);
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
+ tt_vlan = (*tt_data)->vlan_data;
hlist_for_each_entry(vlan, &orig_node->vlan_list, list) {
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
@@ -936,8 +895,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
total_entries += vlan_entries;
}
- change_offset = sizeof(**tt_data);
- change_offset += num_vlan * sizeof(*tt_vlan);
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
@@ -956,7 +914,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
(*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn);
(*tt_data)->num_vlan = htons(num_vlan);
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
+ tt_vlan = (*tt_data)->vlan_data;
hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) {
vlan_entries = atomic_read(&vlan->tt.num_entries);
if (vlan_entries < 1)
@@ -990,16 +948,25 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
int tt_diff_len, tt_change_len = 0;
int tt_diff_entries_num = 0;
int tt_diff_entries_count = 0;
+ bool drop_changes = false;
+ size_t tt_extra_len = 0;
u16 tvlv_len;
tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes);
tt_diff_len = batadv_tt_len(tt_diff_entries_num);
/* if we have too many changes for one packet don't send any
- * and wait for the tt table request which will be fragmented
+ * and wait for the tt table request so we can reply with the full
+ * (fragmented) table.
+ *
+ * The local change history should still be cleaned up so the next
+ * TT round can start again with a clean state.
*/
- if (tt_diff_len > bat_priv->soft_iface->mtu)
+ if (tt_diff_len > bat_priv->soft_iface->mtu) {
tt_diff_len = 0;
+ tt_diff_entries_num = 0;
+ drop_changes = true;
+ }
tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tt_data,
&tt_change, &tt_diff_len);
@@ -1008,7 +975,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
tt_data->flags = BATADV_TT_OGM_DIFF;
- if (tt_diff_len == 0)
+ if (!drop_changes && tt_diff_len == 0)
goto container_register;
spin_lock_bh(&bat_priv->tt.changes_list_lock);
@@ -1027,6 +994,9 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
}
spin_unlock_bh(&bat_priv->tt.changes_list_lock);
+ tt_extra_len = batadv_tt_len(tt_diff_entries_num -
+ tt_diff_entries_count);
+
/* Keep the buffer for possible tt_request */
spin_lock_bh(&bat_priv->tt.last_changeset_lock);
kfree(bat_priv->tt.last_changeset);
@@ -1035,6 +1005,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
tt_change_len = batadv_tt_len(tt_diff_entries_count);
/* check whether this new OGM has no changes due to size problems */
if (tt_diff_entries_count > 0) {
+ tt_diff_len -= tt_extra_len;
/* if kmalloc() fails we will reply with the full table
* instead of providing the diff
*/
@@ -1047,6 +1018,8 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
}
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
+ /* Remove extra packet space for OGM */
+ tvlv_len -= tt_extra_len;
container_register:
batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data,
tvlv_len);
@@ -2747,14 +2720,16 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
*
* Fills the tvlv buff with the tt entries from the specified hash. If valid_cb
* is not provided then this becomes a no-op.
+ *
+ * Return: Remaining unused length in tvlv_buff.
*/
-static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
- struct batadv_hashtable *hash,
- void *tvlv_buff, u16 tt_len,
- bool (*valid_cb)(const void *,
- const void *,
- u8 *flags),
- void *cb_data)
+static u16 batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
+ struct batadv_hashtable *hash,
+ void *tvlv_buff, u16 tt_len,
+ bool (*valid_cb)(const void *,
+ const void *,
+ u8 *flags),
+ void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tvlv_tt_change *tt_change;
@@ -2768,7 +2743,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
tt_change = tvlv_buff;
if (!valid_cb)
- return;
+ return tt_len;
rcu_read_lock();
for (i = 0; i < hash->size; i++) {
@@ -2794,6 +2769,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
}
}
rcu_read_unlock();
+
+ return batadv_tt_len(tt_tot - tt_num_entries);
}
/**
@@ -2916,7 +2893,6 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_tt_req_node *tt_req_node = NULL;
- struct batadv_tvlv_tt_vlan_data *tt_vlan_req;
struct batadv_hard_iface *primary_if;
bool ret = false;
int i, size;
@@ -2932,7 +2908,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto out;
- size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan;
+ size = struct_size(tvlv_tt_data, vlan_data, num_vlan);
tvlv_tt_data = kzalloc(size, GFP_ATOMIC);
if (!tvlv_tt_data)
goto out;
@@ -2944,12 +2920,10 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
/* send all the CRCs within the request. This is needed by intermediate
* nodes to ensure they have the correct table before replying
*/
- tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1);
for (i = 0; i < num_vlan; i++) {
- tt_vlan_req->vid = tt_vlan->vid;
- tt_vlan_req->crc = tt_vlan->crc;
+ tvlv_tt_data->vlan_data[i].vid = tt_vlan->vid;
+ tvlv_tt_data->vlan_data[i].crc = tt_vlan->crc;
- tt_vlan_req++;
tt_vlan++;
}
@@ -3001,7 +2975,6 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_orig_node *res_dst_orig_node = NULL;
struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
- struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool ret = false, full_table;
u8 orig_ttvn, req_ttvn;
u16 tvlv_len;
@@ -3024,10 +2997,9 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn);
req_ttvn = tt_data->ttvn;
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
/* this node doesn't have the requested data */
if (orig_ttvn != req_ttvn ||
- !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan,
+ !batadv_tt_global_check_crc(req_dst_orig_node, tt_data->vlan_data,
ntohs(tt_data->num_vlan)))
goto out;
@@ -3069,10 +3041,11 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
goto out;
/* fill the rest of the tvlv with the real TT entries */
- batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash,
- tt_change, tt_len,
- batadv_tt_global_valid,
- req_dst_orig_node);
+ tvlv_len -= batadv_tt_tvlv_generate(bat_priv,
+ bat_priv->tt.global_hash,
+ tt_change, tt_len,
+ batadv_tt_global_valid,
+ req_dst_orig_node);
}
/* Don't send the response, if larger than fragmented packet. */
@@ -3196,9 +3169,11 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
goto out;
/* fill the rest of the tvlv with the real TT entries */
- batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash,
- tt_change, tt_len,
- batadv_tt_local_valid, NULL);
+ tvlv_len -= batadv_tt_tvlv_generate(bat_priv,
+ bat_priv->tt.local_hash,
+ tt_change, tt_len,
+ batadv_tt_local_valid,
+ NULL);
}
tvlv_tt_data->flags = BATADV_TT_RESPONSE;
@@ -3370,7 +3345,6 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node = NULL;
struct batadv_tvlv_tt_change *tt_change;
u8 *tvlv_ptr = (u8 *)tt_data;
- u16 change_offset;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
@@ -3383,10 +3357,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
spin_lock_bh(&orig_node->tt_lock);
- change_offset = sizeof(struct batadv_tvlv_tt_vlan_data);
- change_offset *= ntohs(tt_data->num_vlan);
- change_offset += sizeof(*tt_data);
- tvlv_ptr += change_offset;
+ tvlv_ptr += struct_size(tt_data, vlan_data, ntohs(tt_data->num_vlan));
tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr;
if (tt_data->flags & BATADV_TT_FULL_TABLE) {
@@ -3985,10 +3956,10 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
u8 flags, void *tvlv_value,
u16 tvlv_value_len)
{
- struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tt_data;
u16 num_entries, num_vlan;
+ size_t flex_size;
if (tvlv_value_len < sizeof(*tt_data))
return;
@@ -3998,17 +3969,18 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
num_vlan = ntohs(tt_data->num_vlan);
- if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan)
+ flex_size = flex_array_size(tt_data, vlan_data, num_vlan);
+ if (tvlv_value_len < flex_size)
return;
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
- tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan);
- tvlv_value_len -= sizeof(*tt_vlan) * num_vlan;
+ tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data
+ + flex_size);
+ tvlv_value_len -= flex_size;
num_entries = batadv_tt_entries(tvlv_value_len);
- batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change,
- num_entries, tt_data->ttvn);
+ batadv_tt_update_orig(bat_priv, orig, tt_data->vlan_data, num_vlan,
+ tt_change, num_entries, tt_data->ttvn);
}
/**
@@ -4039,8 +4011,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
- tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data);
- tt_vlan_len *= ntohs(tt_data->num_vlan);
+ tt_vlan_len = flex_array_size(tt_data, vlan_data,
+ ntohs(tt_data->num_vlan));
if (tvlv_value_len < tt_vlan_len)
return NET_RX_SUCCESS;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 00840d5784fe..04f6398b3a40 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -287,7 +287,7 @@ struct batadv_frag_table_entry {
/** @lock: lock to protect the list of fragments */
spinlock_t lock;
- /** @timestamp: time (jiffie) of last received fragment */
+ /** @timestamp: time (jiffy) of last received fragment */
unsigned long timestamp;
/** @seqno: sequence number of the fragments in the list */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 67604ccec2f4..0b4d0a8bd361 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -185,6 +185,28 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
}
EXPORT_SYMBOL(bt_sock_unlink);
+bool bt_sock_linked(struct bt_sock_list *l, struct sock *s)
+{
+ struct sock *sk;
+
+ if (!l || !s)
+ return false;
+
+ read_lock(&l->lock);
+
+ sk_for_each(sk, &l->head) {
+ if (s == sk) {
+ read_unlock(&l->lock);
+ return true;
+ }
+ }
+
+ read_unlock(&l->lock);
+
+ return false;
+}
+EXPORT_SYMBOL(bt_sock_linked);
+
void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
const struct cred *old_cred;
@@ -825,11 +847,14 @@ cleanup_sysfs:
bt_sysfs_cleanup();
cleanup_led:
bt_leds_cleanup();
+ debugfs_remove_recursive(bt_debugfs);
return err;
}
static void __exit bt_exit(void)
{
+ iso_exit();
+
mgmt_exit();
sco_exit();
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index ec45f77fce21..d44987d4515c 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -29,7 +29,7 @@
#include <linux/kthread.h>
#include <linux/file.h>
#include <linux/etherdevice.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/l2cap.h>
@@ -745,8 +745,7 @@ static int __init bnep_init(void)
if (flt[0])
BT_INFO("BNEP filters: %s", flt);
- bnep_sock_init();
- return 0;
+ return bnep_sock_init();
}
static void __exit bnep_exit(void)
diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig
index c8337786da6b..34e923466236 100644
--- a/net/bluetooth/cmtp/Kconfig
+++ b/net/bluetooth/cmtp/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config BT_CMTP
- tristate "CMTP protocol support"
- depends on BT_BREDR && ISDN_CAPI
+ tristate "CMTP protocol support (DEPRECATED)"
+ depends on BT_BREDR && ISDN_CAPI && DEPRECATED
help
CMTP (CAPI Message Transport Protocol) is a transport layer
for CAPI messages. CMTP is required for the Bluetooth Common
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index f3bedc3b613a..884703fda979 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -248,18 +248,10 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
break;
case CAPI_FUNCTION_GET_MANUFACTURER:
- if (skb->len < CAPI_MSG_BASELEN + 15)
- break;
-
- if (!info && ctrl) {
- int len = min_t(uint, CAPI_MANUFACTURER_LEN,
- skb->data[CAPI_MSG_BASELEN + 14]);
-
- memset(ctrl->manu, 0, CAPI_MANUFACTURER_LEN);
- strncpy(ctrl->manu,
- skb->data + CAPI_MSG_BASELEN + 15, len);
- }
-
+ if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 14)
+ strscpy_pad(ctrl->manu,
+ skb->data + CAPI_MSG_BASELEN + 15,
+ skb->data[CAPI_MSG_BASELEN + 14]);
break;
case CAPI_FUNCTION_GET_VERSION:
@@ -276,18 +268,10 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
break;
case CAPI_FUNCTION_GET_SERIAL_NUMBER:
- if (skb->len < CAPI_MSG_BASELEN + 17)
- break;
-
- if (!info && ctrl) {
- int len = min_t(uint, CAPI_SERIAL_LEN,
- skb->data[CAPI_MSG_BASELEN + 16]);
-
- memset(ctrl->serial, 0, CAPI_SERIAL_LEN);
- strncpy(ctrl->serial,
- skb->data + CAPI_MSG_BASELEN + 17, len);
- }
-
+ if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 16)
+ strscpy_pad(ctrl->serial,
+ skb->data + CAPI_MSG_BASELEN + 17,
+ skb->data[CAPI_MSG_BASELEN + 16]);
break;
}
diff --git a/net/bluetooth/coredump.c b/net/bluetooth/coredump.c
index ec97a4bab1c9..c18df3a08607 100644
--- a/net/bluetooth/coredump.c
+++ b/net/bluetooth/coredump.c
@@ -5,7 +5,7 @@
#include <linux/devcoredump.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 0df19f2f4af9..5c89a05e8b29 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -5,7 +5,7 @@
* Copyright (C) 2021 Intel Corporation
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
void eir_create(struct hci_dev *hdev, u8 *data);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 8e48ccd2af30..d097e308a755 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -106,8 +106,7 @@ void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
* where a timeout + cancel does indicate an actual failure.
*/
if (status && status != HCI_ERROR_UNKNOWN_CONN_ID)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
+ mgmt_connect_failed(hdev, conn, status);
/* The connection attempt was doing scan for new RPA, and is
* in scan phase. If params are not associated with any other
@@ -290,6 +289,9 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data)
kfree(conn_handle);
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
+
bt_dev_dbg(hdev, "hcon %p", conn);
configure_datapath_sync(hdev, &conn->codec);
@@ -778,7 +780,6 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c
if (!d)
return -ENOMEM;
- memset(d, 0, sizeof(*d));
d->big = big;
d->sync_handle = conn->sync_handle;
@@ -951,6 +952,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t
conn->tx_power = HCI_TX_POWER_INVALID;
conn->max_tx_power = HCI_TX_POWER_INVALID;
conn->sync_handle = HCI_SYNC_HANDLE_INVALID;
+ conn->sid = HCI_SID_INVALID;
set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -1126,9 +1128,9 @@ void hci_conn_del(struct hci_conn *conn)
hci_conn_unlink(conn);
- cancel_delayed_work_sync(&conn->disc_work);
- cancel_delayed_work_sync(&conn->auto_accept_work);
- cancel_delayed_work_sync(&conn->idle_work);
+ disable_delayed_work_sync(&conn->disc_work);
+ disable_delayed_work_sync(&conn->auto_accept_work);
+ disable_delayed_work_sync(&conn->idle_work);
if (conn->type == ACL_LINK) {
/* Unacked frames */
@@ -1250,8 +1252,7 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
hci_le_conn_failed(conn, status);
break;
case ACL_LINK:
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
+ mgmt_connect_failed(hdev, conn, status);
break;
}
@@ -2062,105 +2063,217 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
static void create_pa_complete(struct hci_dev *hdev, void *data, int err)
{
- struct hci_cp_le_pa_create_sync *cp = data;
-
bt_dev_dbg(hdev, "");
if (err)
bt_dev_err(hdev, "Unable to create PA: %d", err);
+}
+
+static bool hci_conn_check_create_pa_sync(struct hci_conn *conn)
+{
+ if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID)
+ return false;
- kfree(cp);
+ return true;
}
static int create_pa_sync(struct hci_dev *hdev, void *data)
{
- struct hci_cp_le_pa_create_sync *cp = data;
- int err;
+ struct hci_cp_le_pa_create_sync cp = {0};
+ struct hci_conn *conn;
+ int err = 0;
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC,
- sizeof(*cp), cp, HCI_CMD_TIMEOUT);
- if (err) {
- hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- return err;
+ hci_dev_lock(hdev);
+
+ rcu_read_lock();
+
+ /* The spec allows only one pending LE Periodic Advertising Create
+ * Sync command at a time. If the command is pending now, don't do
+ * anything. We check for pending connections after each PA Sync
+ * Established event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2493:
+ *
+ * If the Host issues this command when another HCI_LE_Periodic_
+ * Advertising_Create_Sync command is pending, the Controller shall
+ * return the error code Command Disallowed (0x0C).
+ */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags))
+ goto unlock;
}
- return hci_update_passive_scan_sync(hdev);
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_pa_sync(conn)) {
+ struct bt_iso_qos *qos = &conn->iso_qos;
+
+ cp.options = qos->bcast.options;
+ cp.sid = conn->sid;
+ cp.addr_type = conn->dst_type;
+ bacpy(&cp.addr, &conn->dst);
+ cp.skip = cpu_to_le16(qos->bcast.skip);
+ cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout);
+ cp.sync_cte_type = qos->bcast.sync_cte_type;
+
+ break;
+ }
+ }
+
+unlock:
+ rcu_read_unlock();
+
+ hci_dev_unlock(hdev);
+
+ if (bacmp(&cp.addr, BDADDR_ANY)) {
+ hci_dev_set_flag(hdev, HCI_PA_SYNC);
+ set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ if (!err)
+ err = hci_update_passive_scan_sync(hdev);
+
+ if (err) {
+ hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+ clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+ }
+ }
+
+ return err;
+}
+
+int hci_pa_create_sync_pending(struct hci_dev *hdev)
+{
+ /* Queue start pa_create_sync and scan */
+ return hci_cmd_sync_queue(hdev, create_pa_sync,
+ NULL, create_pa_complete);
}
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid,
struct bt_iso_qos *qos)
{
- struct hci_cp_le_pa_create_sync *cp;
struct hci_conn *conn;
- int err;
-
- if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC))
- return ERR_PTR(-EBUSY);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
if (IS_ERR(conn))
return conn;
conn->iso_qos = *qos;
+ conn->dst_type = dst_type;
+ conn->sid = sid;
conn->state = BT_LISTEN;
hci_conn_hold(conn);
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp) {
- hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- hci_conn_drop(conn);
- return ERR_PTR(-ENOMEM);
+ hci_pa_create_sync_pending(hdev);
+
+ return conn;
+}
+
+static bool hci_conn_check_create_big_sync(struct hci_conn *conn)
+{
+ if (!conn->num_bis)
+ return false;
+
+ return true;
+}
+
+static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err)
+{
+ bt_dev_dbg(hdev, "");
+
+ if (err)
+ bt_dev_err(hdev, "Unable to create BIG sync: %d", err);
+}
+
+static int big_create_sync(struct hci_dev *hdev, void *data)
+{
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
+ struct hci_conn *conn;
+
+ rcu_read_lock();
+
+ pdu->num_bis = 0;
+
+ /* The spec allows only one pending LE BIG Create Sync command at
+ * a time. If the command is pending now, don't do anything. We
+ * check for pending connections after each BIG Sync Established
+ * event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2586:
+ *
+ * If the Host sends this command when the Controller is in the
+ * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_
+ * Established event has not been generated, the Controller shall
+ * return the error code Command Disallowed (0x0C).
+ */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags))
+ goto unlock;
}
- cp->options = qos->bcast.options;
- cp->sid = sid;
- cp->addr_type = dst_type;
- bacpy(&cp->addr, dst);
- cp->skip = cpu_to_le16(qos->bcast.skip);
- cp->sync_timeout = cpu_to_le16(qos->bcast.sync_timeout);
- cp->sync_cte_type = qos->bcast.sync_cte_type;
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_big_sync(conn)) {
+ struct bt_iso_qos *qos = &conn->iso_qos;
- /* Queue start pa_create_sync and scan */
- err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
- if (err < 0) {
- hci_conn_drop(conn);
- kfree(cp);
- return ERR_PTR(err);
+ set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
+
+ pdu->handle = qos->bcast.big;
+ pdu->sync_handle = cpu_to_le16(conn->sync_handle);
+ pdu->encryption = qos->bcast.encryption;
+ memcpy(pdu->bcode, qos->bcast.bcode,
+ sizeof(pdu->bcode));
+ pdu->mse = qos->bcast.mse;
+ pdu->timeout = cpu_to_le16(qos->bcast.timeout);
+ pdu->num_bis = conn->num_bis;
+ memcpy(pdu->bis, conn->bis, conn->num_bis);
+
+ break;
+ }
}
- return conn;
+unlock:
+ rcu_read_unlock();
+
+ if (!pdu->num_bis)
+ return 0;
+
+ return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
+ struct_size(pdu, bis, pdu->num_bis), pdu);
+}
+
+int hci_le_big_create_sync_pending(struct hci_dev *hdev)
+{
+ /* Queue big_create_sync */
+ return hci_cmd_sync_queue_once(hdev, big_create_sync,
+ NULL, big_create_sync_complete);
}
int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
- DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
int err;
- if (num_bis < 0x01 || num_bis > pdu->num_bis)
+ if (num_bis < 0x01 || num_bis > ISO_MAX_NUM_BIS)
return -EINVAL;
err = qos_set_big(hdev, qos);
if (err)
return err;
- if (hcon)
- hcon->iso_qos.bcast.big = qos->bcast.big;
+ if (hcon) {
+ /* Update hcon QoS */
+ hcon->iso_qos = *qos;
- pdu->handle = qos->bcast.big;
- pdu->sync_handle = cpu_to_le16(sync_handle);
- pdu->encryption = qos->bcast.encryption;
- memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode));
- pdu->mse = qos->bcast.mse;
- pdu->timeout = cpu_to_le16(qos->bcast.timeout);
- pdu->num_bis = num_bis;
- memcpy(pdu->bis, bis, num_bis);
+ hcon->num_bis = num_bis;
+ memcpy(hcon->bis, bis, num_bis);
+ }
- return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
- struct_size(pdu, bis, num_bis), pdu);
+ return hci_le_big_create_sync_pending(hdev);
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
@@ -2224,13 +2337,9 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
conn->iso_qos.bcast.big);
if (parent && parent != conn) {
link = hci_conn_link(parent, conn);
- if (!link) {
- hci_conn_drop(conn);
- return ERR_PTR(-ENOLINK);
- }
-
- /* Link takes the refcount */
hci_conn_drop(conn);
+ if (!link)
+ return ERR_PTR(-ENOLINK);
}
return conn;
@@ -2320,15 +2429,12 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
}
link = hci_conn_link(le, cis);
+ hci_conn_drop(cis);
if (!link) {
hci_conn_drop(le);
- hci_conn_drop(cis);
return ERR_PTR(-ENOLINK);
}
- /* Link takes the refcount */
- hci_conn_drop(cis);
-
cis->state = BT_CONNECT;
hci_le_create_cis_pending(hdev);
@@ -2952,5 +3058,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
return 0;
}
- return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL);
+ /* Run immediately if on cmd_sync_work since this may be called
+ * as a result to MGMT_OP_DISCONNECT/MGMT_OP_UNPAIR which does
+ * already queue its callback on cmd_sync_work.
+ */
+ return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d6976db02c06..18ab5628f85a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -33,7 +33,7 @@
#include <linux/property.h>
#include <linux/suspend.h>
#include <linux/wait.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -57,7 +57,6 @@ DEFINE_RWLOCK(hci_dev_list_lock);
/* HCI callback list */
LIST_HEAD(hci_cb_list);
-DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
@@ -1644,12 +1643,12 @@ void hci_adv_instances_clear(struct hci_dev *hdev)
struct adv_info *adv_instance, *n;
if (hdev->adv_instance_timeout) {
- cancel_delayed_work(&hdev->adv_instance_expire);
+ disable_delayed_work(&hdev->adv_instance_expire);
hdev->adv_instance_timeout = 0;
}
list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
- cancel_delayed_work_sync(&adv_instance->rpa_expired_cb);
+ disable_delayed_work_sync(&adv_instance->rpa_expired_cb);
list_del(&adv_instance->list);
kfree(adv_instance);
}
@@ -2685,11 +2684,11 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
- cancel_work_sync(&hdev->rx_work);
- cancel_work_sync(&hdev->cmd_work);
- cancel_work_sync(&hdev->tx_work);
- cancel_work_sync(&hdev->power_on);
- cancel_work_sync(&hdev->error_reset);
+ disable_work_sync(&hdev->rx_work);
+ disable_work_sync(&hdev->cmd_work);
+ disable_work_sync(&hdev->tx_work);
+ disable_work_sync(&hdev->power_on);
+ disable_work_sync(&hdev->error_reset);
hci_cmd_sync_clear(hdev);
@@ -2796,8 +2795,14 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err)
{
bt_dev_dbg(hdev, "err 0x%2.2x", err);
- cancel_delayed_work_sync(&hdev->cmd_timer);
- cancel_delayed_work_sync(&hdev->ncmd_timer);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ disable_delayed_work_sync(&hdev->cmd_timer);
+ disable_delayed_work_sync(&hdev->ncmd_timer);
+ } else {
+ cancel_delayed_work_sync(&hdev->cmd_timer);
+ cancel_delayed_work_sync(&hdev->ncmd_timer);
+ }
+
atomic_set(&hdev->cmd_cnt, 1);
hci_cmd_sync_cancel_sync(hdev, err);
@@ -2987,9 +2992,7 @@ int hci_register_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- mutex_lock(&hci_cb_list_lock);
- list_add_tail(&cb->list, &hci_cb_list);
- mutex_unlock(&hci_cb_list_lock);
+ list_add_tail_rcu(&cb->list, &hci_cb_list);
return 0;
}
@@ -2999,9 +3002,8 @@ int hci_unregister_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- mutex_lock(&hci_cb_list_lock);
- list_del(&cb->list);
- mutex_unlock(&hci_cb_list_lock);
+ list_del_rcu(&cb->list);
+ synchronize_rcu();
return 0;
}
@@ -3765,18 +3767,22 @@ static void hci_tx_work(struct work_struct *work)
/* ACL data packet */
static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
- struct hci_acl_hdr *hdr = (void *) skb->data;
+ struct hci_acl_hdr *hdr;
struct hci_conn *conn;
__u16 handle, flags;
- skb_pull(skb, HCI_ACL_HDR_SIZE);
+ hdr = skb_pull_data(skb, sizeof(*hdr));
+ if (!hdr) {
+ bt_dev_err(hdev, "ACL packet too small");
+ goto drop;
+ }
handle = __le16_to_cpu(hdr->handle);
flags = hci_flags(handle);
handle = hci_handle(handle);
- BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len,
- handle, flags);
+ bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
+ handle, flags);
hdev->stat.acl_rx++;
@@ -3795,24 +3801,29 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
handle);
}
+drop:
kfree_skb(skb);
}
/* SCO data packet */
static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
- struct hci_sco_hdr *hdr = (void *) skb->data;
+ struct hci_sco_hdr *hdr;
struct hci_conn *conn;
__u16 handle, flags;
- skb_pull(skb, HCI_SCO_HDR_SIZE);
+ hdr = skb_pull_data(skb, sizeof(*hdr));
+ if (!hdr) {
+ bt_dev_err(hdev, "SCO packet too small");
+ goto drop;
+ }
handle = __le16_to_cpu(hdr->handle);
flags = hci_flags(handle);
handle = hci_handle(handle);
- BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len,
- handle, flags);
+ bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
+ handle, flags);
hdev->stat.sco_rx++;
@@ -3830,6 +3841,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
handle);
}
+drop:
kfree_skb(skb);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 1c82dcdf6e8f..2cc7a9306350 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -25,7 +25,7 @@
/* Bluetooth HCI event handling. */
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
@@ -42,8 +42,6 @@
#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
-#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000)
-
/* Handle HCI Event packets */
static void *hci_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
@@ -3626,6 +3624,13 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
+ /* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers
+ * to avoid unexpected SMP command errors when pairing.
+ */
+ if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT,
+ &hdev->quirks))
+ goto notify;
+
/* Set the default Authenticated Payload Timeout after
* an LE Link is established. As per Core Spec v5.0, Vol 2, Part B
* Section 3.3, the HCI command WRITE_AUTH_PAYLOAD_TIMEOUT should be
@@ -3706,7 +3711,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- if (!ev->status && !test_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) {
+ if (!ev->status) {
struct hci_cp_remote_name_req cp;
memset(&cp, 0, sizeof(cp));
bacpy(&cp.bdaddr, &conn->dst);
@@ -5324,19 +5329,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- /* If no side requires MITM protection; auto-accept */
+ /* If no side requires MITM protection; use JUST_CFM method */
if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) &&
(!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) {
- /* If we're not the initiators request authorization to
- * proceed from user space (mgmt_user_confirm with
- * confirm_hint set to 1). The exception is if neither
- * side had MITM or if the local IO capability is
- * NoInputNoOutput, in which case we do auto-accept
+ /* If we're not the initiator of request authorization and the
+ * local IO capability is not NoInputNoOutput, use JUST_WORKS
+ * method (mgmt_user_confirm with confirm_hint set to 1).
*/
if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
- conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
- (loc_mitm || rem_mitm)) {
+ conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) {
bt_dev_dbg(hdev, "Confirming auto-accept as acceptor");
confirm_hint = 1;
goto confirm;
@@ -6348,7 +6350,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_pa_sync_established *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
- struct hci_conn *pa_sync;
+ struct hci_conn *pa_sync, *conn;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6356,6 +6358,20 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+ conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr,
+ ev->bdaddr_type);
+ if (!conn) {
+ bt_dev_err(hdev,
+ "Unable to find connection for dst %pMR sid 0x%2.2x",
+ &ev->bdaddr, ev->sid);
+ goto unlock;
+ }
+
+ clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+
+ conn->sync_handle = le16_to_cpu(ev->handle);
+ conn->sid = HCI_SID_INVALID;
+
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags);
if (!(mask & HCI_LM_ACCEPT)) {
hci_le_pa_term_sync(hdev, ev->handle);
@@ -6382,6 +6398,9 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
}
unlock:
+ /* Handle any other pending PA sync command */
+ hci_pa_create_sync_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -6851,38 +6870,27 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
return;
hci_dev_lock(hdev);
- rcu_read_lock();
/* Connect all BISes that are bound to the BIG */
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (bacmp(&conn->dst, BDADDR_ANY) ||
- conn->type != ISO_LINK ||
- conn->iso_qos.bcast.big != ev->handle)
+ while ((conn = hci_conn_hash_lookup_big_state(hdev, ev->handle,
+ BT_BOUND))) {
+ if (ev->status) {
+ hci_connect_cfm(conn, ev->status);
+ hci_conn_del(conn);
continue;
+ }
if (hci_conn_set_handle(conn,
__le16_to_cpu(ev->bis_handle[i++])))
continue;
- if (!ev->status) {
- conn->state = BT_CONNECTED;
- set_bit(HCI_CONN_BIG_CREATED, &conn->flags);
- rcu_read_unlock();
- hci_debugfs_create_conn(conn);
- hci_conn_add_sysfs(conn);
- hci_iso_setup_path(conn);
- rcu_read_lock();
- continue;
- }
-
- hci_connect_cfm(conn, ev->status);
- rcu_read_unlock();
- hci_conn_del(conn);
- rcu_read_lock();
+ conn->state = BT_CONNECTED;
+ set_bit(HCI_CONN_BIG_CREATED, &conn->flags);
+ hci_debugfs_create_conn(conn);
+ hci_conn_add_sysfs(conn);
+ hci_iso_setup_path(conn);
}
- rcu_read_unlock();
-
if (!ev->status && !i)
/* If no BISes have been connected for the BIG,
* terminate. This is in case all bound connections
@@ -6899,7 +6907,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_evt_le_big_sync_estabilished *ev = data;
- struct hci_conn *bis;
+ struct hci_conn *bis, *conn;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6910,6 +6918,20 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_big_sync_pend(hdev, ev->handle,
+ ev->num_bis);
+ if (!conn) {
+ bt_dev_err(hdev,
+ "Unable to find connection for big 0x%2.2x",
+ ev->handle);
+ goto unlock;
+ }
+
+ clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
+
+ conn->num_bis = 0;
+ memset(conn->bis, 0, sizeof(conn->num_bis));
+
for (i = 0; i < ev->num_bis; i++) {
u16 handle = le16_to_cpu(ev->bis[i]);
__le32 interval;
@@ -6930,6 +6952,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
/* Mark PA sync as established */
set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+ bis->sync_handle = conn->sync_handle;
bis->iso_qos.bcast.big = ev->handle;
memset(&interval, 0, sizeof(interval));
memcpy(&interval, ev->latency, sizeof(ev->latency));
@@ -6959,6 +6982,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
hci_connect_cfm(bis, ev->status);
}
+unlock:
+ /* Handle any other pending BIG sync command */
+ hci_le_big_create_sync_pending(hdev);
+
hci_dev_unlock(hdev);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 69c2ba1e843e..022b86797acd 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -27,7 +27,7 @@
#include <linux/export.h>
#include <linux/utsname.h>
#include <linux/sched.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -1926,7 +1926,7 @@ drop:
}
static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+ sockptr_t optval, unsigned int optlen)
{
struct hci_ufilter uf = { .opcode = 0 };
struct sock *sk = sock->sk;
@@ -1943,7 +1943,7 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
switch (optname) {
case HCI_DATA_DIR:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -1954,7 +1954,7 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
break;
case HCI_TIME_STAMP:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -1974,7 +1974,7 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
uf.event_mask[1] = *((u32 *) f->event_mask + 1);
}
- err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+ err = copy_safe_from_sockptr(&uf, sizeof(uf), optval, optlen);
if (err)
break;
@@ -2005,7 +2005,7 @@ done:
}
static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0;
@@ -2015,7 +2015,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
if (level == SOL_HCI)
return hci_sock_setsockopt_old(sock, level, optname, optval,
- len);
+ optlen);
if (level != SOL_BLUETOOTH)
return -ENOPROTOOPT;
@@ -2035,7 +2035,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
goto done;
}
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index e79cd40bd079..c86f4e42e69c 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -112,7 +112,7 @@ static void hci_cmd_sync_add(struct hci_request *req, u16 opcode, u32 plen,
skb_queue_tail(&req->cmd_q, skb);
}
-static int hci_cmd_sync_run(struct hci_request *req)
+static int hci_req_sync_run(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
struct sk_buff *skb;
@@ -169,7 +169,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
hdev->req_status = HCI_REQ_PEND;
- err = hci_cmd_sync_run(&req);
+ err = hci_req_sync_run(&req);
if (err < 0)
return ERR_PTR(err);
@@ -206,6 +206,12 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
return ERR_PTR(err);
}
+ /* If command return a status event skb will be set to NULL as there are
+ * no parameters.
+ */
+ if (!skb)
+ return ERR_PTR(-ENODATA);
+
return skb;
}
EXPORT_SYMBOL(__hci_cmd_sync_sk);
@@ -255,6 +261,11 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
u8 status;
skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk);
+
+ /* If command return a status event, skb will be set to -ENODATA */
+ if (skb == ERR_PTR(-ENODATA))
+ return 0;
+
if (IS_ERR(skb)) {
if (!event)
bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", opcode,
@@ -262,13 +273,6 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
return PTR_ERR(skb);
}
- /* If command return a status event skb will be set to NULL as there are
- * no parameters, in case of failure IS_ERR(skb) would have be set to
- * the actual error would be found with PTR_ERR(skb).
- */
- if (!skb)
- return 0;
-
status = skb->data[0];
kfree_skb(skb);
@@ -782,6 +786,44 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
}
EXPORT_SYMBOL(hci_cmd_sync_queue_once);
+/* Run HCI command:
+ *
+ * - hdev must be running
+ * - if on cmd_sync_work then run immediately otherwise queue
+ */
+int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ /* Only queue command if hdev is running which means it had been opened
+ * and is either on init phase or is already up.
+ */
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return -ENETDOWN;
+
+ /* If on cmd_sync_work then run immediately otherwise queue */
+ if (current_work() == &hdev->cmd_sync_work)
+ return func(hdev, data);
+
+ return hci_cmd_sync_submit(hdev, func, data, destroy);
+}
+EXPORT_SYMBOL(hci_cmd_sync_run);
+
+/* Run HCI command entry once:
+ *
+ * - Lookup if an entry already exist and only if it doesn't creates a new entry
+ * and run it.
+ * - if on cmd_sync_work then run immediately otherwise queue
+ */
+int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy))
+ return 0;
+
+ return hci_cmd_sync_run(hdev, func, data, destroy);
+}
+EXPORT_SYMBOL(hci_cmd_sync_run_once);
+
/* Lookup HCI command entry:
*
* - Return first entry that matches by function callback or data or
@@ -4800,6 +4842,13 @@ static const struct {
HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT,
"HCI LE Set Random Private Address Timeout command is "
"advertised, but not supported."),
+ HCI_QUIRK_BROKEN(EXT_CREATE_CONN,
+ "HCI LE Extended Create Connection command is "
+ "advertised, but not supported."),
+ HCI_QUIRK_BROKEN(WRITE_AUTH_PAYLOAD_TIMEOUT,
+ "HCI WRITE AUTH PAYLOAD TIMEOUT command leads "
+ "to unexpected SMP errors when pairing "
+ "and will not be used."),
HCI_QUIRK_BROKEN(LE_CODED,
"HCI LE Coded PHY feature bit is set, "
"but its usage is not supported.")
@@ -5093,9 +5142,15 @@ int hci_dev_close_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
- cancel_delayed_work(&hdev->power_off);
- cancel_delayed_work(&hdev->ncmd_timer);
- cancel_delayed_work(&hdev->le_scan_disable);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ disable_delayed_work(&hdev->power_off);
+ disable_delayed_work(&hdev->ncmd_timer);
+ disable_delayed_work(&hdev->le_scan_disable);
+ } else {
+ cancel_delayed_work(&hdev->power_off);
+ cancel_delayed_work(&hdev->ncmd_timer);
+ cancel_delayed_work(&hdev->le_scan_disable);
+ }
hci_cmd_sync_cancel_sync(hdev, ENODEV);
@@ -5342,7 +5397,10 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
if (!e)
return 0;
- return hci_remote_name_cancel_sync(hdev, &e->data.bdaddr);
+ /* Ignore cancel errors since it should interfere with stopping
+ * of the discovery.
+ */
+ hci_remote_name_cancel_sync(hdev, &e->data.bdaddr);
}
return 0;
@@ -6426,7 +6484,7 @@ static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data)
&own_addr_type);
if (err)
goto done;
-
+ /* Send command LE Extended Create Connection if supported */
if (use_ext_conn(hdev)) {
err = hci_le_ext_create_conn_sync(hdev, conn, own_addr_type);
goto done;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 367e32fe30eb..4b54dbbf0729 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -21,16 +21,6 @@ static const struct device_type bt_link = {
.release = bt_link_release,
};
-/*
- * The rfcomm tty device will possibly retain even when conn
- * is down, and sysfs doesn't support move zombie device,
- * so we should move the device before conn device is destroyed.
- */
-static int __match_tty(struct device *dev, void *data)
-{
- return !strncmp(dev_name(dev), "rfcomm", 6);
-}
-
void hci_conn_init_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -73,10 +63,13 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
return;
}
+ /* If there are devices using the connection as parent reset it to NULL
+ * before unregistering the device.
+ */
while (1) {
struct device *dev;
- dev = device_find_child(&conn->dev, NULL, __match_tty);
+ dev = device_find_any_child(&conn->dev);
if (!dev)
break;
device_move(dev, NULL, DPM_ORDER_DEV_LAST);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index d5e00d0dd1a0..43d0ebe11100 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -35,6 +35,7 @@ struct iso_conn {
struct sk_buff *rx_skb;
__u32 rx_len;
__u16 tx_sn;
+ struct kref ref;
};
#define iso_conn_lock(c) spin_lock(&(c)->lock)
@@ -93,17 +94,73 @@ static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
#define ISO_CONN_TIMEOUT (HZ * 40)
#define ISO_DISCONN_TIMEOUT (HZ * 2)
+static void iso_conn_free(struct kref *ref)
+{
+ struct iso_conn *conn = container_of(ref, struct iso_conn, ref);
+
+ BT_DBG("conn %p", conn);
+
+ if (conn->sk)
+ iso_pi(conn->sk)->conn = NULL;
+
+ if (conn->hcon) {
+ conn->hcon->iso_data = NULL;
+ hci_conn_drop(conn->hcon);
+ }
+
+ /* Ensure no more work items will run since hci_conn has been dropped */
+ disable_delayed_work_sync(&conn->timeout_work);
+
+ kfree(conn);
+}
+
+static void iso_conn_put(struct iso_conn *conn)
+{
+ if (!conn)
+ return;
+
+ BT_DBG("conn %p refcnt %d", conn, kref_read(&conn->ref));
+
+ kref_put(&conn->ref, iso_conn_free);
+}
+
+static struct iso_conn *iso_conn_hold_unless_zero(struct iso_conn *conn)
+{
+ if (!conn)
+ return NULL;
+
+ BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+ if (!kref_get_unless_zero(&conn->ref))
+ return NULL;
+
+ return conn;
+}
+
+static struct sock *iso_sock_hold(struct iso_conn *conn)
+{
+ if (!conn || !bt_sock_linked(&iso_sk_list, conn->sk))
+ return NULL;
+
+ sock_hold(conn->sk);
+
+ return conn->sk;
+}
+
static void iso_sock_timeout(struct work_struct *work)
{
struct iso_conn *conn = container_of(work, struct iso_conn,
timeout_work.work);
struct sock *sk;
+ conn = iso_conn_hold_unless_zero(conn);
+ if (!conn)
+ return;
+
iso_conn_lock(conn);
- sk = conn->sk;
- if (sk)
- sock_hold(sk);
+ sk = iso_sock_hold(conn);
iso_conn_unlock(conn);
+ iso_conn_put(conn);
if (!sk)
return;
@@ -141,9 +198,14 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
{
struct iso_conn *conn = hcon->iso_data;
+ conn = iso_conn_hold_unless_zero(conn);
if (conn) {
- if (!conn->hcon)
+ if (!conn->hcon) {
+ iso_conn_lock(conn);
conn->hcon = hcon;
+ iso_conn_unlock(conn);
+ }
+ iso_conn_put(conn);
return conn;
}
@@ -151,6 +213,7 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
if (!conn)
return NULL;
+ kref_init(&conn->ref);
spin_lock_init(&conn->lock);
INIT_DELAYED_WORK(&conn->timeout_work, iso_sock_timeout);
@@ -170,17 +233,15 @@ static void iso_chan_del(struct sock *sk, int err)
struct sock *parent;
conn = iso_pi(sk)->conn;
+ iso_pi(sk)->conn = NULL;
BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
if (conn) {
iso_conn_lock(conn);
conn->sk = NULL;
- iso_pi(sk)->conn = NULL;
iso_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_drop(conn->hcon);
+ iso_conn_put(conn);
}
sk->sk_state = BT_CLOSED;
@@ -202,6 +263,7 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
+ conn = iso_conn_hold_unless_zero(conn);
if (!conn)
return;
@@ -209,24 +271,20 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
/* Kill socket */
iso_conn_lock(conn);
- sk = conn->sk;
- if (sk)
- sock_hold(sk);
+ sk = iso_sock_hold(conn);
iso_conn_unlock(conn);
+ iso_conn_put(conn);
- if (sk) {
- lock_sock(sk);
- iso_sock_clear_timer(sk);
- iso_chan_del(sk, err);
- release_sock(sk);
- sock_put(sk);
+ if (!sk) {
+ iso_conn_put(conn);
+ return;
}
- /* Ensure no more work items will run before freeing conn. */
- cancel_delayed_work_sync(&conn->timeout_work);
-
- hcon->iso_data = NULL;
- kfree(conn);
+ lock_sock(sk);
+ iso_sock_clear_timer(sk);
+ iso_chan_del(sk, err);
+ release_sock(sk);
+ sock_put(sk);
}
static int __iso_chan_add(struct iso_conn *conn, struct sock *sk,
@@ -646,6 +704,8 @@ static void iso_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
+ iso_conn_put(iso_pi(sk)->conn);
+
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
}
@@ -705,6 +765,7 @@ static void iso_sock_disconn(struct sock *sk)
*/
if (bis_sk) {
hcon->state = BT_OPEN;
+ hcon->iso_data = NULL;
iso_pi(sk)->conn->hcon = NULL;
iso_sock_clear_timer(sk);
iso_chan_del(sk, bt_to_errno(hcon->abort_reason));
@@ -714,7 +775,6 @@ static void iso_sock_disconn(struct sock *sk)
}
sk->sk_state = BT_DISCONN;
- iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
iso_conn_lock(iso_pi(sk)->conn);
hci_conn_drop(iso_pi(sk)->conn->hcon);
iso_pi(sk)->conn->hcon = NULL;
@@ -1069,6 +1129,7 @@ static int iso_listen_bis(struct sock *sk)
return -EHOSTUNREACH;
hci_dev_lock(hdev);
+ lock_sock(sk);
/* Fail if user set invalid QoS */
if (iso_pi(sk)->qos_user_set && !check_bcast_qos(&iso_pi(sk)->qos)) {
@@ -1098,10 +1159,10 @@ static int iso_listen_bis(struct sock *sk)
goto unlock;
}
- hci_dev_put(hdev);
-
unlock:
+ release_sock(sk);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
return err;
}
@@ -1128,6 +1189,7 @@ static int iso_sock_listen(struct socket *sock, int backlog)
BT_DBG("sk %p backlog %d", sk, backlog);
+ sock_hold(sk);
lock_sock(sk);
if (sk->sk_state != BT_BOUND) {
@@ -1140,10 +1202,16 @@ static int iso_sock_listen(struct socket *sock, int backlog)
goto done;
}
- if (!bacmp(&iso_pi(sk)->dst, BDADDR_ANY))
+ if (!bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) {
err = iso_listen_cis(sk);
- else
+ } else {
+ /* Drop sock lock to avoid potential
+ * deadlock with the hdev lock.
+ */
+ release_sock(sk);
err = iso_listen_bis(sk);
+ lock_sock(sk);
+ }
if (err)
goto done;
@@ -1155,6 +1223,7 @@ static int iso_sock_listen(struct socket *sock, int backlog)
done:
release_sock(sk);
+ sock_put(sk);
return err;
}
@@ -1166,7 +1235,11 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
long timeo;
int err = 0;
- lock_sock(sk);
+ /* Use explicit nested locking to avoid lockdep warnings generated
+ * because the parent socket and the child socket are locked on the
+ * same thread.
+ */
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
@@ -1197,7 +1270,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock,
release_sock(sk);
timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
}
remove_wait_queue(sk_sleep(sk), &wait);
@@ -1332,6 +1405,14 @@ static void iso_conn_big_sync(struct sock *sk)
if (!hdev)
return;
+ /* hci_le_big_create_sync requires hdev lock to be held, since
+ * it enqueues the HCI LE BIG Create Sync command via
+ * hci_cmd_sync_queue_once, which checks hdev flags that might
+ * change.
+ */
+ hci_dev_lock(hdev);
+ lock_sock(sk);
+
if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon,
&iso_pi(sk)->qos,
@@ -1342,6 +1423,9 @@ static void iso_conn_big_sync(struct sock *sk)
bt_dev_err(hdev, "hci_le_big_create_sync: %d",
err);
}
+
+ release_sock(sk);
+ hci_dev_unlock(hdev);
}
static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -1349,39 +1433,57 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
struct iso_pinfo *pi = iso_pi(sk);
+ bool early_ret = false;
+ int err = 0;
BT_DBG("sk %p", sk);
if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ sock_hold(sk);
lock_sock(sk);
+
switch (sk->sk_state) {
case BT_CONNECT2:
if (test_bit(BT_SK_PA_SYNC, &pi->flags)) {
+ release_sock(sk);
iso_conn_big_sync(sk);
+ lock_sock(sk);
+
sk->sk_state = BT_LISTEN;
} else {
iso_conn_defer_accept(pi->conn->hcon);
sk->sk_state = BT_CONFIG;
}
- release_sock(sk);
- return 0;
+
+ early_ret = true;
+ break;
case BT_CONNECTED:
if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ release_sock(sk);
iso_conn_big_sync(sk);
+ lock_sock(sk);
+
sk->sk_state = BT_LISTEN;
- release_sock(sk);
- return 0;
+ early_ret = true;
}
- release_sock(sk);
break;
case BT_CONNECT:
release_sock(sk);
- return iso_connect_cis(sk);
+ err = iso_connect_cis(sk);
+ lock_sock(sk);
+
+ early_ret = true;
+ break;
default:
- release_sock(sk);
break;
}
+
+ release_sock(sk);
+ sock_put(sk);
+
+ if (early_ret)
+ return err;
}
return bt_sock_recvmsg(sock, msg, len, flags);
@@ -1497,7 +1599,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -1508,7 +1610,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -1527,7 +1629,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+ err = copy_safe_from_sockptr(&qos, sizeof(qos), optval, optlen);
if (err)
break;
@@ -1548,8 +1650,8 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
- optlen);
+ err = copy_safe_from_sockptr(iso_pi(sk)->base, optlen, optval,
+ optlen);
if (err)
break;
@@ -1727,6 +1829,13 @@ static bool iso_match_big(struct sock *sk, void *data)
return ev->handle == iso_pi(sk)->qos.bcast.big;
}
+static bool iso_match_big_hcon(struct sock *sk, void *data)
+{
+ struct hci_conn *hcon = data;
+
+ return hcon->iso_qos.bcast.big == iso_pi(sk)->qos.bcast.big;
+}
+
static bool iso_match_pa_sync_flag(struct sock *sk, void *data)
{
return test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
@@ -1750,8 +1859,16 @@ static void iso_conn_ready(struct iso_conn *conn)
if (!hcon)
return;
- if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags) ||
- test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
+ if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags)) {
+ /* A BIS slave hcon is notified to the ISO layer
+ * after the Command Complete for the LE Setup
+ * ISO Data Path command is received. Get the
+ * parent socket that matches the hcon BIG handle.
+ */
+ parent = iso_get_sock(&hcon->src, &hcon->dst,
+ BT_LISTEN, iso_match_big_hcon,
+ hcon);
+ } else if (test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
ev = hci_recv_event_data(hcon->hdev,
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
@@ -1818,7 +1935,6 @@ static void iso_conn_ready(struct iso_conn *conn)
if (!bacmp(&hcon->dst, BDADDR_ANY)) {
bacpy(&hcon->dst, &iso_pi(parent)->dst);
hcon->dst_type = iso_pi(parent)->dst_type;
- hcon->sync_handle = iso_pi(parent)->sync_handle;
}
if (ev3) {
@@ -1936,6 +2052,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (sk) {
int err;
+ struct hci_conn *hcon = iso_pi(sk)->conn->hcon;
iso_pi(sk)->qos.bcast.encryption = ev2->encryption;
@@ -1944,7 +2061,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
- err = hci_le_big_create_sync(hdev, NULL,
+ err = hci_le_big_create_sync(hdev,
+ hcon,
&iso_pi(sk)->qos,
iso_pi(sk)->sync_handle,
iso_pi(sk)->bc_num_bis,
@@ -2033,6 +2151,11 @@ done:
return HCI_LM_ACCEPT;
}
+static bool iso_match(struct hci_conn *hcon)
+{
+ return hcon->type == ISO_LINK || hcon->type == LE_LINK;
+}
+
static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
{
if (hcon->type != ISO_LINK) {
@@ -2214,6 +2337,7 @@ drop:
static struct hci_cb iso_cb = {
.name = "ISO",
+ .match = iso_match,
.connect_cfm = iso_connect_cfm,
.disconn_cfm = iso_disconn_cfm,
};
@@ -2301,13 +2425,9 @@ int iso_init(void)
hci_register_cb(&iso_cb);
- if (IS_ERR_OR_NULL(bt_debugfs))
- return 0;
-
- if (!iso_debugfs) {
+ if (!IS_ERR_OR_NULL(bt_debugfs))
iso_debugfs = debugfs_create_file("iso", 0444, bt_debugfs,
NULL, &iso_debugfs_fops);
- }
iso_inited = true;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9988ba382b68..27b4c4a2ba1f 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4066,17 +4066,9 @@ response:
static int l2cap_connect_req(struct l2cap_conn *conn,
struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
{
- struct hci_dev *hdev = conn->hcon->hdev;
- struct hci_conn *hcon = conn->hcon;
-
if (cmd_len < sizeof(struct l2cap_conn_req))
return -EPROTO;
- hci_dev_lock(hdev);
- if (hci_dev_test_flag(hdev, HCI_MGMT))
- mgmt_device_connected(hdev, hcon, NULL, 0);
- hci_dev_unlock(hdev);
-
l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP);
return 0;
}
@@ -7225,6 +7217,11 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
return NULL;
}
+static bool l2cap_match(struct hci_conn *hcon)
+{
+ return hcon->type == ACL_LINK || hcon->type == LE_LINK;
+}
+
static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
struct hci_dev *hdev = hcon->hdev;
@@ -7232,9 +7229,6 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
struct l2cap_chan *pchan;
u8 dst_type;
- if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
- return;
-
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
if (status) {
@@ -7299,9 +7293,6 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
- if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
- return;
-
BT_DBG("hcon %p reason %d", hcon, reason);
l2cap_conn_del(hcon, bt_to_errno(reason));
@@ -7580,6 +7571,7 @@ drop:
static struct hci_cb l2cap_cb = {
.name = "L2CAP",
+ .match = l2cap_match,
.connect_cfm = l2cap_connect_cfm,
.disconn_cfm = l2cap_disconn_cfm,
.security_cfm = l2cap_security_cfm,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index ba437c6f6ee5..3d2553dcdb1b 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -755,7 +755,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
opts.max_tx = chan->max_tx;
opts.txwin_size = chan->tx_win;
- err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+ err = copy_safe_from_sockptr(&opts, sizeof(opts), optval,
+ optlen);
if (err)
break;
@@ -800,7 +801,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
case L2CAP_LM:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -909,7 +910,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ err = copy_safe_from_sockptr(&sec, sizeof(sec), optval, optlen);
if (err)
break;
@@ -956,7 +957,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -970,7 +971,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_FLUSHABLE:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -1004,7 +1005,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
- err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+ err = copy_safe_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
if (err)
break;
@@ -1015,7 +1016,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_CHANNEL_POLICY:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -1046,7 +1047,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+ err = copy_safe_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
if (err)
break;
@@ -1076,7 +1077,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+ err = copy_safe_from_sockptr(&mode, sizeof(mode), optval,
+ optlen);
if (err)
break;
@@ -1886,6 +1888,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
+ sock->sk = NULL;
return NULL;
}
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index f46847632ffa..6e349704efe4 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -48,7 +48,7 @@ static int power_activate(struct led_classdev *led_cdev)
htrig = to_hci_basic_led_trigger(led_cdev->trigger);
powered = test_bit(HCI_UP, &htrig->hdev->flags);
- led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
+ led_set_brightness(led_cdev, powered ? LED_FULL : LED_OFF);
return 0;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 25979f4283a6..b31192d473d0 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -25,7 +25,7 @@
/* Bluetooth HCI Management interface */
#include <linux/module.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -132,6 +132,7 @@ static const u16 mgmt_commands[] = {
MGMT_OP_MESH_READ_FEATURES,
MGMT_OP_MESH_SEND,
MGMT_OP_MESH_SEND_CANCEL,
+ MGMT_OP_HCI_CMD_SYNC,
};
static const u16 mgmt_events[] = {
@@ -1317,7 +1318,8 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
struct mgmt_mode *cp;
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
return;
cp = cmd->param;
@@ -1350,7 +1352,13 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
static int set_powered_sync(struct hci_dev *hdev, void *data)
{
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_mode *cp = cmd->param;
+ struct mgmt_mode *cp;
+
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ return -ECANCELED;
+
+ cp = cmd->param;
BT_DBG("%s", hdev->name);
@@ -1453,10 +1461,15 @@ static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data)
static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
- if (cmd->cmd_complete) {
- u8 *status = data;
+ struct cmd_lookup *match = data;
+
+ /* dequeue cmd_sync entries using cmd as data as that is about to be
+ * removed/freed.
+ */
+ hci_cmd_sync_dequeue(match->hdev, NULL, cmd, NULL);
- cmd->cmd_complete(cmd, *status);
+ if (cmd->cmd_complete) {
+ cmd->cmd_complete(cmd, match->mgmt_status);
mgmt_pending_remove(cmd);
return;
@@ -1505,7 +1518,8 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
return;
hci_dev_lock(hdev);
@@ -1679,7 +1693,8 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
return;
hci_dev_lock(hdev);
@@ -1911,7 +1926,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
bool changed;
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_SSP, hdev))
+ if (err == -ECANCELED || cmd != pending_find(MGMT_OP_SET_SSP, hdev))
return;
if (err) {
@@ -2510,6 +2525,64 @@ unlock:
return err;
}
+static int send_hci_cmd_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_hci_cmd_sync *cp = cmd->param;
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cp->opcode),
+ le16_to_cpu(cp->params_len), cp->params,
+ cp->event, cp->timeout ?
+ msecs_to_jiffies(cp->timeout * 1000) :
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR(skb)) {
+ mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
+ mgmt_status(PTR_ERR(skb)));
+ goto done;
+ }
+
+ mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, 0,
+ skb->data, skb->len);
+
+ kfree_skb(skb);
+
+done:
+ mgmt_pending_free(cmd);
+
+ return 0;
+}
+
+static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_hci_cmd_sync *cp = data;
+ struct mgmt_pending_cmd *cmd;
+ int err;
+
+ if (len < sizeof(*cp))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ hci_dev_lock(hdev);
+ cmd = mgmt_pending_new(sk, MGMT_OP_HCI_CMD_SYNC, hdev, data, len);
+ if (!cmd)
+ err = -ENOMEM;
+ else
+ err = hci_cmd_sync_queue(hdev, send_hci_cmd_sync, cmd, NULL);
+
+ if (err < 0) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
+ MGMT_STATUS_FAILED);
+
+ if (cmd)
+ mgmt_pending_free(cmd);
+ }
+
+ hci_dev_unlock(hdev);
+ return err;
+}
+
/* This is a helper function to test for pending mgmt commands that can
* cause CoD or EIR HCI commands. We can only allow one such pending
* mgmt command at a time since otherwise we cannot easily track what
@@ -2830,16 +2903,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys,
key_count);
- for (i = 0; i < key_count; i++) {
- struct mgmt_link_key_info *key = &cp->keys[i];
-
- /* Considering SMP over BREDR/LE, there is no need to check addr_type */
- if (key->type > 0x08)
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_LOAD_LINK_KEYS,
- MGMT_STATUS_INVALID_PARAMS);
- }
-
hci_dev_lock(hdev);
hci_link_keys_clear(hdev);
@@ -2864,6 +2927,19 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
continue;
}
+ if (key->addr.type != BDADDR_BREDR) {
+ bt_dev_warn(hdev,
+ "Invalid link address type %u for %pMR",
+ key->addr.type, &key->addr.bdaddr);
+ continue;
+ }
+
+ if (key->type > 0x08) {
+ bt_dev_warn(hdev, "Invalid link key type %u for %pMR",
+ key->type, &key->addr.bdaddr);
+ continue;
+ }
+
/* Always ignore debug keys and require a new pairing if
* the user wants to use them.
*/
@@ -2921,7 +2997,12 @@ static int unpair_device_sync(struct hci_dev *hdev, void *data)
if (!conn)
return 0;
- return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
+ /* Disregard any possible error since the likes of hci_abort_conn_sync
+ * will clean up the connection no matter the error.
+ */
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+
+ return 0;
}
static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -3053,13 +3134,44 @@ unlock:
return err;
}
+static void disconnect_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_pending_cmd *cmd = data;
+
+ cmd->cmd_complete(cmd, mgmt_status(err));
+ mgmt_pending_free(cmd);
+}
+
+static int disconnect_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_disconnect *cp = cmd->param;
+ struct hci_conn *conn;
+
+ if (cp->addr.type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+ &cp->addr.bdaddr);
+ else
+ conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
+ if (!conn)
+ return -ENOTCONN;
+
+ /* Disregard any possible error since the likes of hci_abort_conn_sync
+ * will clean up the connection no matter the error.
+ */
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+
+ return 0;
+}
+
static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_disconnect *cp = data;
struct mgmt_rp_disconnect rp;
struct mgmt_pending_cmd *cmd;
- struct hci_conn *conn;
int err;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -3082,27 +3194,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- if (pending_find(MGMT_OP_DISCONNECT, hdev)) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT,
- MGMT_STATUS_BUSY, &rp, sizeof(rp));
- goto failed;
- }
-
- if (cp->addr.type == BDADDR_BREDR)
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
- &cp->addr.bdaddr);
- else
- conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
- le_addr_type(cp->addr.type));
-
- if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT,
- MGMT_STATUS_NOT_CONNECTED, &rp,
- sizeof(rp));
- goto failed;
- }
-
- cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, hdev, data, len);
+ cmd = mgmt_pending_new(sk, MGMT_OP_DISCONNECT, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -3110,9 +3202,10 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
cmd->cmd_complete = generic_cmd_complete;
- err = hci_disconnect(conn, HCI_ERROR_REMOTE_USER_TERM);
+ err = hci_cmd_sync_queue(hdev, disconnect_sync, cmd,
+ disconnect_complete);
if (err < 0)
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
failed:
hci_dev_unlock(hdev);
@@ -3757,7 +3850,8 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
return;
if (status) {
@@ -3932,7 +4026,8 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err)
struct sk_buff *skb = cmd->skb;
u8 status = mgmt_status(err);
- if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
return;
if (!status) {
@@ -5823,13 +5918,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ bt_dev_dbg(hdev, "err %d", err);
+
+ if (err == -ECANCELED)
+ return;
+
if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) &&
cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) &&
cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
return;
- bt_dev_dbg(hdev, "err %d", err);
-
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6062,7 +6160,8 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
- if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
return;
bt_dev_dbg(hdev, "err %d", err);
@@ -7072,7 +7171,6 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
for (i = 0; i < irk_count; i++) {
struct mgmt_irk_info *irk = &cp->irks[i];
- u8 addr_type = le_addr_type(irk->addr.type);
if (hci_is_blocked_key(hdev,
HCI_BLOCKED_KEY_TYPE_IRK,
@@ -7082,12 +7180,8 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
continue;
}
- /* When using SMP over BR/EDR, the addr type should be set to BREDR */
- if (irk->addr.type == BDADDR_BREDR)
- addr_type = BDADDR_BREDR;
-
hci_add_irk(hdev, &irk->addr.bdaddr,
- addr_type, irk->val,
+ le_addr_type(irk->addr.type), irk->val,
BDADDR_ANY);
}
@@ -7152,15 +7246,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
bt_dev_dbg(hdev, "key_count %u", key_count);
- for (i = 0; i < key_count; i++) {
- struct mgmt_ltk_info *key = &cp->keys[i];
-
- if (!ltk_is_valid(key))
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_LOAD_LONG_TERM_KEYS,
- MGMT_STATUS_INVALID_PARAMS);
- }
-
hci_dev_lock(hdev);
hci_smp_ltks_clear(hdev);
@@ -7168,7 +7253,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
for (i = 0; i < key_count; i++) {
struct mgmt_ltk_info *key = &cp->keys[i];
u8 type, authenticated;
- u8 addr_type = le_addr_type(key->addr.type);
if (hci_is_blocked_key(hdev,
HCI_BLOCKED_KEY_TYPE_LTK,
@@ -7178,6 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
continue;
}
+ if (!ltk_is_valid(key)) {
+ bt_dev_warn(hdev, "Invalid LTK for %pMR",
+ &key->addr.bdaddr);
+ continue;
+ }
+
switch (key->type) {
case MGMT_LTK_UNAUTHENTICATED:
authenticated = 0x00;
@@ -7203,12 +7293,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
continue;
}
- /* When using SMP over BR/EDR, the addr type should be set to BREDR */
- if (key->addr.type == BDADDR_BREDR)
- addr_type = BDADDR_BREDR;
-
hci_add_ltk(hdev, &key->addr.bdaddr,
- addr_type, type, authenticated,
+ le_addr_type(key->addr.type), type, authenticated,
key->val, key->enc_size, key->ediv, key->rand);
}
@@ -8066,7 +8152,8 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data,
u8 status = mgmt_status(err);
u16 eir_len;
- if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
return;
if (!status) {
@@ -9359,6 +9446,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
{ mesh_send, MGMT_MESH_SEND_SIZE,
HCI_MGMT_VAR_LEN },
{ mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE },
+ { mgmt_hci_cmd_sync, MGMT_HCI_CMD_SYNC_SIZE, HCI_MGMT_VAR_LEN },
};
void mgmt_index_added(struct hci_dev *hdev)
@@ -9387,12 +9475,12 @@ void mgmt_index_added(struct hci_dev *hdev)
void mgmt_index_removed(struct hci_dev *hdev)
{
struct mgmt_ev_ext_index ev;
- u8 status = MGMT_STATUS_INVALID_INDEX;
+ struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX };
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
+ mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
@@ -9443,7 +9531,7 @@ void mgmt_power_on(struct hci_dev *hdev, int err)
void __mgmt_power_off(struct hci_dev *hdev)
{
struct cmd_lookup match = { NULL, hdev };
- u8 status, zero_cod[] = { 0, 0, 0 };
+ u8 zero_cod[] = { 0, 0, 0 };
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
@@ -9455,11 +9543,11 @@ void __mgmt_power_off(struct hci_dev *hdev)
* status responses.
*/
if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
- status = MGMT_STATUS_INVALID_INDEX;
+ match.mgmt_status = MGMT_STATUS_INVALID_INDEX;
else
- status = MGMT_STATUS_NOT_POWERED;
+ match.mgmt_status = MGMT_STATUS_NOT_POWERED;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
+ mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -9502,7 +9590,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
- ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type);
+ ev.key.addr.type = BDADDR_BREDR;
ev.key.type = key->type;
memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE);
ev.key.pin_len = key->pin_len;
@@ -9553,7 +9641,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
- ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type);
+ ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
ev.key.type = mgmt_ltk_type(key);
ev.key.enc_size = key->enc_size;
ev.key.ediv = key->ediv;
@@ -9582,7 +9670,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent)
bacpy(&ev.rpa, &irk->rpa);
bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr);
- ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type);
+ ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type);
memcpy(ev.irk.val, irk->val, sizeof(irk->val));
mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL);
@@ -9611,7 +9699,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr);
- ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type);
+ ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type);
ev.key.type = csrk->type;
memcpy(ev.key.val, csrk->val, sizeof(csrk->val));
@@ -9689,18 +9777,6 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
mgmt_event_skb(skb, NULL);
}
-static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data)
-{
- struct sock **sk = data;
-
- cmd->cmd_complete(cmd, 0);
-
- *sk = cmd->sk;
- sock_hold(*sk);
-
- mgmt_pending_remove(cmd);
-}
-
static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
struct hci_dev *hdev = data;
@@ -9744,8 +9820,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
if (link_type != ACL_LINK && link_type != LE_LINK)
return;
- mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk);
-
bacpy(&ev.addr.bdaddr, bdaddr);
ev.addr.type = link_to_bdaddr(link_type, addr_type);
ev.reason = reason;
@@ -9758,9 +9832,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
if (sk)
sock_put(sk);
-
- mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp,
- hdev);
}
void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
@@ -9789,13 +9860,18 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
mgmt_pending_remove(cmd);
}
-void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
- u8 addr_type, u8 status)
+void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, u8 status)
{
struct mgmt_ev_connect_failed ev;
- bacpy(&ev.addr.bdaddr, bdaddr);
- ev.addr.type = link_to_bdaddr(link_type, addr_type);
+ if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) {
+ mgmt_device_disconnected(hdev, &conn->dst, conn->type,
+ conn->dst_type, status, true);
+ return;
+ }
+
+ bacpy(&ev.addr.bdaddr, &conn->dst);
+ ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
ev.status = mgmt_status(status);
mgmt_event(MGMT_EV_CONNECT_FAILED, hdev, &ev, sizeof(ev), NULL);
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 0115f783bde8..17ab909a7c07 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -21,7 +21,7 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 1d34d8497033..4c56ca5a216c 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -28,7 +28,7 @@
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -2134,6 +2134,11 @@ static int rfcomm_run(void *unused)
return 0;
}
+static bool rfcomm_match(struct hci_conn *hcon)
+{
+ return hcon->type == ACL_LINK;
+}
+
static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
{
struct rfcomm_session *s;
@@ -2180,6 +2185,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
static struct hci_cb rfcomm_cb = {
.name = "RFCOMM",
+ .match = rfcomm_match,
.security_cfm = rfcomm_security_cfm
};
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 37d63d768afb..913402806fa0 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -274,13 +274,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock,
struct rfcomm_dlc *d;
struct sock *sk;
- sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
- if (!sk)
+ d = rfcomm_dlc_alloc(prio);
+ if (!d)
return NULL;
- d = rfcomm_dlc_alloc(prio);
- if (!d) {
- sk_free(sk);
+ sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
+ if (!sk) {
+ rfcomm_dlc_free(d);
return NULL;
}
@@ -629,10 +629,9 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
switch (optname) {
case RFCOMM_LM:
- if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt & RFCOMM_LM_FIPS) {
err = -EINVAL;
@@ -685,7 +684,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ err = copy_safe_from_sockptr(&sec, sizeof(sec), optval, optlen);
if (err)
break;
@@ -703,7 +702,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -729,7 +728,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
struct sock *l2cap_sk;
struct l2cap_conn *conn;
struct rfcomm_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -783,7 +783,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
cinfo.hci_handle = conn->hcon->handle;
memcpy(cinfo.dev_class, conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *) &cinfo, len))
err = -EFAULT;
@@ -802,7 +802,8 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
{
struct sock *sk = sock->sk;
struct bt_security sec;
- int len, err = 0;
+ int err = 0;
+ size_t len;
BT_DBG("sk %p", sk);
@@ -827,7 +828,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
sec.level = rfcomm_pi(sk)->sec_level;
sec.key_size = 0;
- len = min_t(unsigned int, len, sizeof(sec));
+ len = min(len, sizeof(sec));
if (copy_to_user(optval, (char *) &sec, len))
err = -EFAULT;
@@ -865,9 +866,7 @@ static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
if (err == -ENOIOCTLCMD) {
#ifdef CONFIG_BT_RFCOMM_TTY
- lock_sock(sk);
err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg);
- release_sock(sk);
#else
err = -EOPNOTSUPP;
#endif
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index a5ac160c592e..aa7bfe26cb40 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -51,6 +51,7 @@ struct sco_conn {
struct delayed_work timeout_work;
unsigned int mtu;
+ struct kref ref;
};
#define sco_conn_lock(c) spin_lock(&c->lock)
@@ -76,21 +77,78 @@ struct sco_pinfo {
#define SCO_CONN_TIMEOUT (HZ * 40)
#define SCO_DISCONN_TIMEOUT (HZ * 2)
+static void sco_conn_free(struct kref *ref)
+{
+ struct sco_conn *conn = container_of(ref, struct sco_conn, ref);
+
+ BT_DBG("conn %p", conn);
+
+ if (conn->sk)
+ sco_pi(conn->sk)->conn = NULL;
+
+ if (conn->hcon) {
+ conn->hcon->sco_data = NULL;
+ hci_conn_drop(conn->hcon);
+ }
+
+ /* Ensure no more work items will run since hci_conn has been dropped */
+ disable_delayed_work_sync(&conn->timeout_work);
+
+ kfree(conn);
+}
+
+static void sco_conn_put(struct sco_conn *conn)
+{
+ if (!conn)
+ return;
+
+ BT_DBG("conn %p refcnt %d", conn, kref_read(&conn->ref));
+
+ kref_put(&conn->ref, sco_conn_free);
+}
+
+static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn)
+{
+ if (!conn)
+ return NULL;
+
+ BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+ if (!kref_get_unless_zero(&conn->ref))
+ return NULL;
+
+ return conn;
+}
+
+static struct sock *sco_sock_hold(struct sco_conn *conn)
+{
+ if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk))
+ return NULL;
+
+ sock_hold(conn->sk);
+
+ return conn->sk;
+}
+
static void sco_sock_timeout(struct work_struct *work)
{
struct sco_conn *conn = container_of(work, struct sco_conn,
timeout_work.work);
struct sock *sk;
+ conn = sco_conn_hold_unless_zero(conn);
+ if (!conn)
+ return;
+
sco_conn_lock(conn);
if (!conn->hcon) {
sco_conn_unlock(conn);
+ sco_conn_put(conn);
return;
}
- sk = conn->sk;
- if (sk)
- sock_hold(sk);
+ sk = sco_sock_hold(conn);
sco_conn_unlock(conn);
+ sco_conn_put(conn);
if (!sk)
return;
@@ -128,9 +186,13 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
{
struct sco_conn *conn = hcon->sco_data;
+ conn = sco_conn_hold_unless_zero(conn);
if (conn) {
- if (!conn->hcon)
+ if (!conn->hcon) {
+ sco_conn_lock(conn);
conn->hcon = hcon;
+ sco_conn_unlock(conn);
+ }
return conn;
}
@@ -138,6 +200,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
if (!conn)
return NULL;
+ kref_init(&conn->ref);
spin_lock_init(&conn->lock);
INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
@@ -162,17 +225,15 @@ static void sco_chan_del(struct sock *sk, int err)
struct sco_conn *conn;
conn = sco_pi(sk)->conn;
+ sco_pi(sk)->conn = NULL;
BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
if (conn) {
sco_conn_lock(conn);
conn->sk = NULL;
- sco_pi(sk)->conn = NULL;
sco_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_drop(conn->hcon);
+ sco_conn_put(conn);
}
sk->sk_state = BT_CLOSED;
@@ -187,31 +248,28 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
struct sco_conn *conn = hcon->sco_data;
struct sock *sk;
+ conn = sco_conn_hold_unless_zero(conn);
if (!conn)
return;
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
- /* Kill socket */
sco_conn_lock(conn);
- sk = conn->sk;
- if (sk)
- sock_hold(sk);
+ sk = sco_sock_hold(conn);
sco_conn_unlock(conn);
+ sco_conn_put(conn);
- if (sk) {
- lock_sock(sk);
- sco_sock_clear_timer(sk);
- sco_chan_del(sk, err);
- release_sock(sk);
- sock_put(sk);
+ if (!sk) {
+ sco_conn_put(conn);
+ return;
}
- /* Ensure no more work items will run before freeing conn. */
- cancel_delayed_work_sync(&conn->timeout_work);
-
- hcon->sco_data = NULL;
- kfree(conn);
+ /* Kill socket */
+ lock_sock(sk);
+ sco_sock_clear_timer(sk);
+ sco_chan_del(sk, err);
+ release_sock(sk);
+ sock_put(sk);
}
static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
@@ -261,10 +319,13 @@ static int sco_connect(struct sock *sk)
else
type = SCO_LINK;
- if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
- err = -EOPNOTSUPP;
- goto unlock;
+ switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) {
+ case SCO_AIRMODE_TRANSP:
+ if (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+ break;
}
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
@@ -395,6 +456,8 @@ static void sco_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
+ sco_conn_put(sco_pi(sk)->conn);
+
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
}
@@ -442,17 +505,6 @@ static void __sco_sock_close(struct sock *sk)
case BT_CONNECTED:
case BT_CONFIG:
- if (sco_pi(sk)->conn->hcon) {
- sk->sk_state = BT_DISCONN;
- sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
- sco_conn_lock(sco_pi(sk)->conn);
- hci_conn_drop(sco_pi(sk)->conn->hcon);
- sco_pi(sk)->conn->hcon = NULL;
- sco_conn_unlock(sco_pi(sk)->conn);
- } else
- sco_chan_del(sk, ECONNRESET);
- break;
-
case BT_CONNECT2:
case BT_CONNECT:
case BT_DISCONN:
@@ -847,7 +899,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -866,18 +918,11 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
- err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
- optlen);
+ err = copy_safe_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
if (err)
break;
- /* Explicitly check for these values */
- if (voice.setting != BT_VOICE_TRANSPARENT &&
- voice.setting != BT_VOICE_CVSD_16BIT) {
- err = -EINVAL;
- break;
- }
-
sco_pi(sk)->setting = voice.setting;
hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src,
BDADDR_BREDR);
@@ -885,14 +930,19 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
err = -EBADFD;
break;
}
- if (enhanced_sync_conn_capable(hdev) &&
- voice.setting == BT_VOICE_TRANSPARENT)
- sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
+
+ switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) {
+ case SCO_AIRMODE_TRANSP:
+ if (enhanced_sync_conn_capable(hdev))
+ sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
+ break;
+ }
+
hci_dev_put(hdev);
break;
case BT_PKT_STATUS:
- err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (err)
break;
@@ -935,7 +985,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+ err = copy_struct_from_sockptr(buffer, sizeof(buffer), optval,
+ optlen);
if (err) {
hci_dev_put(hdev);
break;
@@ -1347,11 +1398,13 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
return lm;
}
-static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+static bool sco_match(struct hci_conn *hcon)
{
- if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
- return;
+ return hcon->type == SCO_LINK || hcon->type == ESCO_LINK;
+}
+static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+{
BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status);
if (!status) {
@@ -1366,9 +1419,6 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
- if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
- return;
-
BT_DBG("hcon %p reason %d", hcon, reason);
sco_conn_del(hcon, bt_to_errno(reason));
@@ -1394,6 +1444,7 @@ drop:
static struct hci_cb sco_cb = {
.name = "SCO",
+ .match = sco_match,
.connect_cfm = sco_connect_cfm,
.disconn_cfm = sco_disconn_cfm,
};
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 4f9fdf400584..8b9724fd752a 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1060,7 +1060,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
if (smp->remote_irk) {
- smp->remote_irk->link_type = hcon->type;
mgmt_new_irk(hdev, smp->remote_irk, persistent);
/* Now that user space can be considered to know the
@@ -1080,28 +1079,24 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
if (smp->csrk) {
- smp->csrk->link_type = hcon->type;
smp->csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->csrk->bdaddr, &hcon->dst);
mgmt_new_csrk(hdev, smp->csrk, persistent);
}
if (smp->responder_csrk) {
- smp->responder_csrk->link_type = hcon->type;
smp->responder_csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->responder_csrk->bdaddr, &hcon->dst);
mgmt_new_csrk(hdev, smp->responder_csrk, persistent);
}
if (smp->ltk) {
- smp->ltk->link_type = hcon->type;
smp->ltk->bdaddr_type = hcon->dst_type;
bacpy(&smp->ltk->bdaddr, &hcon->dst);
mgmt_new_ltk(hdev, smp->ltk, persistent);
}
if (smp->responder_ltk) {
- smp->responder_ltk->link_type = hcon->type;
smp->responder_ltk->bdaddr_type = hcon->dst_type;
bacpy(&smp->responder_ltk->bdaddr, &hcon->dst);
mgmt_new_ltk(hdev, smp->responder_ltk, persistent);
@@ -1121,8 +1116,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst,
smp->link_key, type, 0, &persistent);
if (key) {
- key->link_type = hcon->type;
- key->bdaddr_type = hcon->dst_type;
mgmt_new_link_key(hdev, key, persistent);
/* Don't keep debug keys around if the relevant
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 3ea52b05adfb..f71f67c6896b 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -115,7 +115,7 @@ static int check_test_run_args(struct bpf_prog *prog, struct bpf_dummy_ops_test_
offset = btf_ctx_arg_offset(bpf_dummy_ops_btf, func_proto, arg_no);
info = find_ctx_arg_info(prog->aux, offset);
- if (info && (info->reg_type & PTR_MAYBE_NULL))
+ if (info && type_may_be_null(info->reg_type))
continue;
return -EINVAL;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6d7a442ceb89..501ec4249fed 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -246,6 +246,7 @@ static void reset_ctx(struct xdp_page_head *head)
head->ctx.data_meta = head->orig_ctx.data_meta;
head->ctx.data_end = head->orig_ctx.data_end;
xdp_update_frame_from_buff(&head->ctx, head->frame);
+ head->frame->mem = head->orig_ctx.rxq->mem;
}
static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index fb1115857e49..0ab4613aa07a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -328,7 +328,7 @@ int br_netpoll_enable(struct net_bridge_port *p)
return __br_netpoll_enable(p);
}
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p;
@@ -487,9 +487,11 @@ void br_dev_setup(struct net_device *dev)
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
+ dev->lltx = true;
+ dev->netns_local = true;
- dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ dev->features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c77591e63841..82bac2426631 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -18,7 +18,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/atomic.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
#include <trace/events/bridge.h>
@@ -73,13 +73,6 @@ static inline int has_expired(const struct net_bridge *br,
time_before_eq(fdb->updated + hold_time(br), jiffies);
}
-static void fdb_rcu_free(struct rcu_head *head)
-{
- struct net_bridge_fdb_entry *ent
- = container_of(head, struct net_bridge_fdb_entry, rcu);
- kmem_cache_free(br_fdb_cache, ent);
-}
-
static int fdb_to_nud(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
@@ -329,7 +322,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags))
atomic_dec(&br->fdb_n_learned);
fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
- call_rcu(&f->rcu, fdb_rcu_free);
+ kfree_rcu(f, rcu);
}
/* Delete a local entry if no other port had the same address.
@@ -1159,7 +1152,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
struct net_bridge_port *p, const unsigned char *addr,
u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
- struct netlink_ext_ack *extack)
+ bool *notified, struct netlink_ext_ack *extack)
{
int err = 0;
@@ -1190,6 +1183,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
spin_unlock_bh(&br->hash_lock);
}
+ if (!err)
+ *notified = true;
return err;
}
@@ -1202,7 +1197,7 @@ static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = {
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
- struct netlink_ext_ack *extack)
+ bool *notified, struct netlink_ext_ack *extack)
{
struct nlattr *nfea_tb[NFEA_MAX + 1], *attr;
struct net_bridge_vlan_group *vg;
@@ -1265,10 +1260,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* VID was specified, so use it. */
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
- extack);
+ notified, extack);
} else {
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
- extack);
+ notified, extack);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -1280,7 +1275,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (!br_vlan_should_use(v))
continue;
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
- nfea_tb, extack);
+ nfea_tb, notified, extack);
if (err)
goto out;
}
@@ -1292,7 +1287,7 @@ out:
static int fdb_delete_by_addr_and_port(struct net_bridge *br,
const struct net_bridge_port *p,
- const u8 *addr, u16 vlan)
+ const u8 *addr, u16 vlan, bool *notified)
{
struct net_bridge_fdb_entry *fdb;
@@ -1301,18 +1296,19 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br,
return -ENOENT;
fdb_delete(br, fdb, true);
+ *notified = true;
return 0;
}
static int __br_fdb_delete(struct net_bridge *br,
const struct net_bridge_port *p,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid, bool *notified)
{
int err;
spin_lock_bh(&br->hash_lock);
- err = fdb_delete_by_addr_and_port(br, p, addr, vid);
+ err = fdb_delete_by_addr_and_port(br, p, addr, vid, notified);
spin_unlock_bh(&br->hash_lock);
return err;
@@ -1321,12 +1317,11 @@ static int __br_fdb_delete(struct net_bridge *br,
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 vid,
+ const unsigned char *addr, u16 vid, bool *notified,
struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
- struct net_bridge_vlan *v;
struct net_bridge *br;
int err;
@@ -1345,23 +1340,19 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
}
if (vid) {
- v = br_vlan_find(vg, vid);
- if (!v) {
- pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
- return -EINVAL;
- }
-
- err = __br_fdb_delete(br, p, addr, vid);
+ err = __br_fdb_delete(br, p, addr, vid, notified);
} else {
+ struct net_bridge_vlan *v;
+
err = -ENOENT;
- err &= __br_fdb_delete(br, p, addr, 0);
+ err &= __br_fdb_delete(br, p, addr, 0, notified);
if (!vg || !vg->num_vlans)
return err;
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (!br_vlan_should_use(v))
continue;
- err &= __br_fdb_delete(br, p, addr, v->vid);
+ err &= __br_fdb_delete(br, p, addr, v->vid, notified);
}
}
@@ -1469,12 +1460,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
modified = true;
}
- if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
+ if (test_and_set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
/* Refresh entry */
fdb->used = jiffies;
- } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) {
- /* Take over SW learned entry */
- set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);
+ } else {
modified = true;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index bc37e47ad829..1a52a0bca086 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1674,7 +1674,7 @@ int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq,
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &group);
- if (!mp) {
+ if (!mp || (!mp->ports && !mp->host_joined)) {
NL_SET_ERR_MSG_MOD(extack, "MDB entry not found");
err = -ENOENT;
goto unlock;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 8f9c19d992ac..451e45b9a6a5 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -33,9 +33,11 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
+#include <net/dst_metadata.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
#include <linux/uaccess.h>
#include "br_private.h"
@@ -368,11 +370,11 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb->dev, *br_indev;
- struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct net_device *dev = skb->dev, *br_indev;
+ const struct iphdr *iph = ip_hdr(skb);
+ enum skb_drop_reason reason;
struct rtable *rt;
- int err;
br_indev = nf_bridge_get_physindev(skb, net);
if (!br_indev) {
@@ -388,7 +390,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
}
nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
- if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
+ reason = ip_route_input(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (reason) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
/* If err equals -EHOSTUNREACH the error is due to a
@@ -398,11 +402,12 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
* martian destinations: loopback destinations and destination
* 0.0.0.0. In both cases the packet will be dropped because the
* destination is the loopback device and not the bridge. */
- if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
+ if (reason != SKB_DROP_REASON_IP_INADDRERRORS || !in_dev ||
+ IN_DEV_FORWARD(in_dev))
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- RT_TOS(iph->tos), 0,
+ ip4h_dscp(iph), 0,
RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
@@ -878,6 +883,10 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
return br_dev_queue_push_xmit(net, sk, skb);
}
+ /* Fragmentation on metadata/template dst is not supported */
+ if (unlikely(!skb_valid_dst(skb)))
+ goto drop;
+
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f17dbac7d828..3e0f47203f2a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1920,8 +1920,13 @@ int __init br_netlink_init(void)
{
int err;
- br_vlan_rtnl_init();
- rtnl_af_register(&br_af_ops);
+ err = br_vlan_rtnl_init();
+ if (err)
+ goto out;
+
+ err = rtnl_af_register(&br_af_ops);
+ if (err)
+ goto out_vlan;
err = rtnl_link_register(&br_link_ops);
if (err)
@@ -1931,6 +1936,9 @@ int __init br_netlink_init(void)
out_af:
rtnl_af_unregister(&br_af_ops);
+out_vlan:
+ br_vlan_rtnl_uninit();
+out:
return err;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d4bedc87b1d8..9853cfbb9d14 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -853,12 +853,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid,
- struct netlink_ext_ack *extack);
+ bool *notified, struct netlink_ext_ack *extack);
int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
- struct netlink_ext_ack *extack);
+ bool *notified, struct netlink_ext_ack *extack);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *fdev, int *idx);
int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
@@ -1571,7 +1571,7 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
void *ptr);
-void br_vlan_rtnl_init(void);
+int br_vlan_rtnl_init(void);
void br_vlan_rtnl_uninit(void);
void br_vlan_notify(const struct net_bridge *br,
const struct net_bridge_port *p,
@@ -1802,8 +1802,9 @@ static inline int br_vlan_bridge_event(struct net_device *dev,
return 0;
}
-static inline void br_vlan_rtnl_init(void)
+static inline int br_vlan_rtnl_init(void)
{
+ return 0;
}
static inline void br_vlan_rtnl_uninit(void)
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 0e4572f31330..7895489ac6fe 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -17,7 +17,7 @@
#include <net/llc.h>
#include <net/llc_pdu.h>
#include <net/stp.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "br_private.h"
#include "br_private_stp.h"
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 9c2fffb827ab..89f51ea4cabe 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -2296,19 +2296,18 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
-void br_vlan_rtnl_init(void)
+static const struct rtnl_msg_handler br_vlan_rtnl_msg_handlers[] = {
+ {THIS_MODULE, PF_BRIDGE, RTM_NEWVLAN, br_vlan_rtm_process, NULL, 0},
+ {THIS_MODULE, PF_BRIDGE, RTM_DELVLAN, br_vlan_rtm_process, NULL, 0},
+ {THIS_MODULE, PF_BRIDGE, RTM_GETVLAN, NULL, br_vlan_rtm_dump, 0},
+};
+
+int br_vlan_rtnl_init(void)
{
- rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETVLAN, NULL,
- br_vlan_rtm_dump, 0);
- rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWVLAN,
- br_vlan_rtm_process, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELVLAN,
- br_vlan_rtm_process, NULL, 0);
+ return rtnl_register_many(br_vlan_rtnl_msg_handlers);
}
void br_vlan_rtnl_uninit(void)
{
- rtnl_unregister(PF_BRIDGE, RTM_GETVLAN);
- rtnl_unregister(PF_BRIDGE, RTM_NEWVLAN);
- rtnl_unregister(PF_BRIDGE, RTM_DELVLAN);
+ rtnl_unregister_many(br_vlan_rtnl_msg_handlers);
}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 104c0125e32e..f16bbbbb9481 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -41,7 +41,13 @@ config NF_CONNTRACK_BRIDGE
# old sockopt interface and eval loop
config BRIDGE_NF_EBTABLES_LEGACY
- tristate
+ tristate "Legacy EBTABLES support"
+ depends on BRIDGE && NETFILTER_XTABLES
+ default n
+ help
+ Legacy ebtables packet/frame classifier.
+ This is not needed if you are using ebtables over nftables
+ (iptables-nft).
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cbd0e3586c3f..3e67d4aff419 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1256,7 +1256,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
goto free_unlock;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
if (newinfo->nentries)
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index bd4d1b4d745f..5adced1e7d0c 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -63,7 +63,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
return nft_meta_get_eval(expr, regs, pkt);
}
- strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ);
+ strscpy_pad((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ);
return;
err:
regs->verdict.code = NFT_BREAK;
@@ -142,7 +142,7 @@ static int nft_meta_bridge_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
@@ -168,8 +168,7 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
}
static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -179,7 +178,7 @@ static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
hooks = 1 << NF_BR_PRE_ROUTING;
break;
default:
- return nft_meta_set_validate(ctx, expr, data);
+ return nft_meta_set_validate(ctx, expr);
}
return nft_chain_validate_hooks(ctx->chain, hooks);
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 71b54fed7263..1cb5c16e97b7 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -170,8 +170,7 @@ out:
}
static int nft_reject_bridge_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) |
(1 << NF_BR_LOCAL_IN));
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 2ae8cfa3df88..96236d21b18e 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -298,10 +298,8 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
return dstpkt;
}
- if (expectlen > addlen)
- neededtailspace = expectlen;
- else
- neededtailspace = addlen;
+
+ neededtailspace = max(expectlen, addlen);
if (dst->tail + neededtailspace > dst->end) {
/* Create a dumplicate of 'dst' with more tail space */
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 7b0af33bdb97..3c335057f255 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -9,7 +9,7 @@
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/caif/caif_layer.h>
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 9cef9496a707..171fa32ada85 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -183,12 +183,6 @@ bool cfsrvl_ready(struct cfsrvl *service, int *err)
return true;
}
-u8 cfsrvl_getphyid(struct cflayer *layer)
-{
- struct cfsrvl *servl = container_obj(layer);
- return servl->dev_info.id;
-}
-
bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
{
struct cfsrvl *servl = container_obj(layer);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 47901bd4def1..94ad09e36df2 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -47,7 +47,6 @@ struct chnl_net {
struct caif_connect_request conn_req;
struct list_head list_field;
struct net_device *netdev;
- char name[256];
wait_queue_head_t netmgmt_wq;
/* Flow status to remember and control the transmission. */
bool flowenabled;
@@ -347,7 +346,6 @@ static int chnl_net_init(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- strncpy(priv->name, dev->name, sizeof(priv->name));
INIT_LIST_HEAD(&priv->list_field);
return 0;
}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 707576eeeb58..01f3fbb3b67d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -171,6 +171,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
/* release sk on errors */
sock_orphan(sk);
sock_put(sk);
+ sock->sk = NULL;
}
errout:
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 27d5fcf0eac9..217049fa496e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1470,6 +1470,12 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
/* remove device reference, if this is our bound device */
if (bo->bound && bo->ifindex == dev->ifindex) {
+#if IS_ENABLED(CONFIG_PROC_FS)
+ if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) {
+ remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir);
+ bo->bcm_proc_read = NULL;
+ }
+#endif
bo->bound = 0;
bo->ifindex = 0;
notify_enodev = 1;
diff --git a/net/can/gw.c b/net/can/gw.c
index 37528826935e..ef93293c1fae 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1265,6 +1265,15 @@ static struct pernet_operations cangw_pernet_ops = {
.exit_batch = cangw_pernet_exit_batch,
};
+static const struct rtnl_msg_handler cgw_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_NEWROUTE,
+ .doit = cgw_create_job},
+ {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_DELROUTE,
+ .doit = cgw_remove_job},
+ {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE,
+ .dumpit = cgw_dump_jobs},
+};
+
static __init int cgw_module_init(void)
{
int ret;
@@ -1290,27 +1299,13 @@ static __init int cgw_module_init(void)
if (ret)
goto out_register_notifier;
- ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
- NULL, cgw_dump_jobs, 0);
- if (ret)
- goto out_rtnl_register1;
-
- ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
- cgw_create_job, NULL, 0);
- if (ret)
- goto out_rtnl_register2;
- ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
- cgw_remove_job, NULL, 0);
+ ret = rtnl_register_many(cgw_rtnl_msg_handlers);
if (ret)
- goto out_rtnl_register3;
+ goto out_rtnl_register;
return 0;
-out_rtnl_register3:
- rtnl_unregister(PF_CAN, RTM_NEWROUTE);
-out_rtnl_register2:
- rtnl_unregister(PF_CAN, RTM_GETROUTE);
-out_rtnl_register1:
+out_rtnl_register:
unregister_netdevice_notifier(&notifier);
out_register_notifier:
kmem_cache_destroy(cgw_cache);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 4be73de5033c..95f7a7e65a73 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1179,10 +1179,10 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
break;
case -ENETDOWN:
/* In this case we should get a netdev_event(), all active
- * sessions will be cleared by
- * j1939_cancel_all_active_sessions(). So handle this as an
- * error, but let j1939_cancel_all_active_sessions() do the
- * cleanup including propagation of the error to user space.
+ * sessions will be cleared by j1939_cancel_active_session().
+ * So handle this as an error, but let
+ * j1939_cancel_active_session() do the cleanup including
+ * propagation of the error to user space.
*/
break;
case -EOVERFLOW:
@@ -1505,7 +1505,7 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
session->state = J1939_SESSION_NEW;
skb_queue_head_init(&session->skb_queue);
- skb_queue_tail(&session->skb_queue, skb);
+ skb_queue_tail(&session->skb_queue, skb_get(skb));
skcb = j1939_skb_to_cb(skb);
memcpy(&session->skcb, skcb, sizeof(session->skcb));
diff --git a/net/can/raw.c b/net/can/raw.c
index 00533f64d69d..255c0a8f39d6 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -966,7 +966,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
- skb_setup_tx_timestamp(skb, sockc.tsflags);
+ skb_setup_tx_timestamp(skb, &sockc);
err = can_send(skb, ro->loopback);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 051d22c0e4ad..01b2ce1e8fc0 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -74,18 +74,6 @@ int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
return set_secret(dst, src->key);
}
-int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
-{
- if (*p + sizeof(u16) + sizeof(key->created) +
- sizeof(u16) + key->len > end)
- return -ERANGE;
- ceph_encode_16(p, key->type);
- ceph_encode_copy(p, &key->created, sizeof(key->created));
- ceph_encode_16(p, key->len);
- ceph_encode_copy(p, key->key, key->len);
- return 0;
-}
-
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
{
int ret;
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 13bd526349fa..23de29fc613c 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -21,7 +21,6 @@ struct ceph_crypto_key {
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
const struct ceph_crypto_key *src);
-int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
void ceph_crypto_key_destroy(struct ceph_crypto_key *key);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3c8b78d9c4d1..d1b5705dc0c6 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1254,7 +1254,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
colon_p = memchr(name, ':', namelen);
if (delim_p && colon_p)
- end = delim_p < colon_p ? delim_p : colon_p;
+ end = min(delim_p, colon_p);
else if (!delim_p && colon_p)
end = colon_p;
else {
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 9d078b37fe0b..9b1168eb77ab 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4999,40 +4999,6 @@ out_put_lreq:
}
EXPORT_SYMBOL(ceph_osdc_notify);
-/*
- * Return the number of milliseconds since the watch was last
- * confirmed, or an error. If there is an error, the watch is no
- * longer valid, and should be destroyed with ceph_osdc_unwatch().
- */
-int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
- struct ceph_osd_linger_request *lreq)
-{
- unsigned long stamp, age;
- int ret;
-
- down_read(&osdc->lock);
- mutex_lock(&lreq->lock);
- stamp = lreq->watch_valid_thru;
- if (!list_empty(&lreq->pending_lworks)) {
- struct linger_work *lwork =
- list_first_entry(&lreq->pending_lworks,
- struct linger_work,
- pending_item);
-
- if (time_before(lwork->queued_stamp, stamp))
- stamp = lwork->queued_stamp;
- }
- age = jiffies - stamp;
- dout("%s lreq %p linger_id %llu age %lu last_error %d\n", __func__,
- lreq, lreq->linger_id, age, lreq->last_error);
- /* we are truncating to msecs, so return a safe upper bound */
- ret = lreq->last_error ?: 1 + jiffies_to_msecs(age);
-
- mutex_unlock(&lreq->lock);
- up_read(&osdc->lock);
- return ret;
-}
-
static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
{
u8 struct_v;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 74622b278d57..5a9c4be5f222 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -131,41 +131,3 @@ int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
return 0;
}
EXPORT_SYMBOL(ceph_pagelist_free_reserve);
-
-/* Create a truncation point. */
-void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
- struct ceph_pagelist_cursor *c)
-{
- c->pl = pl;
- c->page_lru = pl->head.prev;
- c->room = pl->room;
-}
-EXPORT_SYMBOL(ceph_pagelist_set_cursor);
-
-/* Truncate a pagelist to the given point. Move extra pages to reserve.
- * This won't sleep.
- * Returns: 0 on success,
- * -EINVAL if the pagelist doesn't match the trunc point pagelist
- */
-int ceph_pagelist_truncate(struct ceph_pagelist *pl,
- struct ceph_pagelist_cursor *c)
-{
- struct page *page;
-
- if (pl != c->pl)
- return -EINVAL;
- ceph_pagelist_unmap_tail(pl);
- while (pl->head.prev != c->page_lru) {
- page = list_entry(pl->head.prev, struct page, lru);
- /* move from pagelist to reserve */
- list_move_tail(&page->lru, &pl->free_list);
- ++pl->num_pages_free;
- }
- pl->room = c->room;
- if (!list_empty(&pl->head)) {
- page = list_entry(pl->head.prev, struct page, lru);
- pl->mapped_tail = kmap(page);
- }
- return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_truncate);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 64305e7056a1..4509757d8b3b 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -55,58 +55,6 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
}
EXPORT_SYMBOL(ceph_alloc_page_vector);
-/*
- * copy user data into a page vector
- */
-int ceph_copy_user_to_page_vector(struct page **pages,
- const void __user *data,
- loff_t off, size_t len)
-{
- int i = 0;
- int po = off & ~PAGE_MASK;
- int left = len;
- int l, bad;
-
- while (left > 0) {
- l = min_t(int, PAGE_SIZE-po, left);
- bad = copy_from_user(page_address(pages[i]) + po, data, l);
- if (bad == l)
- return -EFAULT;
- data += l - bad;
- left -= l - bad;
- po += l - bad;
- if (po == PAGE_SIZE) {
- po = 0;
- i++;
- }
- }
- return len;
-}
-EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
-
-void ceph_copy_to_page_vector(struct page **pages,
- const void *data,
- loff_t off, size_t len)
-{
- int i = 0;
- size_t po = off & ~PAGE_MASK;
- size_t left = len;
-
- while (left > 0) {
- size_t l = min_t(size_t, PAGE_SIZE-po, left);
-
- memcpy(page_address(pages[i]) + po, data, l);
- data += l;
- left -= l;
- po += l;
- if (po == PAGE_SIZE) {
- po = 0;
- i++;
- }
- }
-}
-EXPORT_SYMBOL(ceph_copy_to_page_vector);
-
void ceph_copy_from_page_vector(struct page **pages,
void *data,
loff_t off, size_t len)
diff --git a/net/core/Makefile b/net/core/Makefile
index 62be9aef2528..d9326600e289 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
obj-y += hotdata.o
+obj-y += netdev_rx_queue.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
@@ -43,3 +44,6 @@ obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
+obj-$(CONFIG_NET_DEVMEM) += devmem.o
+obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
+obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index bc01b3aa6b0f..2f4ed83a75ae 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -106,7 +106,7 @@ static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
if (sock) {
sdata = bpf_local_storage_update(
sock->sk, (struct bpf_local_storage_map *)map, value,
- map_flags, GFP_ATOMIC);
+ map_flags, false, GFP_ATOMIC);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -137,7 +137,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
- copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
if (!copy_selem)
return NULL;
@@ -243,7 +243,7 @@ BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
refcount_inc_not_zero(&sk->sk_refcnt)) {
sdata = bpf_local_storage_update(
sk, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST, gfp_flags);
+ BPF_NOEXIST, false, gfp_flags);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a40f733b37d7..f0693707aece 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -407,6 +407,9 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
return 0;
}
+ if (!skb_frags_readable(skb))
+ goto short_copy;
+
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
@@ -623,6 +626,9 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
{
int frag = skb_shinfo(skb)->nr_frags;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
while (length && iov_iter_count(from)) {
struct page *head, *last_head = NULL;
struct page *pages[MAX_SKB_FRAGS];
diff --git a/net/core/dev.c b/net/core/dev.c
index f66e61407883..45a8c3dd4a64 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -158,8 +158,10 @@
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
#include <net/rps.h>
+#include <linux/phy_link_topology.h>
#include "dev.h"
+#include "devmem.h"
#include "net-sysfs.h"
static DEFINE_SPINLOCK(ptype_lock);
@@ -2947,6 +2949,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
+ net_shaper_set_real_num_tx_queues(dev, txq);
+
dev_qdisc_change_real_num_tx(dev, txq);
dev->real_num_tx_queues = txq;
@@ -3310,6 +3314,10 @@ int skb_checksum_help(struct sk_buff *skb)
return -EINVAL;
}
+ if (!skb_frags_readable(skb)) {
+ return -EFAULT;
+ }
+
/* Before computing a checksum, we should make sure no frag could
* be modified by an external entity : checksum could be wrong.
*/
@@ -3386,6 +3394,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
out:
return ret;
}
+EXPORT_SYMBOL(skb_crc32c_csum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
@@ -3431,8 +3440,9 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
if (!(dev->features & NETIF_F_HIGHDMA)) {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = skb_frag_page(frag);
- if (PageHighMem(skb_frag_page(frag)))
+ if (page && PageHighMem(page))
return 1;
}
}
@@ -3504,7 +3514,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
if (gso_segs > READ_ONCE(dev->gso_max_segs))
return features & ~NETIF_F_GSO_MASK;
- if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size)))
+ if (unlikely(skb->len >= netif_get_gso_max_size(dev, skb)))
return features & ~NETIF_F_GSO_MASK;
if (!skb_shinfo(skb)->gso_type) {
@@ -3631,6 +3641,9 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
return 0;
if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+ if (vlan_get_protocol(skb) == htons(ETH_P_IPV6) &&
+ skb_network_header_len(skb) != sizeof(struct ipv6hdr))
+ goto sw_checksum;
switch (skb->csum_offset) {
case offsetof(struct tcphdr, check):
case offsetof(struct udphdr, check):
@@ -3638,6 +3651,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
}
}
+sw_checksum:
return skb_checksum_help(skb);
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
@@ -3705,7 +3719,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
next = skb->next;
skb_mark_not_on_list(skb);
- /* in case skb wont be segmented, point to itself */
+ /* in case skb won't be segmented, point to itself */
skb->prev = skb;
skb = validate_xmit_skb(skb, dev, again);
@@ -3750,7 +3764,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
sizeof(_tcphdr), &_tcphdr);
if (likely(th))
hdr_len += __tcp_hdrlen(th);
- } else {
+ } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
struct udphdr _udphdr;
if (skb_header_pointer(skb, hdr_len,
@@ -3758,10 +3772,14 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len += sizeof(struct udphdr);
}
- if (shinfo->gso_type & SKB_GSO_DODGY)
- gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
- shinfo->gso_size);
+ if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) {
+ int payload = skb->len - hdr_len;
+ /* Malicious packet. */
+ if (payload <= 0)
+ return;
+ gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size);
+ }
qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
}
}
@@ -4245,13 +4263,6 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
}
EXPORT_SYMBOL(dev_pick_tx_zero);
-u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
-}
-EXPORT_SYMBOL(dev_pick_tx_cpu_id);
-
u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
@@ -5248,7 +5259,7 @@ int netif_rx(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_rx);
-static __latent_entropy void net_tx_action(struct softirq_action *h)
+static __latent_entropy void net_tx_action(void)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -5725,10 +5736,9 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct packet_type *pt_curr = NULL;
/* Current (common) orig_dev of sublist */
struct net_device *od_curr = NULL;
- struct list_head sublist;
struct sk_buff *skb, *next;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
@@ -5866,9 +5876,8 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
void netif_receive_skb_list_internal(struct list_head *head)
{
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
skb);
@@ -6227,12 +6236,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done) {
if (n->gro_bitmask)
- timeout = READ_ONCE(n->dev->gro_flush_timeout);
- n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+ timeout = napi_get_gro_flush_timeout(n);
+ n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
}
if (n->defer_hard_irqs_count > 0) {
n->defer_hard_irqs_count--;
- timeout = READ_ONCE(n->dev->gro_flush_timeout);
+ timeout = napi_get_gro_flush_timeout(n);
if (timeout)
ret = false;
}
@@ -6366,8 +6375,8 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
if (flags & NAPI_F_PREFER_BUSY_POLL) {
- napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
- timeout = READ_ONCE(napi->dev->gro_flush_timeout);
+ napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
+ timeout = napi_get_gro_flush_timeout(napi);
if (napi->defer_hard_irqs_count && timeout) {
hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
skip_schedule = true;
@@ -6498,26 +6507,82 @@ void napi_busy_loop(unsigned int napi_id,
}
EXPORT_SYMBOL(napi_busy_loop);
+void napi_suspend_irqs(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ rcu_read_lock();
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ unsigned long timeout = napi_get_irq_suspend_timeout(napi);
+
+ if (timeout)
+ hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
+ }
+ rcu_read_unlock();
+}
+
+void napi_resume_irqs(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ rcu_read_lock();
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ /* If irq_suspend_timeout is set to 0 between the call to
+ * napi_suspend_irqs and now, the original value still
+ * determines the safety timeout as intended and napi_watchdog
+ * will resume irq processing.
+ */
+ if (napi_get_irq_suspend_timeout(napi)) {
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
+ }
+ }
+ rcu_read_unlock();
+}
+
#endif /* CONFIG_NET_RX_BUSY_POLL */
+static void __napi_hash_add_with_id(struct napi_struct *napi,
+ unsigned int napi_id)
+{
+ napi->napi_id = napi_id;
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+}
+
+static void napi_hash_add_with_id(struct napi_struct *napi,
+ unsigned int napi_id)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&napi_hash_lock, flags);
+ WARN_ON_ONCE(napi_by_id(napi_id));
+ __napi_hash_add_with_id(napi, napi_id);
+ spin_unlock_irqrestore(&napi_hash_lock, flags);
+}
+
static void napi_hash_add(struct napi_struct *napi)
{
+ unsigned long flags;
+
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
return;
- spin_lock(&napi_hash_lock);
+ spin_lock_irqsave(&napi_hash_lock, flags);
/* 0..NR_CPUS range is reserved for sender_cpu use */
do {
if (unlikely(++napi_gen_id < MIN_NAPI_ID))
napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
- napi->napi_id = napi_gen_id;
- hlist_add_head_rcu(&napi->napi_hash_node,
- &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+ __napi_hash_add_with_id(napi, napi_gen_id);
- spin_unlock(&napi_hash_lock);
+ spin_unlock_irqrestore(&napi_hash_lock, flags);
}
/* Warning : caller is responsible to make sure rcu grace period
@@ -6525,11 +6590,13 @@ static void napi_hash_add(struct napi_struct *napi)
*/
static void napi_hash_del(struct napi_struct *napi)
{
- spin_lock(&napi_hash_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&napi_hash_lock, flags);
hlist_del_init_rcu(&napi->napi_hash_node);
- spin_unlock(&napi_hash_lock);
+ spin_unlock_irqrestore(&napi_hash_lock, flags);
}
static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
@@ -6638,6 +6705,30 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
}
EXPORT_SYMBOL(netif_queue_set_napi);
+static void napi_restore_config(struct napi_struct *n)
+{
+ n->defer_hard_irqs = n->config->defer_hard_irqs;
+ n->gro_flush_timeout = n->config->gro_flush_timeout;
+ n->irq_suspend_timeout = n->config->irq_suspend_timeout;
+ /* a NAPI ID might be stored in the config, if so use it. if not, use
+ * napi_hash_add to generate one for us. It will be saved to the config
+ * in napi_disable.
+ */
+ if (n->config->napi_id)
+ napi_hash_add_with_id(n, n->config->napi_id);
+ else
+ napi_hash_add(n);
+}
+
+static void napi_save_config(struct napi_struct *n)
+{
+ n->config->defer_hard_irqs = n->defer_hard_irqs;
+ n->config->gro_flush_timeout = n->gro_flush_timeout;
+ n->config->irq_suspend_timeout = n->irq_suspend_timeout;
+ n->config->napi_id = n->napi_id;
+ napi_hash_del(n);
+}
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6665,7 +6756,13 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
- napi_hash_add(napi);
+
+ /* default settings from sysfs are applied to all NAPIs. any per-NAPI
+ * configuration will be loaded in napi_enable
+ */
+ napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
+ napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
+
napi_get_frags_check(napi);
/* Create kthread for this napi if dev->threaded is set.
* Clear dev->threaded if kthread creation failed so that
@@ -6697,6 +6794,11 @@ void napi_disable(struct napi_struct *n)
hrtimer_cancel(&n->timer);
+ if (n->config)
+ napi_save_config(n);
+ else
+ napi_hash_del(n);
+
clear_bit(NAPI_STATE_DISABLE, &n->state);
}
EXPORT_SYMBOL(napi_disable);
@@ -6712,6 +6814,11 @@ void napi_enable(struct napi_struct *n)
{
unsigned long new, val = READ_ONCE(n->state);
+ if (n->config)
+ napi_restore_config(n);
+ else
+ napi_hash_add(n);
+
do {
BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
@@ -6741,7 +6848,11 @@ void __netif_napi_del(struct napi_struct *napi)
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
- napi_hash_del(napi);
+ if (napi->config) {
+ napi->index = -1;
+ napi->config = NULL;
+ }
+
list_del_rcu(&napi->dev_list);
napi_free_frags(napi);
@@ -6921,7 +7032,7 @@ static int napi_threaded_poll(void *data)
return 0;
}
-static __latent_entropy void net_rx_action(struct softirq_action *h)
+static __latent_entropy void net_rx_action(void)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
@@ -9272,7 +9383,7 @@ EXPORT_SYMBOL(netdev_port_same_parent_id);
*/
int dev_change_proto_down(struct net_device *dev, bool proto_down)
{
- if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
+ if (!dev->change_proto_down)
return -EOPNOTSUPP;
if (!netif_device_present(dev))
return -ENODEV;
@@ -9369,6 +9480,20 @@ u8 dev_xdp_prog_count(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
+int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ if (!dev->netdev_ops->ndo_bpf)
+ return -EOPNOTSUPP;
+
+ if (dev_get_min_mp_channel_count(dev)) {
+ NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider");
+ return -EBUSY;
+ }
+
+ return dev->netdev_ops->ndo_bpf(dev, bpf);
+}
+EXPORT_SYMBOL_GPL(dev_xdp_propagate);
+
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
struct bpf_prog *prog = dev_xdp_prog(dev, mode);
@@ -9397,6 +9522,11 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
struct netdev_bpf xdp;
int err;
+ if (dev_get_min_mp_channel_count(dev)) {
+ NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider");
+ return -EBUSY;
+ }
+
memset(&xdp, 0, sizeof(xdp));
xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
@@ -9821,6 +9951,20 @@ err_out:
return err;
}
+u32 dev_get_min_mp_channel_count(const struct net_device *dev)
+{
+ int i;
+
+ ASSERT_RTNL();
+
+ for (i = dev->real_num_rx_queues - 1; i >= 0; i--)
+ if (dev->_rx[i].mp_params.mp_priv)
+ /* The channel count is the idx plus 1. */
+ return i + 1;
+
+ return 0;
+}
+
/**
* dev_index_reserve() - allocate an ifindex in a namespace
* @net: the applicable net namespace
@@ -10321,6 +10465,17 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev)
}
}
+static void netdev_free_phy_link_topology(struct net_device *dev)
+{
+ struct phy_link_topology *topo = dev->link_topo;
+
+ if (IS_ENABLED(CONFIG_PHYLIB) && topo) {
+ xa_destroy(&topo->phys);
+ kfree(topo);
+ dev->link_topo = NULL;
+ }
+}
+
/**
* register_netdevice() - register a network device
* @dev: device to register
@@ -10868,7 +11023,7 @@ noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset)
return;
}
- field = (__force unsigned long __percpu *)((__force void *)p + offset);
+ field = (unsigned long __percpu *)((void __percpu *)p + offset);
this_cpu_inc(*field);
}
EXPORT_SYMBOL_GPL(netdev_core_stats_inc);
@@ -11009,8 +11164,8 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
WARN_ON(dev->reg_state == NETREG_REGISTERED);
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
- dev->gro_flush_timeout = 20000;
- dev->napi_defer_hard_irqs = 1;
+ netdev_set_gro_flush_timeout(dev, 20000);
+ netdev_set_defer_hard_irqs(dev, 1);
}
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
@@ -11034,6 +11189,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
+ size_t napi_config_sz;
+ unsigned int maxqs;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -11047,6 +11204,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
+ maxqs = max(txqs, rxqs);
+
dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!dev)
@@ -11099,6 +11258,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
+
+ mutex_init(&dev->lock);
+
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -11120,7 +11282,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool)
goto free_all;
- strcpy(dev->name, name);
+ napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config));
+ dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT);
+ if (!dev->napi_config)
+ goto free_all;
+
+ strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
@@ -11169,6 +11336,8 @@ void free_netdev(struct net_device *dev)
return;
}
+ mutex_destroy(&dev->lock);
+
kfree(dev->ethtool);
netif_free_tx_queues(dev);
netif_free_rx_queues(dev);
@@ -11181,6 +11350,8 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+ kvfree(dev->napi_config);
+
ref_tracker_dir_exit(&dev->refcnt_tracker);
#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);
@@ -11191,6 +11362,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
+ netdev_free_phy_link_topology(dev);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) {
@@ -11343,6 +11516,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
+ dev_dmabuf_uninstall(dev);
netdev_offload_xstats_disable_all(dev);
@@ -11375,6 +11549,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
mutex_destroy(&dev->ethtool->rss_lock);
+ net_shaper_flush_netdev(dev);
+
if (skb)
rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
@@ -11407,7 +11583,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
* @head: list of devices
*
* Note: As most callers use a stack allocated list_head,
- * we force a list_del() to make sure stack wont be corrupted later.
+ * we force a list_del() to make sure stack won't be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -11462,10 +11638,10 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* Don't allow namespace local devices to be moved. */
err = -EINVAL;
- if (dev->features & NETIF_F_NETNS_LOCAL)
+ if (dev->netns_local)
goto out;
- /* Ensure the device has been registrered */
+ /* Ensure the device has been registered */
if (dev->reg_state != NETREG_REGISTERED)
goto out;
@@ -11844,7 +12020,7 @@ static void __net_exit default_device_exit_net(struct net *net)
char fb_name[IFNAMSIZ];
/* Ignore unmoveable devices (i.e. loopback) */
- if (dev->features & NETIF_F_NETNS_LOCAL)
+ if (dev->netns_local)
continue;
/* Leave virtual devices for the generic cleanup */
@@ -11905,7 +12081,7 @@ static struct pernet_operations __net_initdata default_device_ops = {
static void __init net_dev_struct_check(void)
{
/* TX read-mostly hotpath */
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags_fast);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
@@ -11943,8 +12119,6 @@ static void __init net_dev_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -11956,7 +12130,7 @@ static void __init net_dev_struct_check(void)
#ifdef CONFIG_NET_XGRESS
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
#endif
- CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92);
}
/*
diff --git a/net/core/dev.h b/net/core/dev.h
index 5654325c5b71..d043dee25a68 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -35,6 +35,16 @@ void dev_addr_flush(struct net_device *dev);
int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+void net_shaper_flush_netdev(struct net_device *dev);
+void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq);
+#else
+static inline void net_shaper_flush_netdev(struct net_device *dev) {}
+static inline void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq) {}
+#endif
+
/* sysctls not referred to from outside net/core/ */
extern int netdev_unregister_timeout_secs;
extern int weight_p;
@@ -138,6 +148,119 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
WRITE_ONCE(dev->gro_ipv4_max_size, size);
}
+/**
+ * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
+ * @n: napi struct to get the defer_hard_irqs field from
+ *
+ * Return: the per-NAPI value of the defar_hard_irqs field.
+ */
+static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n)
+{
+ return READ_ONCE(n->defer_hard_irqs);
+}
+
+/**
+ * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
+ * @n: napi_struct to set the defer_hard_irqs field
+ * @defer: the value the field should be set to
+ */
+static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer)
+{
+ WRITE_ONCE(n->defer_hard_irqs, defer);
+}
+
+/**
+ * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
+ * @netdev: the net_device for which all NAPIs will have defer_hard_irqs set
+ * @defer: the defer_hard_irqs value to set
+ */
+static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
+ u32 defer)
+{
+ unsigned int count = max(netdev->num_rx_queues,
+ netdev->num_tx_queues);
+ struct napi_struct *napi;
+ int i;
+
+ WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
+ list_for_each_entry(napi, &netdev->napi_list, dev_list)
+ napi_set_defer_hard_irqs(napi, defer);
+
+ for (i = 0; i < count; i++)
+ netdev->napi_config[i].defer_hard_irqs = defer;
+}
+
+/**
+ * napi_get_gro_flush_timeout - get the gro_flush_timeout
+ * @n: napi struct to get the gro_flush_timeout from
+ *
+ * Return: the per-NAPI value of the gro_flush_timeout field.
+ */
+static inline unsigned long
+napi_get_gro_flush_timeout(const struct napi_struct *n)
+{
+ return READ_ONCE(n->gro_flush_timeout);
+}
+
+/**
+ * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi
+ * @n: napi struct to set the gro_flush_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout
+ */
+static inline void napi_set_gro_flush_timeout(struct napi_struct *n,
+ unsigned long timeout)
+{
+ WRITE_ONCE(n->gro_flush_timeout, timeout);
+}
+
+/**
+ * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs
+ * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set
+ * @timeout: the timeout value to set
+ */
+static inline void netdev_set_gro_flush_timeout(struct net_device *netdev,
+ unsigned long timeout)
+{
+ unsigned int count = max(netdev->num_rx_queues,
+ netdev->num_tx_queues);
+ struct napi_struct *napi;
+ int i;
+
+ WRITE_ONCE(netdev->gro_flush_timeout, timeout);
+ list_for_each_entry(napi, &netdev->napi_list, dev_list)
+ napi_set_gro_flush_timeout(napi, timeout);
+
+ for (i = 0; i < count; i++)
+ netdev->napi_config[i].gro_flush_timeout = timeout;
+}
+
+/**
+ * napi_get_irq_suspend_timeout - get the irq_suspend_timeout
+ * @n: napi struct to get the irq_suspend_timeout from
+ *
+ * Return: the per-NAPI value of the irq_suspend_timeout field.
+ */
+static inline unsigned long
+napi_get_irq_suspend_timeout(const struct napi_struct *n)
+{
+ return READ_ONCE(n->irq_suspend_timeout);
+}
+
+/**
+ * napi_set_irq_suspend_timeout - set the irq_suspend_timeout for a napi
+ * @n: napi struct to set the irq_suspend_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_irq_suspend_timeout sets the per-NAPI irq_suspend_timeout
+ */
+static inline void napi_set_irq_suspend_timeout(struct napi_struct *n,
+ unsigned long timeout)
+{
+ WRITE_ONCE(n->irq_suspend_timeout, timeout);
+}
+
int rps_cpumask_housekeeping(struct cpumask *mask);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index baa63dee2829..166e404f7c03 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -262,7 +262,7 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
}
/* This function only works where there is a strict 1-1 relationship
- * between source and destionation of they synch. If you ever need to
+ * between source and destination of they synch. If you ever need to
* sync addresses to more then 1 destination, you need to use
* __hw_addr_sync_multiple().
*/
@@ -299,8 +299,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
EXPORT_SYMBOL(__hw_addr_unsync);
/**
- * __hw_addr_sync_dev - Synchonize device's multicast list
- * @list: address list to syncronize
+ * __hw_addr_sync_dev - Synchronize device's multicast list
+ * @list: address list to synchronize
* @dev: device to sync
* @sync: function to call if address should be added
* @unsync: function to call if address should be removed
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 8592c052c0f4..46d43b950471 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -64,7 +64,7 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc)
}
/* Loop over the interfaces, and write an info block for each. */
- rtnl_lock();
+ rtnl_net_lock(net);
for_each_netdev(net, dev) {
if (!pos)
done = inet_gifconf(dev, NULL, 0, size);
@@ -72,12 +72,12 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc)
done = inet_gifconf(dev, pos + total,
len - total, size);
if (done < 0) {
- rtnl_unlock();
+ rtnl_net_unlock(net);
return -EFAULT;
}
total += done;
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return put_user(total, &uifc->ifc_len);
}
@@ -317,8 +317,7 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* should take precedence in front of hardware timestamping provided by the
* netdev. If the netdev driver needs to perform specific actions even for PHY
* timestamping to work properly (a switch port must trap the timestamped
- * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
- * dev->priv_flags.
+ * frames and not forward them), it must set dev->see_all_hwtstamp_requests.
*/
int dev_set_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg,
@@ -332,13 +331,13 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
- if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ if (phy_ts && dev->see_all_hwtstamp_requests) {
err = ops->ndo_hwtstamp_get(dev, &old_cfg);
if (err)
return err;
}
- if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ if (!phy_ts || dev->see_all_hwtstamp_requests) {
err = ops->ndo_hwtstamp_set(dev, cfg, extack);
if (err) {
if (extack->_msg)
@@ -347,7 +346,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
}
}
- if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS))
+ if (phy_ts && dev->see_all_hwtstamp_requests)
changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
if (phy_ts) {
diff --git a/net/core/devmem.c b/net/core/devmem.c
new file mode 100644
index 000000000000..11b91c12ee11
--- /dev/null
+++ b/net/core/devmem.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Devmem TCP
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ * Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+ * Kaiyuan Zhang <kaiyuanz@google.com
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <net/page_pool/helpers.h>
+#include <trace/events/page_pool.h>
+
+#include "devmem.h"
+#include "mp_dmabuf_devmem.h"
+#include "page_pool_priv.h"
+
+/* Device memory support */
+
+/* Protected by rtnl_lock() */
+static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+
+static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
+ struct gen_pool_chunk *chunk,
+ void *not_used)
+{
+ struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
+
+ kvfree(owner->niovs);
+ kfree(owner);
+}
+
+static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
+{
+ struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+
+ return owner->base_dma_addr +
+ ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
+}
+
+void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
+{
+ size_t size, avail;
+
+ gen_pool_for_each_chunk(binding->chunk_pool,
+ net_devmem_dmabuf_free_chunk_owner, NULL);
+
+ size = gen_pool_size(binding->chunk_pool);
+ avail = gen_pool_avail(binding->chunk_pool);
+
+ if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
+ size, avail))
+ gen_pool_destroy(binding->chunk_pool);
+
+ dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
+ DMA_FROM_DEVICE);
+ dma_buf_detach(binding->dmabuf, binding->attachment);
+ dma_buf_put(binding->dmabuf);
+ xa_destroy(&binding->bound_rxqs);
+ kfree(binding);
+}
+
+struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct dmabuf_genpool_chunk_owner *owner;
+ unsigned long dma_addr;
+ struct net_iov *niov;
+ ssize_t offset;
+ ssize_t index;
+
+ dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
+ (void **)&owner);
+ if (!dma_addr)
+ return NULL;
+
+ offset = dma_addr - owner->base_dma_addr;
+ index = offset / PAGE_SIZE;
+ niov = &owner->niovs[index];
+
+ niov->pp_magic = 0;
+ niov->pp = NULL;
+ atomic_long_set(&niov->pp_ref_count, 0);
+
+ return niov;
+}
+
+void net_devmem_free_dmabuf(struct net_iov *niov)
+{
+ struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
+ unsigned long dma_addr = net_devmem_get_dma_addr(niov);
+
+ if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
+ PAGE_SIZE)))
+ return;
+
+ gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
+}
+
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int rxq_idx;
+
+ if (binding->list.next)
+ list_del(&binding->list);
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
+ WARN_ON(rxq->mp_params.mp_priv != binding);
+
+ rxq->mp_params.mp_priv = NULL;
+
+ rxq_idx = get_netdev_rx_queue_index(rxq);
+
+ WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
+ }
+
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_rx_queue *rxq;
+ u32 xa_idx;
+ int err;
+
+ if (rxq_idx >= dev->real_num_rx_queues) {
+ NL_SET_ERR_MSG(extack, "rx queue index out of range");
+ return -ERANGE;
+ }
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ if (rxq->mp_params.mp_priv) {
+ NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
+ return -EEXIST;
+ }
+
+#ifdef CONFIG_XDP_SOCKETS
+ if (rxq->pool) {
+ NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
+ return -EBUSY;
+ }
+#endif
+
+ err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
+ GFP_KERNEL);
+ if (err)
+ return err;
+
+ rxq->mp_params.mp_priv = binding;
+
+ err = netdev_rx_queue_restart(dev, rxq_idx);
+ if (err)
+ goto err_xa_erase;
+
+ return 0;
+
+err_xa_erase:
+ rxq->mp_params.mp_priv = NULL;
+ xa_erase(&binding->bound_rxqs, xa_idx);
+
+ return err;
+}
+
+struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ static u32 id_alloc_next;
+ struct scatterlist *sg;
+ struct dma_buf *dmabuf;
+ unsigned int sg_idx, i;
+ unsigned long virtual;
+ int err;
+
+ dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!binding) {
+ err = -ENOMEM;
+ goto err_put_dmabuf;
+ }
+
+ binding->dev = dev;
+
+ err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
+ binding, xa_limit_32b, &id_alloc_next,
+ GFP_KERNEL);
+ if (err < 0)
+ goto err_free_binding;
+
+ xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);
+
+ refcount_set(&binding->ref, 1);
+
+ binding->dmabuf = dmabuf;
+
+ binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
+ if (IS_ERR(binding->attachment)) {
+ err = PTR_ERR(binding->attachment);
+ NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
+ goto err_free_id;
+ }
+
+ binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
+ DMA_FROM_DEVICE);
+ if (IS_ERR(binding->sgt)) {
+ err = PTR_ERR(binding->sgt);
+ NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
+ goto err_detach;
+ }
+
+ /* For simplicity we expect to make PAGE_SIZE allocations, but the
+ * binding can be much more flexible than that. We may be able to
+ * allocate MTU sized chunks here. Leave that for future work...
+ */
+ binding->chunk_pool =
+ gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
+ if (!binding->chunk_pool) {
+ err = -ENOMEM;
+ goto err_unmap;
+ }
+
+ virtual = 0;
+ for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
+ dma_addr_t dma_addr = sg_dma_address(sg);
+ struct dmabuf_genpool_chunk_owner *owner;
+ size_t len = sg_dma_len(sg);
+ struct net_iov *niov;
+
+ owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!owner) {
+ err = -ENOMEM;
+ goto err_free_chunks;
+ }
+
+ owner->base_virtual = virtual;
+ owner->base_dma_addr = dma_addr;
+ owner->num_niovs = len / PAGE_SIZE;
+ owner->binding = binding;
+
+ err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
+ dma_addr, len, dev_to_node(&dev->dev),
+ owner);
+ if (err) {
+ kfree(owner);
+ err = -EINVAL;
+ goto err_free_chunks;
+ }
+
+ owner->niovs = kvmalloc_array(owner->num_niovs,
+ sizeof(*owner->niovs),
+ GFP_KERNEL);
+ if (!owner->niovs) {
+ err = -ENOMEM;
+ goto err_free_chunks;
+ }
+
+ for (i = 0; i < owner->num_niovs; i++) {
+ niov = &owner->niovs[i];
+ niov->owner = owner;
+ page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
+ net_devmem_get_dma_addr(niov));
+ }
+
+ virtual += len;
+ }
+
+ return binding;
+
+err_free_chunks:
+ gen_pool_for_each_chunk(binding->chunk_pool,
+ net_devmem_dmabuf_free_chunk_owner, NULL);
+ gen_pool_destroy(binding->chunk_pool);
+err_unmap:
+ dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
+ DMA_FROM_DEVICE);
+err_detach:
+ dma_buf_detach(dmabuf, binding->attachment);
+err_free_id:
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+err_free_binding:
+ kfree(binding);
+err_put_dmabuf:
+ dma_buf_put(dmabuf);
+ return ERR_PTR(err);
+}
+
+void dev_dmabuf_uninstall(struct net_device *dev)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ binding = dev->_rx[i].mp_params.mp_priv;
+ if (!binding)
+ continue;
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
+ if (rxq == &dev->_rx[i]) {
+ xa_erase(&binding->bound_rxqs, xa_idx);
+ break;
+ }
+ }
+}
+
+/*** "Dmabuf devmem memory provider" ***/
+
+int mp_dmabuf_devmem_init(struct page_pool *pool)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+
+ if (!binding)
+ return -EINVAL;
+
+ if (!pool->dma_map)
+ return -EOPNOTSUPP;
+
+ if (pool->dma_sync)
+ return -EOPNOTSUPP;
+
+ if (pool->p.order != 0)
+ return -E2BIG;
+
+ net_devmem_dmabuf_binding_get(binding);
+ return 0;
+}
+
+netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+ struct net_iov *niov;
+ netmem_ref netmem;
+
+ niov = net_devmem_alloc_dmabuf(binding);
+ if (!niov)
+ return 0;
+
+ netmem = net_iov_to_netmem(niov);
+
+ page_pool_set_pp_info(pool, netmem);
+
+ pool->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
+ return netmem;
+}
+
+void mp_dmabuf_devmem_destroy(struct page_pool *pool)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
+{
+ long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));
+
+ if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ return false;
+
+ if (WARN_ON_ONCE(refcount != 1))
+ return false;
+
+ page_pool_clear_pp_info(netmem);
+
+ net_devmem_free_dmabuf(netmem_to_net_iov(netmem));
+
+ /* We don't want the page pool put_page()ing our net_iovs. */
+ return false;
+}
diff --git a/net/core/devmem.h b/net/core/devmem.h
new file mode 100644
index 000000000000..76099ef9c482
--- /dev/null
+++ b/net/core/devmem.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Device memory TCP support
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ * Willem de Bruijn <willemb@google.com>
+ * Kaiyuan Zhang <kaiyuanz@google.com>
+ *
+ */
+#ifndef _NET_DEVMEM_H
+#define _NET_DEVMEM_H
+
+struct netlink_ext_ack;
+
+struct net_devmem_dmabuf_binding {
+ struct dma_buf *dmabuf;
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sgt;
+ struct net_device *dev;
+ struct gen_pool *chunk_pool;
+
+ /* The user holds a ref (via the netlink API) for as long as they want
+ * the binding to remain alive. Each page pool using this binding holds
+ * a ref to keep the binding alive. Each allocated net_iov holds a
+ * ref.
+ *
+ * The binding undos itself and unmaps the underlying dmabuf once all
+ * those refs are dropped and the binding is no longer desired or in
+ * use.
+ */
+ refcount_t ref;
+
+ /* The list of bindings currently active. Used for netlink to notify us
+ * of the user dropping the bind.
+ */
+ struct list_head list;
+
+ /* rxq's this binding is active on. */
+ struct xarray bound_rxqs;
+
+ /* ID of this binding. Globally unique to all bindings currently
+ * active.
+ */
+ u32 id;
+};
+
+#if defined(CONFIG_NET_DEVMEM)
+/* Owner of the dma-buf chunks inserted into the gen pool. Each scatterlist
+ * entry from the dmabuf is inserted into the genpool as a chunk, and needs
+ * this owner struct to keep track of some metadata necessary to create
+ * allocations from this chunk.
+ */
+struct dmabuf_genpool_chunk_owner {
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+
+ /* dma_addr of the start of the chunk. */
+ dma_addr_t base_dma_addr;
+
+ /* Array of net_iovs for this chunk. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ struct net_devmem_dmabuf_binding *binding;
+};
+
+void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
+struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack);
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack);
+void dev_dmabuf_uninstall(struct net_device *dev);
+
+static inline struct dmabuf_genpool_chunk_owner *
+net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
+static inline struct net_devmem_dmabuf_binding *
+net_iov_binding(const struct net_iov *niov)
+{
+ return net_iov_owner(niov)->binding;
+}
+
+static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
+{
+ struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+
+ return owner->base_virtual +
+ ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
+}
+
+static inline u32 net_iov_binding_id(const struct net_iov *niov)
+{
+ return net_iov_owner(niov)->binding->id;
+}
+
+static inline void
+net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
+{
+ refcount_inc(&binding->ref);
+}
+
+static inline void
+net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding)
+{
+ if (!refcount_dec_and_test(&binding->ref))
+ return;
+
+ __net_devmem_dmabuf_binding_free(binding);
+}
+
+struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
+void net_devmem_free_dmabuf(struct net_iov *ppiov);
+
+#else
+struct net_devmem_dmabuf_binding;
+
+static inline void
+__net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
+{
+}
+
+static inline struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+}
+
+static inline int
+net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack)
+
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void dev_dmabuf_uninstall(struct net_device *dev)
+{
+}
+
+static inline struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ return NULL;
+}
+
+static inline void net_devmem_free_dmabuf(struct net_iov *ppiov)
+{
+}
+
+static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
+{
+ return 0;
+}
+
+static inline u32 net_iov_binding_id(const struct net_iov *niov)
+{
+ return 0;
+}
+#endif
+
+#endif /* _NET_DEVMEM_H */
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 2e0ae3328232..6efd4cccc9dd 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -37,7 +37,7 @@
#include <trace/events/napi.h>
#include <trace/events/devlink.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#define TRACE_ON 1
#define TRACE_OFF 0
diff --git a/net/core/dst.c b/net/core/dst.c
index 95f533844f17..9552a90d4772 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -109,9 +109,6 @@ static void dst_destroy(struct dst_entry *dst)
child = xdst->child;
}
#endif
- if (!(dst->flags & DST_NOCOUNT))
- dst_entries_add(dst->ops, -1);
-
if (dst->ops->destroy)
dst->ops->destroy(dst);
netdev_put(dst->dev, &dst->dev_tracker);
@@ -159,17 +156,27 @@ void dst_dev_put(struct dst_entry *dst)
}
EXPORT_SYMBOL(dst_dev_put);
+static void dst_count_dec(struct dst_entry *dst)
+{
+ if (!(dst->flags & DST_NOCOUNT))
+ dst_entries_add(dst->ops, -1);
+}
+
void dst_release(struct dst_entry *dst)
{
- if (dst && rcuref_put(&dst->__rcuref))
+ if (dst && rcuref_put(&dst->__rcuref)) {
+ dst_count_dec(dst);
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
+ }
}
EXPORT_SYMBOL(dst_release);
void dst_release_immediate(struct dst_entry *dst)
{
- if (dst && rcuref_put(&dst->__rcuref))
+ if (dst && rcuref_put(&dst->__rcuref)) {
+ dst_count_dec(dst);
dst_destroy(dst);
+ }
}
EXPORT_SYMBOL(dst_release_immediate);
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index fc96259807b6..5cdca49b1d7c 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -43,7 +43,6 @@ static unsigned int fib_seq_sum(struct net *net)
struct fib_notifier_ops *ops;
unsigned int fib_seq = 0;
- rtnl_lock();
rcu_read_lock();
list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
if (!try_module_get(ops->owner))
@@ -52,7 +51,6 @@ static unsigned int fib_seq_sum(struct net *net)
module_put(ops->owner);
}
rcu_read_unlock();
- rtnl_unlock();
return fib_seq;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 6ebffbc63236..34185d138c95 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
+#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
@@ -72,7 +73,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->suppress_prefixlen = -1;
r->suppress_ifgroup = -1;
- /* The lock is not required here, the list in unreacheable
+ /* The lock is not required here, the list in unreachable
* at the moment this function is called */
list_add_tail(&r->list, &ops->rules_list);
return 0;
@@ -100,7 +101,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid);
-static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
+static struct fib_rules_ops *lookup_rules_ops(const struct net *net,
+ int family)
{
struct fib_rules_ops *ops;
@@ -369,7 +371,9 @@ static int call_fib_rule_notifiers(struct net *net,
.rule = rule,
};
- ops->fib_rules_seq++;
+ ASSERT_RTNL();
+ /* Paired with READ_ONCE() in fib_rules_seq() */
+ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1);
return call_fib_notifiers(net, event_type, &info.info);
}
@@ -396,17 +400,16 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
}
EXPORT_SYMBOL_GPL(fib_rules_dump);
-unsigned int fib_rules_seq_read(struct net *net, int family)
+unsigned int fib_rules_seq_read(const struct net *net, int family)
{
unsigned int fib_rules_seq;
struct fib_rules_ops *ops;
- ASSERT_RTNL();
-
ops = lookup_rules_ops(net, family);
if (!ops)
return 0;
- fib_rules_seq = ops->fib_rules_seq;
+ /* Paired with WRITE_ONCE() in call_fib_rule_notifiers() */
+ fib_rules_seq = READ_ONCE(ops->fib_rules_seq);
rules_ops_put(ops);
return fib_rules_seq;
@@ -555,8 +558,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
nlrule->pref = fib_default_rule_pref(ops);
}
- nlrule->proto = tb[FRA_PROTOCOL] ?
- nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+ nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC);
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -766,7 +768,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_PROTOCOL] = { .type = NLA_U8 },
[FRA_IP_PROTO] = { .type = NLA_U8 },
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
- [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
+ [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
};
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1205,8 +1208,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, ops->nlgroup, err);
+ rtnl_set_sk_err(net, ops->nlgroup, err);
}
static void attach_rules(struct list_head *rules, struct net_device *dev)
@@ -1288,13 +1290,18 @@ static struct pernet_operations fib_rules_net_ops = {
.exit = fib_rules_net_exit,
};
+static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule},
+ {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule},
+ {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+};
+
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule,
- RTNL_FLAG_DUMP_UNLOCKED);
+
+ rtnl_register_many(fib_rules_rtnl_msg_handlers);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
@@ -1309,9 +1316,7 @@ static int __init fib_rules_init(void)
fail_unregister:
unregister_pernet_subsys(&fib_rules_net_ops);
fail:
- rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
- rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
- rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
+ rtnl_unregister_many(fib_rules_rtnl_msg_handlers);
return err;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index f3c72cf86099..21131ec25f24 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,7 +42,7 @@
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
@@ -84,6 +84,7 @@
#include <net/netkit.h>
#include <linux/un.h>
#include <net/xdp_sock_drv.h>
+#include <net/inet_dscp.h>
#include "dev.h"
@@ -1265,8 +1266,8 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
* so we need to keep the user BPF around until the 2nd
* pass. At this time, the user BPF is stored in fp->insns.
*/
- old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
- GFP_KERNEL | __GFP_NOWARN);
+ old_prog = kmemdup_array(fp->insns, old_len, sizeof(struct sock_filter),
+ GFP_KERNEL | __GFP_NOWARN);
if (!old_prog) {
err = -ENOMEM;
goto out_err;
@@ -1653,18 +1654,6 @@ void sk_reuseport_prog_free(struct bpf_prog *prog)
bpf_prog_destroy(prog);
}
-struct bpf_scratchpad {
- union {
- __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
- u8 buff[MAX_BPF_STACK];
- };
- local_lock_t bh_lock;
-};
-
-static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp) = {
- .bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
-
static inline int __bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
@@ -2021,11 +2010,6 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
__be32 *, to, u32, to_size, __wsum, seed)
{
- struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
- u32 diff_size = from_size + to_size;
- int i, j = 0;
- __wsum ret;
-
/* This is quite flexible, some examples:
*
* from_size == 0, to_size > 0, seed := csum --> pushing data
@@ -2034,19 +2018,19 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
*
* Even for diffing, from_size and to_size don't need to be equal.
*/
- if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
- diff_size > sizeof(sp->diff)))
- return -EINVAL;
- local_lock_nested_bh(&bpf_sp.bh_lock);
- for (i = 0; i < from_size / sizeof(__be32); i++, j++)
- sp->diff[j] = ~from[i];
- for (i = 0; i < to_size / sizeof(__be32); i++, j++)
- sp->diff[j] = to[i];
+ __wsum ret = seed;
- ret = csum_partial(sp->diff, diff_size, seed);
- local_unlock_nested_bh(&bpf_sp.bh_lock);
- return ret;
+ if (from_size && to_size)
+ ret = csum_sub(csum_partial(to, to_size, ret),
+ csum_partial(from, from_size, 0));
+ else if (to_size)
+ ret = csum_partial(to, to_size, ret);
+
+ else if (from_size)
+ ret = ~csum_partial(from, from_size, ~ret);
+
+ return csum_from32to16((__force unsigned int)ret);
}
static const struct bpf_func_proto bpf_csum_diff_proto = {
@@ -2248,7 +2232,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
if (dst)
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
@@ -2371,7 +2355,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
- .flowi4_tos = RT_TOS(ip4h->tos),
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)),
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
@@ -2437,9 +2421,9 @@ out:
/* Internal, non-exposed redirect flags. */
enum {
- BPF_F_NEIGH = (1ULL << 1),
- BPF_F_PEER = (1ULL << 2),
- BPF_F_NEXTHOP = (1ULL << 3),
+ BPF_F_NEIGH = (1ULL << 16),
+ BPF_F_PEER = (1ULL << 17),
+ BPF_F_NEXTHOP = (1ULL << 18),
#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
};
@@ -2449,6 +2433,8 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
struct sk_buff *clone;
int ret;
+ BUILD_BUG_ON(BPF_F_REDIRECT_INTERNAL & BPF_F_REDIRECT_FLAGS);
+
if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
return -EINVAL;
@@ -2618,18 +2604,16 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
static void sk_msg_reset_curr(struct sk_msg *msg)
{
- u32 i = msg->sg.start;
- u32 len = 0;
-
- do {
- len += sk_msg_elem(msg, i)->length;
- sk_msg_iter_var_next(i);
- if (len >= msg->sg.size)
- break;
- } while (i != msg->sg.end);
+ if (!msg->sg.size) {
+ msg->sg.curr = msg->sg.start;
+ msg->sg.copybreak = 0;
+ } else {
+ u32 i = msg->sg.end;
- msg->sg.curr = i;
- msg->sg.copybreak = 0;
+ sk_msg_iter_var_prev(i);
+ msg->sg.curr = i;
+ msg->sg.copybreak = msg->sg.data[i].length;
+ }
}
static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
@@ -2792,7 +2776,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
- if (start >= offset + l)
+ if (start > offset + l)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@@ -2817,6 +2801,8 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
raw = page_address(page);
+ if (i == msg->sg.end)
+ sk_msg_iter_var_prev(i);
psge = sk_msg_elem(msg, i);
front = start - offset;
back = psge->length - front;
@@ -2833,7 +2819,13 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
}
put_page(sg_page(psge));
- } else if (start - offset) {
+ new = i;
+ goto place_new;
+ }
+
+ if (start - offset) {
+ if (i == msg->sg.end)
+ sk_msg_iter_var_prev(i);
psge = sk_msg_elem(msg, i);
rsge = sk_msg_elem_cpy(msg, i);
@@ -2844,39 +2836,44 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
sg_unmark_end(&rsge);
- sk_msg_iter_next(msg, end);
}
/* Slot(s) to place newly allocated data */
+ sk_msg_iter_next(msg, end);
new = i;
+ sk_msg_iter_var_next(i);
+
+ if (i == msg->sg.end) {
+ if (!rsge.length)
+ goto place_new;
+ sk_msg_iter_next(msg, end);
+ goto place_new;
+ }
/* Shift one or two slots as needed */
- if (!copy) {
- sge = sk_msg_elem_cpy(msg, i);
+ sge = sk_msg_elem_cpy(msg, new);
+ sg_unmark_end(&sge);
+ nsge = sk_msg_elem_cpy(msg, i);
+ if (rsge.length) {
sk_msg_iter_var_next(i);
- sg_unmark_end(&sge);
+ nnsge = sk_msg_elem_cpy(msg, i);
sk_msg_iter_next(msg, end);
+ }
- nsge = sk_msg_elem_cpy(msg, i);
+ while (i != msg->sg.end) {
+ msg->sg.data[i] = sge;
+ sge = nsge;
+ sk_msg_iter_var_next(i);
if (rsge.length) {
- sk_msg_iter_var_next(i);
+ nsge = nnsge;
nnsge = sk_msg_elem_cpy(msg, i);
- }
-
- while (i != msg->sg.end) {
- msg->sg.data[i] = sge;
- sge = nsge;
- sk_msg_iter_var_next(i);
- if (rsge.length) {
- nsge = nnsge;
- nnsge = sk_msg_elem_cpy(msg, i);
- } else {
- nsge = sk_msg_elem_cpy(msg, i);
- }
+ } else {
+ nsge = sk_msg_elem_cpy(msg, i);
}
}
+place_new:
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
@@ -2905,8 +2902,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
static void sk_msg_shift_left(struct sk_msg *msg, int i)
{
+ struct scatterlist *sge = sk_msg_elem(msg, i);
int prev;
+ put_page(sg_page(sge));
do {
prev = i;
sk_msg_iter_var_next(i);
@@ -2943,6 +2942,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
@@ -2955,7 +2957,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
} while (i != msg->sg.end);
/* Bounds checks: start and pop must be inside message */
- if (start >= offset + l || last >= msg->sg.size)
+ if (start >= offset + l || last > msg->sg.size)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@@ -2984,12 +2986,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
*/
if (start != offset) {
struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
- int a = start;
+ int a = start - offset;
int b = sge->length - pop - a;
sk_msg_iter_var_next(i);
- if (pop < sge->length - a) {
+ if (b > 0) {
if (space) {
sge->length = a;
sk_msg_shift_right(msg, i);
@@ -3008,7 +3010,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
if (unlikely(!page))
return -ENOMEM;
- sge->length = a;
orig = sg_page(sge);
from = sg_virt(sge);
to = page_address(page);
@@ -3018,7 +3019,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
put_page(orig);
}
pop = 0;
- } else if (pop >= sge->length - a) {
+ } else {
pop -= (sge->length - a);
sge->length = a;
}
@@ -3052,7 +3053,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
pop -= sge->length;
sk_msg_shift_left(msg, i);
}
- sk_msg_iter_var_next(i);
}
sk_mem_uncharge(msg->sk, len - pop);
@@ -3189,6 +3189,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
bpf_push_mac_rcsum(skb);
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
+ skb_reset_mac_len(skb);
bpf_compute_data_pointers(skb);
return ret;
@@ -5136,6 +5137,17 @@ static u64 __bpf_get_netns_cookie(struct sock *sk)
return net->net_cookie;
}
+BPF_CALL_1(bpf_get_netns_cookie, struct sk_buff *, skb)
+{
+ return __bpf_get_netns_cookie(skb && skb->sk ? skb->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_proto = {
+ .func = bpf_get_netns_cookie,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
{
return __bpf_get_netns_cookie(ctx);
@@ -5278,6 +5290,11 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
return -EINVAL;
inet_csk(sk)->icsk_rto_min = timeout;
break;
+ case TCP_BPF_SOCK_OPS_CB_FLAGS:
+ if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS))
+ return -EINVAL;
+ tp->bpf_sock_ops_cb_flags = val;
+ break;
default:
return -EINVAL;
}
@@ -5366,6 +5383,17 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
if (*optlen < 1)
return -EINVAL;
break;
+ case TCP_BPF_SOCK_OPS_CB_FLAGS:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ if (getopt) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ int cb_flags = tp->bpf_sock_ops_cb_flags;
+
+ memcpy(optval, &cb_flags, *optlen);
+ return 0;
+ }
+ return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
default:
if (getopt)
return -EINVAL;
@@ -5899,7 +5927,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl4.flowi4_iif = params->ifindex;
fl4.flowi4_oif = 0;
}
- fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
+ fl4.flowi4_tos = params->tos & INET_DSCP_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
@@ -6261,12 +6289,10 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
{
int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
struct net_device *dev = skb->dev;
- int skb_len, dev_len;
- int mtu;
+ int mtu, dev_len, skb_len;
if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
return -EINVAL;
-
if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
return -EINVAL;
@@ -6275,7 +6301,6 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
return -ENODEV;
mtu = READ_ONCE(dev->mtu);
-
dev_len = mtu + dev->hard_header_len;
/* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
@@ -6293,15 +6318,12 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
*/
if (skb_is_gso(skb)) {
ret = BPF_MTU_CHK_RET_SUCCESS;
-
if (flags & BPF_MTU_CHK_SEGS &&
!skb_gso_validate_network_len(skb, mtu))
ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
}
out:
- /* BPF verifier guarantees valid pointer */
*mtu_len = mtu;
-
return ret;
}
@@ -6322,8 +6344,6 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
return -ENODEV;
mtu = READ_ONCE(dev->mtu);
-
- /* Add L2-header as dev MTU is L3 size */
dev_len = mtu + dev->hard_header_len;
/* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
@@ -6334,9 +6354,7 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
if (xdp_len > dev_len)
ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
- /* BPF verifier guarantees valid pointer */
*mtu_len = mtu;
-
return ret;
}
@@ -6346,7 +6364,8 @@ static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_INT,
+ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u32),
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6357,7 +6376,8 @@ static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_INT,
+ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u32),
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6756,8 +6776,6 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
/* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
* sock refcnt is decremented to prevent a request_sock leak.
*/
- if (!sk_fullsock(sk2))
- sk2 = NULL;
if (sk2 != sk) {
sock_gen_put(sk);
/* Ensure there is no need to bump sk2 refcnt */
@@ -6804,8 +6822,6 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
/* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
* sock refcnt is decremented to prevent a request_sock leak.
*/
- if (!sk_fullsock(sk2))
- sk2 = NULL;
if (sk2 != sk) {
sock_gen_put(sk);
/* Ensure there is no need to bump sk2 refcnt */
@@ -7254,7 +7270,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
{
sk = sk_to_full_sk(sk);
- if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+ if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
return (unsigned long)sk;
return (unsigned long)NULL;
@@ -7883,42 +7899,37 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
#endif /* CONFIG_INET */
-bool bpf_helper_changes_pkt_data(void *func)
-{
- if (func == bpf_skb_vlan_push ||
- func == bpf_skb_vlan_pop ||
- func == bpf_skb_store_bytes ||
- func == bpf_skb_change_proto ||
- func == bpf_skb_change_head ||
- func == sk_skb_change_head ||
- func == bpf_skb_change_tail ||
- func == sk_skb_change_tail ||
- func == bpf_skb_adjust_room ||
- func == sk_skb_adjust_room ||
- func == bpf_skb_pull_data ||
- func == sk_skb_pull_data ||
- func == bpf_clone_redirect ||
- func == bpf_l3_csum_replace ||
- func == bpf_l4_csum_replace ||
- func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta ||
- func == bpf_msg_pull_data ||
- func == bpf_msg_push_data ||
- func == bpf_msg_pop_data ||
- func == bpf_xdp_adjust_tail ||
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
- func == bpf_lwt_seg6_store_bytes ||
- func == bpf_lwt_seg6_adjust_srh ||
- func == bpf_lwt_seg6_action ||
-#endif
-#ifdef CONFIG_INET
- func == bpf_sock_ops_store_hdr_opt ||
-#endif
- func == bpf_lwt_in_push_encap ||
- func == bpf_lwt_xmit_push_encap)
+bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_clone_redirect:
+ case BPF_FUNC_l3_csum_replace:
+ case BPF_FUNC_l4_csum_replace:
+ case BPF_FUNC_lwt_push_encap:
+ case BPF_FUNC_lwt_seg6_action:
+ case BPF_FUNC_lwt_seg6_adjust_srh:
+ case BPF_FUNC_lwt_seg6_store_bytes:
+ case BPF_FUNC_msg_pop_data:
+ case BPF_FUNC_msg_pull_data:
+ case BPF_FUNC_msg_push_data:
+ case BPF_FUNC_skb_adjust_room:
+ case BPF_FUNC_skb_change_head:
+ case BPF_FUNC_skb_change_proto:
+ case BPF_FUNC_skb_change_tail:
+ case BPF_FUNC_skb_pull_data:
+ case BPF_FUNC_skb_store_bytes:
+ case BPF_FUNC_skb_vlan_pop:
+ case BPF_FUNC_skb_vlan_push:
+ case BPF_FUNC_store_hdr_opt:
+ case BPF_FUNC_xdp_adjust_head:
+ case BPF_FUNC_xdp_adjust_meta:
+ case BPF_FUNC_xdp_adjust_tail:
+ /* tail-called program could call any of the above */
+ case BPF_FUNC_tail_call:
return true;
-
- return false;
+ default:
+ return false;
+ }
}
const struct bpf_func_proto bpf_event_output_data_proto __weak;
@@ -8187,6 +8198,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_under_cgroup_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
@@ -8579,13 +8592,16 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (off + size > offsetofend(struct __sk_buff, cb[4]))
return false;
break;
+ case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ if (info->is_ldsx || size != size_default)
+ return false;
+ break;
case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
- case bpf_ctx_range(struct __sk_buff, data):
- case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
break;
@@ -9029,6 +9045,14 @@ static bool xdp_is_valid_access(int off, int size,
}
}
return false;
+ } else {
+ switch (off) {
+ case offsetof(struct xdp_md, data_meta):
+ case offsetof(struct xdp_md, data):
+ case offsetof(struct xdp_md, data_end):
+ if (info->is_ldsx)
+ return false;
+ }
}
switch (off) {
@@ -9354,12 +9378,12 @@ static bool flow_dissector_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
- if (size != size_default)
+ if (info->is_ldsx || size != size_default)
return false;
info->reg_type = PTR_TO_PACKET;
return true;
case bpf_ctx_range(struct __sk_buff, data_end):
- if (size != size_default)
+ if (info->is_ldsx || size != size_default)
return false;
info->reg_type = PTR_TO_PACKET_END;
return true;
@@ -10208,10 +10232,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
} \
} while (0)
-#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
- SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
- S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
-
static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -12045,7 +12065,7 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
}
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
-BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
@@ -12094,6 +12114,7 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);
diff --git a/net/core/gro.c b/net/core/gro.c
index b3b43de1a650..d1f44084e978 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -98,7 +98,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb);
unsigned int delta_truesize;
- unsigned int gro_max_size;
unsigned int new_truesize;
struct sk_buff *lp;
int segs;
@@ -112,12 +111,8 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (p->pp_recycle != skb->pp_recycle)
return -ETOOMANYREFS;
- /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
- gro_max_size = p->protocol == htons(ETH_P_IPV6) ?
- READ_ONCE(p->dev->gro_max_size) :
- READ_ONCE(p->dev->gro_ipv4_max_size);
-
- if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
+ if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) ||
+ NAPI_GRO_CB(skb)->flush))
return -E2BIG;
if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
@@ -374,7 +369,7 @@ static void gro_list_prepare(const struct list_head *head,
skb_mac_header(skb),
maclen);
- /* in most common scenarions 'slow_gro' is 0
+ /* in most common scenarios 'slow_gro' is 0
* otherwise we are already on some slower paths
* either skip all the infrequent tests altogether or
* avoid trying too hard to skip each of them individually
@@ -408,7 +403,8 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
pinfo = skb_shinfo(skb);
frag0 = &pinfo->frags[0];
- if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) &&
+ if (pinfo->nr_frags && skb_frag_page(frag0) &&
+ !PageHighMem(skb_frag_page(frag0)) &&
(!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index ab150641142a..1b4d39e38084 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -45,9 +45,14 @@ static unsigned int default_operstate(const struct net_device *dev)
int iflink = dev_get_iflink(dev);
struct net_device *peer;
- if (iflink == dev->ifindex)
+ /* If called from netdev_run_todo()/linkwatch_sync_dev(),
+ * dev_net(dev) can be already freed, and RTNL is not held.
+ */
+ if (dev->reg_state == NETREG_UNREGISTERED ||
+ iflink == dev->ifindex)
return IF_OPER_DOWN;
+ ASSERT_RTNL();
peer = __dev_get_by_index(dev_net(dev), iflink);
if (!peer)
return IF_OPER_DOWN;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index afb05f58b64c..ae74634310a3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -10,8 +10,10 @@
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
+#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
+#include <net/inet_dscp.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@@ -86,16 +88,18 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
int err = -EINVAL;
if (skb->protocol == htons(ETH_P_IP)) {
struct net_device *dev = skb_dst(skb)->dev;
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
dev_hold(dev);
skb_dst_drop(skb);
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, dev);
+ reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ err = reason ? -EINVAL : 0;
dev_put(dev);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
skb_dst_drop(skb);
@@ -205,7 +209,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
diff --git a/net/core/mp_dmabuf_devmem.h b/net/core/mp_dmabuf_devmem.h
new file mode 100644
index 000000000000..67cd0dd7319c
--- /dev/null
+++ b/net/core/mp_dmabuf_devmem.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Dmabuf device memory provider.
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ *
+ */
+#ifndef _NET_MP_DMABUF_DEVMEM_H
+#define _NET_MP_DMABUF_DEVMEM_H
+
+#include <net/netmem.h>
+
+#if defined(CONFIG_NET_DEVMEM)
+int mp_dmabuf_devmem_init(struct page_pool *pool);
+
+netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp);
+
+void mp_dmabuf_devmem_destroy(struct page_pool *pool);
+
+bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem);
+#else
+static inline int mp_dmabuf_devmem_init(struct page_pool *pool)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline netmem_ref
+mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
+{
+ return 0;
+}
+
+static inline void mp_dmabuf_devmem_destroy(struct page_pool *pool)
+{
+}
+
+static inline bool
+mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
+{
+ return false;
+}
+#endif
+
+#endif /* _NET_MP_DMABUF_DEVMEM_H */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a6fe88eca939..89656d180bc6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -14,7 +14,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
-#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -61,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops;
#endif
+static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
+{
+ int i;
+
+ switch (family) {
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ fallthrough; /* to avoid panic by null-ptr-deref */
+ case AF_INET:
+ i = NEIGH_ARP_TABLE;
+ break;
+ case AF_INET6:
+ i = NEIGH_ND_TABLE;
+ break;
+ }
+
+ return &dev->neighbours[i];
+}
+
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -205,18 +223,14 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
}
}
-static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
- struct neigh_table *tbl)
+bool neigh_remove_one(struct neighbour *n)
{
bool retval = false;
write_lock(&n->lock);
if (refcount_read(&n->refcnt) == 1) {
- struct neighbour *neigh;
-
- neigh = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(*np, neigh);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
retval = true;
}
@@ -226,29 +240,6 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
return retval;
}
-bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
-{
- struct neigh_hash_table *nht;
- void *pkey = ndel->primary_key;
- u32 hash_val;
- struct neighbour *n;
- struct neighbour __rcu **np;
-
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
- hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
- hash_val = hash_val >> (32 - nht->hash_shift);
-
- np = &nht->hash_buckets[hash_val];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock)))) {
- if (n == ndel)
- return neigh_del(n, np, tbl);
- np = &n->next;
- }
- return false;
-}
-
static int neigh_forced_gc(struct neigh_table *tbl)
{
int max_clean = atomic_read(&tbl->gc_entries) -
@@ -276,7 +267,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
remove = true;
write_unlock(&n->lock);
- if (remove && neigh_remove_one(n, tbl))
+ if (remove && neigh_remove_one(n))
shrunk++;
if (shrunk >= max_clean)
break;
@@ -380,54 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
- int i;
- struct neigh_hash_table *nht;
+ struct hlist_head *dev_head;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
+ dev_head = neigh_get_dev_table(dev, tbl->family);
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
- struct neighbour __rcu **np = &nht->hash_buckets[i];
+ hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
+ if (skip_perm && n->nud_state & NUD_PERMANENT)
+ continue;
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- if (dev && n->dev != dev) {
- np = &n->next;
- continue;
- }
- if (skip_perm && n->nud_state & NUD_PERMANENT) {
- np = &n->next;
- continue;
- }
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- We must destroy neighbour entry,
- but someone still uses it.
-
- The destroy will be delayed until
- the last user releases us, but
- we must kill timers etc. and move
- it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ write_lock(&n->lock);
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ neigh_dbg(2, "neigh %p is stray\n", n);
}
+ write_unlock(&n->lock);
+ neigh_cleanup_and_release(n);
}
}
@@ -530,27 +509,21 @@ static void neigh_get_hash_rnd(u32 *x)
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t size = (1 << shift) * sizeof(struct neighbour *);
+ size_t size = (1 << shift) * sizeof(struct hlist_head);
+ struct hlist_head *hash_heads;
struct neigh_hash_table *ret;
- struct neighbour __rcu **buckets;
int i;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
return NULL;
- if (size <= PAGE_SIZE) {
- buckets = kzalloc(size, GFP_ATOMIC);
- } else {
- buckets = (struct neighbour __rcu **)
- __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
- get_order(size));
- kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
- }
- if (!buckets) {
+
+ hash_heads = kvzalloc(size, GFP_ATOMIC);
+ if (!hash_heads) {
kfree(ret);
return NULL;
}
- ret->hash_buckets = buckets;
+ ret->hash_heads = hash_heads;
ret->hash_shift = shift;
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -562,15 +535,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
- struct neighbour __rcu **buckets = nht->hash_buckets;
- if (size <= PAGE_SIZE) {
- kfree(buckets);
- } else {
- kmemleak_free(buckets);
- free_pages((unsigned long)buckets, get_order(size));
- }
+ kvfree(nht->hash_heads);
kfree(nht);
}
@@ -589,24 +555,17 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
return old_nht;
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
- struct neighbour *n, *next;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
- lockdep_is_held(&tbl->lock));
- n != NULL;
- n = next) {
+ neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
hash >>= (32 - new_nht->hash_shift);
- next = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(
- new_nht->hash_buckets[hash],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
}
}
@@ -693,11 +652,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
goto out_tbl_unlock;
}
- for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock));
- n1 != NULL;
- n1 = rcu_dereference_protected(n1->next,
- lockdep_is_held(&tbl->lock))) {
+ neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
@@ -713,10 +668,11 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(nht->hash_buckets[hash_val], n);
+ hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+ hlist_add_head_rcu(&n->dev_list,
+ neigh_get_dev_table(dev, tbl->family));
+
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
@@ -948,10 +904,10 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
+ struct neigh_hash_table *nht;
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
unsigned int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
@@ -976,10 +932,7 @@ static void neigh_periodic_work(struct work_struct *work)
goto out;
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
- np = &nht->hash_buckets[i];
-
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
unsigned int state;
write_lock(&n->lock);
@@ -988,7 +941,7 @@ static void neigh_periodic_work(struct work_struct *work)
if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
(n->flags & NTF_EXT_LEARNED)) {
write_unlock(&n->lock);
- goto next_elt;
+ continue;
}
if (time_before(n->used, n->confirmed) &&
@@ -999,18 +952,14 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
-
-next_elt:
- np = &n->next;
}
/*
* It's fine to release lock here, even if hash table
@@ -1957,7 +1906,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid, extack);
write_lock_bh(&tbl->lock);
neigh_release(neigh);
- neigh_remove_one(neigh, tbl);
+ neigh_remove_one(neigh);
write_unlock_bh(&tbl->lock);
out:
@@ -2728,9 +2677,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
- n != NULL;
- n = rcu_dereference(n->next)) {
+ idx = 0;
+ neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -2876,6 +2824,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
if (err < 0 && cb->strict_check)
return err;
+ err = 0;
s_t = cb->args[0];
@@ -3097,9 +3046,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
- for (n = rcu_dereference(nht->hash_buckets[chain]);
- n != NULL;
- n = rcu_dereference(n->next))
+ neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
cb(n, cookie);
}
read_unlock_bh(&tbl->lock);
@@ -3111,29 +3058,25 @@ EXPORT_SYMBOL(neigh_for_each);
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
- int chain;
struct neigh_hash_table *nht;
+ int chain;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[chain];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
- } else
- np = &n->next;
+ }
write_unlock(&n->lock);
if (release)
neigh_cleanup_and_release(n);
@@ -3190,43 +3133,53 @@ EXPORT_SYMBOL(neigh_xmit);
#ifdef CONFIG_PROC_FS
-static struct neighbour *neigh_get_first(struct seq_file *seq)
+static struct neighbour *neigh_get_valid(struct seq_file *seq,
+ struct neighbour *n,
+ loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
+
+ if (!net_eq(dev_net(n->dev), net))
+ return NULL;
+
+ if (state->neigh_sub_iter) {
+ loff_t fakep = 0;
+ void *v;
+
+ v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
+ if (!v)
+ return NULL;
+ if (pos)
+ return v;
+ }
+
+ if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+ return n;
+
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
+ return n;
+
+ return NULL;
+}
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+ struct neigh_seq_state *state = seq->private;
struct neigh_hash_table *nht = state->nht;
- struct neighbour *n = NULL;
- int bucket;
+ struct neighbour *n, *tmp;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference(nht->hash_buckets[bucket]);
-
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- loff_t fakep = 0;
- void *v;
- v = state->neigh_sub_iter(state, n, &fakep);
- if (!v)
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference(n->next);
+ while (++state->bucket < (1 << nht->hash_shift)) {
+ neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
+ tmp = neigh_get_valid(seq, n, NULL);
+ if (tmp)
+ return tmp;
}
-
- if (n)
- break;
}
- state->bucket = bucket;
- return n;
+ return NULL;
}
static struct neighbour *neigh_get_next(struct seq_file *seq,
@@ -3234,46 +3187,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = seq_file_net(seq);
- struct neigh_hash_table *nht = state->nht;
+ struct neighbour *tmp;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
+
if (v)
return n;
}
- n = rcu_dereference(n->next);
- while (1) {
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- void *v = state->neigh_sub_iter(state, n, pos);
- if (v)
- return n;
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
-
- if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference(n->next);
+ hlist_for_each_entry_continue(n, hash) {
+ tmp = neigh_get_valid(seq, n, pos);
+ if (tmp) {
+ n = tmp;
+ goto out;
}
-
- if (n)
- break;
-
- if (++state->bucket >= (1 << nht->hash_shift))
- break;
-
- n = rcu_dereference(nht->hash_buckets[state->bucket]);
}
+ n = neigh_get_first(seq);
+out:
if (n && pos)
--(*pos);
+
return n;
}
@@ -3376,7 +3311,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
struct neigh_seq_state *state = seq->private;
state->tbl = tbl;
- state->bucket = 0;
+ state->bucket = -1;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
rcu_read_lock();
@@ -3530,8 +3465,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
void neigh_app_ns(struct neighbour *n)
@@ -3887,17 +3821,18 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif /* CONFIG_SYSCTL */
+static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
+ {.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
+ {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
+ {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
+};
+
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info,
- RTNL_FLAG_DUMP_UNLOCKED);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
- 0);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
-
+ rtnl_register_many(neigh_rtnl_msg_handlers);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 444f23e74f8e..2d9afc6e2161 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -32,6 +32,7 @@
#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
+static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -408,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
- WRITE_ONCE(dev->gro_flush_timeout, val);
+ netdev_set_gro_flush_timeout(dev, val);
return 0;
}
@@ -425,7 +426,10 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
- WRITE_ONCE(dev->napi_defer_hard_irqs, val);
+ if (val > S32_MAX)
+ return -ERANGE;
+
+ netdev_set_defer_hard_irqs(dev, (u32)val);
return 0;
}
@@ -438,7 +442,7 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev,
return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
}
-NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
+NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
@@ -1056,7 +1060,7 @@ static const void *rx_queue_namespace(const struct kobject *kobj)
struct device *dev = &queue->dev->dev;
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -1524,7 +1528,7 @@ static const struct attribute_group dql_group = {
};
#else
/* Fake declaration, all the code using it should be dead */
-extern const struct attribute_group dql_group;
+static const struct attribute_group dql_group = {};
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
@@ -1740,7 +1744,7 @@ static const void *netdev_queue_namespace(const struct kobject *kobj)
struct device *dev = &queue->dev->dev;
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -1764,8 +1768,7 @@ static const struct kobj_type netdev_queue_ktype = {
static bool netdev_uses_bql(const struct net_device *dev)
{
- if (dev->features & NETIF_F_LLTX ||
- dev->priv_flags & IFF_NO_QUEUE)
+ if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
return false;
return IS_ENABLED(CONFIG_BQL);
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 6aef976bc1da..f2fa34b1d78d 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -23,7 +23,7 @@
#include <linux/net_dropmon.h>
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/bitops.h>
#define CREATE_TRACE_POINTS
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6a823ba906c6..b5cd3ae4f04c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -56,7 +56,6 @@ static bool init_net_initialized;
* outside.
*/
DECLARE_RWSEM(pernet_ops_rwsem);
-EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -125,7 +124,7 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
int err = -ENOMEM;
void *data = NULL;
- if (ops->id && ops->size) {
+ if (ops->id) {
data = kzalloc(ops->size, GFP_KERNEL);
if (!data)
goto out;
@@ -140,7 +139,7 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
if (!err)
return 0;
- if (ops->id && ops->size) {
+ if (ops->id) {
ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&pernet_ops_rwsem));
ng->ptr[*ops->id] = NULL;
@@ -182,7 +181,8 @@ static void ops_free_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
struct net *net;
- if (ops->size && ops->id) {
+
+ if (ops->id) {
list_for_each_entry(net, net_exit_list, exit_list)
kfree(net_generic(net, *ops->id));
}
@@ -308,16 +308,45 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
+static __net_init void preinit_net_sysctl(struct net *net)
+{
+ net->core.sysctl_somaxconn = SOMAXCONN;
+ /* Limits per socket sk_omem_alloc usage.
+ * TCP zerocopy regular usage needs 128 KB.
+ */
+ net->core.sysctl_optmem_max = 128 * 1024;
+ net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+ net->core.sysctl_tstamp_allow_data = 1;
+}
+
/* init code that must occur even if setup_net() is not called. */
-static __net_init void preinit_net(struct net *net)
+static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns)
{
+ refcount_set(&net->passive, 1);
+ refcount_set(&net->ns.count, 1);
+ ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
+
+ get_random_bytes(&net->hash_mix, sizeof(u32));
+ net->dev_base_seq = 1;
+ net->user_ns = user_ns;
+
+ idr_init(&net->netns_ids);
+ spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
+
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+ mutex_init(&net->rtnl_mutex);
+ lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
+#endif
+
+ preinit_net_sysctl(net);
}
/*
* setup_net runs the initializers for the network namespace object.
*/
-static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
+static __net_init int setup_net(struct net *net)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
@@ -325,19 +354,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
LIST_HEAD(dev_kill_list);
int error = 0;
- refcount_set(&net->ns.count, 1);
- ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
-
- refcount_set(&net->passive, 1);
- get_random_bytes(&net->hash_mix, sizeof(u32));
preempt_disable();
net->net_cookie = gen_cookie_next(&net_cookie);
preempt_enable();
- net->dev_base_seq = 1;
- net->user_ns = user_ns;
- idr_init(&net->netns_ids);
- spin_lock_init(&net->nsid_lock);
- mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -382,32 +401,6 @@ out_undo:
goto out;
}
-static int __net_init net_defaults_init_net(struct net *net)
-{
- net->core.sysctl_somaxconn = SOMAXCONN;
- /* Limits per socket sk_omem_alloc usage.
- * TCP zerocopy regular usage needs 128 KB.
- */
- net->core.sysctl_optmem_max = 128 * 1024;
- net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
-
- return 0;
-}
-
-static struct pernet_operations net_defaults_ops = {
- .init = net_defaults_init_net,
-};
-
-static __init int net_defaults_init(void)
-{
- if (register_pernet_subsys(&net_defaults_ops))
- panic("Cannot initialize net default settings");
-
- return 0;
-}
-
-core_initcall(net_defaults_init);
-
#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
@@ -456,6 +449,21 @@ out_free:
goto out;
}
+static LLIST_HEAD(defer_free_list);
+
+static void net_complete_free(void)
+{
+ struct llist_node *kill_list;
+ struct net *net, *next;
+
+ /* Get the list of namespaces to free from last round. */
+ kill_list = llist_del_all(&defer_free_list);
+
+ llist_for_each_entry_safe(net, next, kill_list, defer_free_list)
+ kmem_cache_free(net_cachep, net);
+
+}
+
static void net_free(struct net *net)
{
if (refcount_dec_and_test(&net->passive)) {
@@ -464,7 +472,8 @@ static void net_free(struct net *net)
/* There should not be any trackers left there. */
ref_tracker_dir_exit(&net->notrefcnt_tracker);
- kmem_cache_free(net_cachep, net);
+ /* Wait for an extra rcu_barrier() before final free. */
+ llist_add(&net->defer_free_list, &defer_free_list);
}
}
@@ -496,8 +505,7 @@ struct net *copy_net_ns(unsigned long flags,
goto dec_ucounts;
}
- preinit_net(net);
- refcount_set(&net->passive, 1);
+ preinit_net(net, user_ns);
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -505,7 +513,7 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0)
goto put_userns;
- rv = setup_net(net, user_ns);
+ rv = setup_net(net);
up_read(&pernet_ops_rwsem);
@@ -650,6 +658,8 @@ static void cleanup_net(struct work_struct *work)
*/
rcu_barrier();
+ net_complete_free();
+
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
@@ -708,20 +718,18 @@ EXPORT_SYMBOL_GPL(get_net_ns);
struct net *get_net_ns_by_fd(int fd)
{
- struct fd f = fdget(fd);
- struct net *net = ERR_PTR(-EINVAL);
+ CLASS(fd, f)(fd);
- if (!f.file)
+ if (fd_empty(f))
return ERR_PTR(-EBADF);
- if (proc_ns_file(f.file)) {
- struct ns_common *ns = get_proc_ns(file_inode(f.file));
+ if (proc_ns_file(fd_file(f))) {
+ struct ns_common *ns = get_proc_ns(file_inode(fd_file(f)));
if (ns->ops == &netns_operations)
- net = get_net(container_of(ns, struct net, ns));
+ return get_net(container_of(ns, struct net, ns));
}
- fdput(f);
- return net;
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
#endif
@@ -1167,13 +1175,23 @@ static void __init netns_ipv4_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
sysctl_tcp_early_demux);
CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_l3mdev_accept);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
sysctl_tcp_reordering);
CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
sysctl_tcp_rmem);
- CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 22);
}
#endif
+static const struct rtnl_msg_handler net_ns_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNSID, .doit = rtnl_net_newid,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_GETNSID, .doit = rtnl_net_getid,
+ .dumpit = rtnl_net_dumpid,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
void __init net_ns_init(void)
{
struct net_generic *ng;
@@ -1199,9 +1217,10 @@ void __init net_ns_init(void)
#ifdef CONFIG_KEYS
init_net.key_domain = &init_net_key_domain;
#endif
+ preinit_net(&init_net, &init_user_ns);
+
down_write(&pernet_ops_rwsem);
- preinit_net(&init_net);
- if (setup_net(&init_net, &init_user_ns))
+ if (setup_net(&init_net))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
@@ -1210,11 +1229,7 @@ void __init net_ns_init(void)
if (register_pernet_subsys(&net_ns_ops))
panic("Could not register network namespace subsystems");
- rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
+ rtnl_register_many(net_ns_rtnl_msg_handlers);
}
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
@@ -1244,7 +1259,7 @@ static int __register_pernet_operations(struct list_head *list,
LIST_HEAD(net_exit_list);
list_add_tail(&ops->list, list);
- if (ops->init || (ops->id && ops->size)) {
+ if (ops->init || ops->id) {
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
@@ -1310,6 +1325,9 @@ static int register_pernet_operations(struct list_head *list,
{
int error;
+ if (WARN_ON(!!ops->id ^ !!ops->size))
+ return -EINVAL;
+
if (ops->id) {
error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
GFP_KERNEL);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 8350a0afa9ec..a89cbd8d87c3 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -9,16 +9,21 @@
#include "netdev-genl-gen.h"
#include <uapi/linux/netdev.h>
+#include <linux/list.h>
/* Integer value ranges */
static const struct netlink_range_validation netdev_a_page_pool_id_range = {
.min = 1ULL,
- .max = 4294967295ULL,
+ .max = U32_MAX,
};
static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = {
.min = 1ULL,
- .max = 2147483647ULL,
+ .max = S32_MAX,
+};
+
+static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range = {
+ .max = S32_MAX,
};
/* Common nested types */
@@ -27,6 +32,11 @@ const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFIND
[NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
};
+const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = {
+ [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, },
+ [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
+};
+
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
[NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
@@ -74,6 +84,21 @@ static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE
[NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
};
+/* NETDEV_CMD_BIND_RX - do */
+static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] = {
+ [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
+ [NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
+};
+
+/* NETDEV_CMD_NAPI_SET - do */
+static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = {
+ [NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
+ [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
+ [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
+ [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, },
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -151,6 +176,20 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_QSTATS_SCOPE,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NETDEV_CMD_BIND_RX,
+ .doit = netdev_nl_bind_rx_doit,
+ .policy = netdev_bind_rx_nl_policy,
+ .maxattr = NETDEV_A_DMABUF_FD,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_SET,
+ .doit = netdev_nl_napi_set_doit,
+ .policy = netdev_napi_set_nl_policy,
+ .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
@@ -168,4 +207,7 @@ struct genl_family netdev_nl_family __ro_after_init = {
.n_split_ops = ARRAY_SIZE(netdev_nl_ops),
.mcgrps = netdev_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
+ .sock_priv_size = sizeof(struct list_head),
+ .sock_priv_init = (void *)netdev_nl_sock_priv_init,
+ .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy,
};
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 4db40fd5b4a9..e09dd7539ff2 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -10,9 +10,11 @@
#include <net/genetlink.h>
#include <uapi/linux/netdev.h>
+#include <linux/list.h>
/* Common nested types */
extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
+extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1];
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
@@ -30,6 +32,8 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,
@@ -38,4 +42,7 @@ enum {
extern struct genl_family netdev_nl_family;
+void netdev_nl_sock_priv_init(struct list_head *priv);
+void netdev_nl_sock_priv_destroy(struct list_head *priv);
+
#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 05f9515d2c05..9527dd46e4dc 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -3,16 +3,17 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
+#include <net/busy_poll.h>
#include <net/net_namespace.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
-#include <net/netdev_rx_queue.h>
-#include <net/netdev_queues.h>
-#include <net/busy_poll.h>
-#include "netdev-genl-gen.h"
#include "dev.h"
+#include "devmem.h"
+#include "netdev-genl-gen.h"
struct netdev_nl_dump_ctx {
unsigned long ifindex;
@@ -23,7 +24,7 @@ struct netdev_nl_dump_ctx {
static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
{
- NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);
return (struct netdev_nl_dump_ctx *)cb->ctx;
}
@@ -160,6 +161,9 @@ static int
netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
const struct genl_info *info)
{
+ unsigned long irq_suspend_timeout;
+ unsigned long gro_flush_timeout;
+ u32 napi_defer_hard_irqs;
void *hdr;
pid_t pid;
@@ -188,6 +192,21 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
goto nla_put_failure;
}
+ napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
+ if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
+ napi_defer_hard_irqs))
+ goto nla_put_failure;
+
+ irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
+ if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ irq_suspend_timeout))
+ goto nla_put_failure;
+
+ gro_flush_timeout = napi_get_gro_flush_timeout(napi);
+ if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ gro_flush_timeout))
+ goto nla_put_failure;
+
genlmsg_end(rsp, hdr);
return 0;
@@ -214,13 +233,17 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
rtnl_lock();
+ rcu_read_lock();
napi = napi_by_id(napi_id);
- if (napi)
+ if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
- else
- err = -EINVAL;
+ } else {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
+ err = -ENOENT;
+ }
+ rcu_read_unlock();
rtnl_unlock();
if (err)
@@ -289,9 +312,63 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
static int
+netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
+{
+ u64 irq_suspend_timeout = 0;
+ u64 gro_flush_timeout = 0;
+ u32 defer = 0;
+
+ if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
+ defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
+ napi_set_defer_hard_irqs(napi, defer);
+ }
+
+ if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
+ irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
+ napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
+ }
+
+ if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
+ gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
+ napi_set_gro_flush_timeout(napi, gro_flush_timeout);
+ }
+
+ return 0;
+}
+
+int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct napi_struct *napi;
+ unsigned int napi_id;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
+ return -EINVAL;
+
+ napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
+
+ rtnl_lock();
+ rcu_read_lock();
+
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ err = netdev_nl_napi_set_config(napi, info);
+ } else {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
+ err = -ENOENT;
+ }
+
+ rcu_read_unlock();
+ rtnl_unlock();
+
+ return err;
+}
+
+static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
+ struct net_devmem_dmabuf_binding *binding;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@@ -311,6 +388,12 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
rxq->napi->napi_id))
goto nla_put_failure;
+
+ binding = rxq->mp_params.mp_priv;
+ if (binding &&
+ nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
+ goto nla_put_failure;
+
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
@@ -721,6 +804,129 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
return err;
}
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
+ struct net_devmem_dmabuf_binding *binding;
+ struct list_head *sock_binding_list;
+ u32 ifindex, dmabuf_fd, rxq_idx;
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ struct nlattr *attr;
+ int rem, err = 0;
+ void *hdr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
+ dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
+
+ sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
+ NETLINK_CB(skb).sk);
+ if (IS_ERR(sock_binding_list))
+ return PTR_ERR(sock_binding_list);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_genlmsg_free;
+ }
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (!netdev || !netif_device_present(netdev)) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+
+ if (dev_xdp_prog_count(netdev)) {
+ NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
+ err = -EEXIST;
+ goto err_unlock;
+ }
+
+ binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack);
+ if (IS_ERR(binding)) {
+ err = PTR_ERR(binding);
+ goto err_unlock;
+ }
+
+ nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ err = nla_parse_nested(
+ tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr,
+ netdev_queue_id_nl_policy, info->extack);
+ if (err < 0)
+ goto err_unbind;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
+ NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) {
+ err = -EINVAL;
+ goto err_unbind;
+ }
+
+ if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
+ NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
+ err = -EINVAL;
+ goto err_unbind;
+ }
+
+ rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
+
+ err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
+ info->extack);
+ if (err)
+ goto err_unbind;
+ }
+
+ list_add(&binding->list, sock_binding_list);
+
+ nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
+ genlmsg_end(rsp, hdr);
+
+ err = genlmsg_reply(rsp, info);
+ if (err)
+ goto err_unbind;
+
+ rtnl_unlock();
+
+ return 0;
+
+err_unbind:
+ net_devmem_unbind_dmabuf(binding);
+err_unlock:
+ rtnl_unlock();
+err_genlmsg_free:
+ nlmsg_free(rsp);
+ return err;
+}
+
+void netdev_nl_sock_priv_init(struct list_head *priv)
+{
+ INIT_LIST_HEAD(priv);
+}
+
+void netdev_nl_sock_priv_destroy(struct list_head *priv)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ struct net_devmem_dmabuf_binding *temp;
+
+ list_for_each_entry_safe(binding, temp, priv, list) {
+ rtnl_lock();
+ net_devmem_unbind_dmabuf(binding);
+ rtnl_unlock();
+ }
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
new file mode 100644
index 000000000000..e217a5838c87
--- /dev/null
+++ b/net/core/netdev_rx_queue.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/netdevice.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+
+#include "page_pool_priv.h"
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
+{
+ struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
+ void *new_mem, *old_mem;
+ int err;
+
+ if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop ||
+ !dev->queue_mgmt_ops->ndo_queue_mem_free ||
+ !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
+ !dev->queue_mgmt_ops->ndo_queue_start)
+ return -EOPNOTSUPP;
+
+ ASSERT_RTNL();
+
+ new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ if (!new_mem)
+ return -ENOMEM;
+
+ old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ if (!old_mem) {
+ err = -ENOMEM;
+ goto err_free_new_mem;
+ }
+
+ err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_free_old_mem;
+
+ err = page_pool_check_memory_provider(dev, rxq);
+ if (err)
+ goto err_free_new_queue_mem;
+
+ err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx);
+ if (err)
+ goto err_free_new_queue_mem;
+
+ err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_start_queue;
+
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+
+ kvfree(old_mem);
+ kvfree(new_mem);
+
+ return 0;
+
+err_start_queue:
+ /* Restarting the queue with old_mem should be successful as we haven't
+ * changed any of the queue configuration, and there is not much we can
+ * do to recover from a failure here.
+ *
+ * WARN if we fail to recover the old rx queue, and at least free
+ * old_mem so we don't also leak that.
+ */
+ if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) {
+ WARN(1,
+ "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
+ rxq_idx);
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+ }
+
+err_free_new_queue_mem:
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
+
+err_free_old_mem:
+ kvfree(old_mem);
+
+err_free_new_mem:
+ kvfree(new_mem);
+
+ return err;
+}
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
new file mode 100644
index 000000000000..7eadb8393e00
--- /dev/null
+++ b/net/core/netmem_priv.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NETMEM_PRIV_H
+#define __NETMEM_PRIV_H
+
+static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->pp_magic;
+}
+
+static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+ __netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
+}
+
+static inline void netmem_clear_pp_magic(netmem_ref netmem)
+{
+ __netmem_clear_lsb(netmem)->pp_magic = 0;
+}
+
+static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
+{
+ __netmem_clear_lsb(netmem)->pp = pool;
+}
+
+static inline void netmem_set_dma_addr(netmem_ref netmem,
+ unsigned long dma_addr)
+{
+ __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
+}
+#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d657b042d5a0..2e459b9d88eb 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -34,7 +34,7 @@
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <trace/events/napi.h>
#include <linux/kconfig.h>
@@ -45,11 +45,6 @@
#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
-
-static struct sk_buff_head skb_pool;
-
-DEFINE_STATIC_SRCU(netpoll_srcu);
-
#define USEC_PER_POLL 50
#define MAX_SKB_SIZE \
@@ -162,7 +157,7 @@ static void poll_one_napi(struct napi_struct *napi)
if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
return;
- /* We explicilty pass the polling call a budget of 0 to
+ /* We explicitly pass the polling call a budget of 0 to
* indicate that we are clearing the Tx path only.
*/
work = napi->poll(napi, 0);
@@ -220,39 +215,39 @@ EXPORT_SYMBOL(netpoll_poll_dev);
void netpoll_poll_disable(struct net_device *dev)
{
struct netpoll_info *ni;
- int idx;
+
might_sleep();
- idx = srcu_read_lock(&netpoll_srcu);
- ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
+ ni = rtnl_dereference(dev->npinfo);
if (ni)
down(&ni->dev_lock);
- srcu_read_unlock(&netpoll_srcu, idx);
}
void netpoll_poll_enable(struct net_device *dev)
{
struct netpoll_info *ni;
- rcu_read_lock();
- ni = rcu_dereference(dev->npinfo);
+
+ ni = rtnl_dereference(dev->npinfo);
if (ni)
up(&ni->dev_lock);
- rcu_read_unlock();
}
-static void refill_skbs(void)
+static void refill_skbs(struct netpoll *np)
{
+ struct sk_buff_head *skb_pool;
struct sk_buff *skb;
unsigned long flags;
- spin_lock_irqsave(&skb_pool.lock, flags);
- while (skb_pool.qlen < MAX_SKBS) {
+ skb_pool = &np->skb_pool;
+
+ spin_lock_irqsave(&skb_pool->lock, flags);
+ while (skb_pool->qlen < MAX_SKBS) {
skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
if (!skb)
break;
- __skb_queue_tail(&skb_pool, skb);
+ __skb_queue_tail(skb_pool, skb);
}
- spin_unlock_irqrestore(&skb_pool.lock, flags);
+ spin_unlock_irqrestore(&skb_pool->lock, flags);
}
static void zap_completion_queue(void)
@@ -289,12 +284,12 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
struct sk_buff *skb;
zap_completion_queue();
- refill_skbs();
+ refill_skbs(np);
repeat:
skb = alloc_skb(len, GFP_ATOMIC);
if (!skb)
- skb = skb_dequeue(&skb_pool);
+ skb = skb_dequeue(&np->skb_pool);
if (!skb) {
if (++count < 10) {
@@ -536,6 +531,14 @@ static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
return -1;
}
+static void skb_pool_flush(struct netpoll *np)
+{
+ struct sk_buff_head *skb_pool;
+
+ skb_pool = &np->skb_pool;
+ skb_queue_purge_reason(skb_pool, SKB_CONSUMED);
+}
+
int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
@@ -624,17 +627,14 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
const struct net_device_ops *ops;
int err;
- np->dev = ndev;
- strscpy(np->dev_name, ndev->name, IFNAMSIZ);
-
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
- np->dev_name);
+ ndev->name);
err = -ENOTSUPP;
goto out;
}
- if (!ndev->npinfo) {
+ if (!rcu_access_pointer(ndev->npinfo)) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
@@ -647,9 +647,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
refcount_set(&npinfo->refcnt, 1);
- ops = np->dev->netdev_ops;
+ ops = ndev->netdev_ops;
if (ops->ndo_netpoll_setup) {
- err = ops->ndo_netpoll_setup(ndev, npinfo);
+ err = ops->ndo_netpoll_setup(ndev);
if (err)
goto free_npinfo;
}
@@ -658,6 +658,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
refcount_inc(&npinfo->refcnt);
}
+ np->dev = ndev;
+ strscpy(np->dev_name, ndev->name, IFNAMSIZ);
npinfo->netpoll = np;
/* last thing to do is link it to the net device structure */
@@ -675,9 +677,12 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
struct net_device *ndev = NULL;
+ bool ip_overwritten = false;
struct in_device *in_dev;
int err;
+ skb_queue_head_init(&np->skb_pool);
+
rtnl_lock();
if (np->dev_name[0]) {
struct net *net = current->nsproxy->net_ns;
@@ -739,6 +744,7 @@ put_noaddr:
}
np->local_ip.ip = ifa->ifa_local;
+ ip_overwritten = true;
np_info(np, "local IP %pI4\n", &np->local_ip.ip);
} else {
#if IS_ENABLED(CONFIG_IPV6)
@@ -755,6 +761,7 @@ put_noaddr:
!!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
continue;
np->local_ip.in6 = ifp->addr;
+ ip_overwritten = true;
err = 0;
break;
}
@@ -776,15 +783,20 @@ put_noaddr:
}
/* fill up the skb queue */
- refill_skbs();
+ refill_skbs(np);
err = __netpoll_setup(np, ndev);
if (err)
- goto put;
+ goto flush;
rtnl_unlock();
return 0;
+flush:
+ skb_pool_flush(np);
put:
+ DEBUG_NET_WARN_ON_ONCE(np->dev);
+ if (ip_overwritten)
+ memset(&np->local_ip, 0, sizeof(np->local_ip));
netdev_put(ndev, &np->dev_tracker);
unlock:
rtnl_unlock();
@@ -792,13 +804,6 @@ unlock:
}
EXPORT_SYMBOL(netpoll_setup);
-static int __init netpoll_init(void)
-{
- skb_queue_head_init(&skb_pool);
- return 0;
-}
-core_initcall(netpoll_init);
-
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
struct netpoll_info *npinfo =
@@ -824,8 +829,6 @@ void __netpoll_cleanup(struct netpoll *np)
if (!npinfo)
return;
- synchronize_srcu(&netpoll_srcu);
-
if (refcount_dec_and_test(&npinfo->refcnt)) {
const struct net_device_ops *ops;
@@ -837,6 +840,8 @@ void __netpoll_cleanup(struct netpoll *np)
call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info);
} else
RCU_INIT_POINTER(np->dev->npinfo, NULL);
+
+ skb_pool_flush(np);
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
@@ -851,14 +856,20 @@ void __netpoll_free(struct netpoll *np)
}
EXPORT_SYMBOL_GPL(__netpoll_free);
+void do_netpoll_cleanup(struct netpoll *np)
+{
+ __netpoll_cleanup(np);
+ netdev_put(np->dev, &np->dev_tracker);
+ np->dev = NULL;
+}
+EXPORT_SYMBOL(do_netpoll_cleanup);
+
void netpoll_cleanup(struct netpoll *np)
{
rtnl_lock();
if (!np->dev)
goto out;
- __netpoll_cleanup(np);
- netdev_put(np->dev, &np->dev_tracker);
- np->dev = NULL;
+ do_netpoll_cleanup(np);
out:
rtnl_unlock();
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 2abe6e919224..f89cf93f6eb4 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
@@ -24,8 +25,12 @@
#include <trace/events/page_pool.h>
+#include "mp_dmabuf_devmem.h"
+#include "netmem_priv.h"
#include "page_pool_priv.h"
+DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
@@ -187,6 +192,8 @@ static int page_pool_init(struct page_pool *pool,
int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
+ struct netdev_rx_queue *rxq;
+ int err;
page_pool_struct_check();
@@ -268,7 +275,37 @@ static int page_pool_init(struct page_pool *pool,
if (pool->dma_map)
get_device(pool->p.dev);
+ if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
+ /* We rely on rtnl_lock()ing to make sure netdev_rx_queue
+ * configuration doesn't change while we're initializing
+ * the page_pool.
+ */
+ ASSERT_RTNL();
+ rxq = __netif_get_rx_queue(pool->slow.netdev,
+ pool->slow.queue_idx);
+ pool->mp_priv = rxq->mp_params.mp_priv;
+ }
+
+ if (pool->mp_priv) {
+ err = mp_dmabuf_devmem_init(pool);
+ if (err) {
+ pr_warn("%s() mem-provider init failed %d\n", __func__,
+ err);
+ goto free_ptr_ring;
+ }
+
+ static_branch_inc(&page_pool_mem_providers);
+ }
+
return 0;
+
+free_ptr_ring:
+ ptr_ring_cleanup(&pool->ring, NULL);
+#ifdef CONFIG_PAGE_POOL_STATS
+ if (!pool->system)
+ free_percpu(pool->recycle_stats);
+#endif
+ return err;
}
static void page_pool_uninit(struct page_pool *pool)
@@ -358,7 +395,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
if (unlikely(!netmem))
break;
- if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
+ if (likely(netmem_is_pref_nid(netmem, pref_nid))) {
pool->alloc.cache[pool->alloc.count++] = netmem;
} else {
/* NUMA mismatch;
@@ -452,32 +489,6 @@ unmap_failed:
return false;
}
-static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
-{
- struct page *page = netmem_to_page(netmem);
-
- page->pp = pool;
- page->pp_magic |= PP_SIGNATURE;
-
- /* Ensuring all pages have been split into one fragment initially:
- * page_pool_set_pp_info() is only called once for every page when it
- * is allocated from the page allocator and page_pool_fragment_page()
- * is dirtying the same cache line as the page->pp_magic above, so
- * the overhead is negligible.
- */
- page_pool_fragment_netmem(netmem, 1);
- if (pool->has_init_callback)
- pool->slow.init_callback(netmem, pool->slow.init_arg);
-}
-
-static void page_pool_clear_pp_info(netmem_ref netmem)
-{
- struct page *page = netmem_to_page(netmem);
-
- page->pp_magic = 0;
- page->pp = NULL;
-}
-
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
@@ -573,7 +584,10 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
- netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
+ netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
+ else
+ netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_netmem);
@@ -609,6 +623,28 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
return inflight;
}
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
+{
+ netmem_set_pp(netmem, pool);
+ netmem_or_pp_magic(netmem, PP_SIGNATURE);
+
+ /* Ensuring all pages have been split into one fragment initially:
+ * page_pool_set_pp_info() is only called once for every page when it
+ * is allocated from the page allocator and page_pool_fragment_page()
+ * is dirtying the same cache line as the page->pp_magic above, so
+ * the overhead is negligible.
+ */
+ page_pool_fragment_netmem(netmem, 1);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(netmem, pool->slow.init_arg);
+}
+
+void page_pool_clear_pp_info(netmem_ref netmem)
+{
+ netmem_clear_pp_magic(netmem);
+ netmem_set_pp(netmem, NULL);
+}
+
static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
netmem_ref netmem)
{
@@ -637,8 +673,13 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
{
int count;
+ bool put;
- __page_pool_release_page_dma(pool, netmem);
+ put = true;
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
+ put = mp_dmabuf_devmem_release_page(pool, netmem);
+ else
+ __page_pool_release_page_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
@@ -646,8 +687,10 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, netmem, count);
- page_pool_clear_pp_info(netmem);
- put_page(netmem_to_page(netmem));
+ if (put) {
+ page_pool_clear_pp_info(netmem);
+ put_page(netmem_to_page(netmem));
+ }
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
* __page_cache_release() call).
@@ -692,8 +735,9 @@ static bool page_pool_recycle_in_cache(netmem_ref netmem,
static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
{
- return page_ref_count(netmem_to_page(netmem)) == 1 &&
- !page_is_pfmemalloc(netmem_to_page(netmem));
+ return netmem_is_net_iov(netmem) ||
+ (page_ref_count(netmem_to_page(netmem)) == 1 &&
+ !page_is_pfmemalloc(netmem_to_page(netmem)));
}
/* If the page refcnt == 1, this will try to recycle the page.
@@ -728,6 +772,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
/* Page found as candidate for recycling */
return netmem;
}
+
/* Fallback/non-XDP mode: API user have elevated refcnt.
*
* Many drivers split up the page into fragments, and some
@@ -905,6 +950,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
if (netmem && *offset + size > max_size) {
netmem = page_pool_drain_frag(pool, netmem);
if (netmem) {
+ recycle_stat_inc(pool, cached);
alloc_stat_inc(pool, fast);
goto frag_reset;
}
@@ -929,7 +975,6 @@ frag_reset:
pool->frag_users++;
pool->frag_offset = *offset + size;
- alloc_stat_inc(pool, fast);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_frag_netmem);
@@ -949,7 +994,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
/* Empty recycle ring */
while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
- if (!(page_ref_count(netmem_to_page(netmem)) == 1))
+ if (!(netmem_ref_count(netmem) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
__func__, netmem_ref_count(netmem));
@@ -964,6 +1009,12 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
+
+ if (pool->mp_priv) {
+ mp_dmabuf_devmem_destroy(pool);
+ static_branch_dec(&page_pool_mem_providers);
+ }
+
kfree(pool);
}
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
index 90665d40f1eb..57439787b9c2 100644
--- a/net/core/page_pool_priv.h
+++ b/net/core/page_pool_priv.h
@@ -3,10 +3,56 @@
#ifndef __PAGE_POOL_PRIV_H
#define __PAGE_POOL_PRIV_H
+#include <net/page_pool/helpers.h>
+
+#include "netmem_priv.h"
+
s32 page_pool_inflight(const struct page_pool *pool, bool strict);
int page_pool_list(struct page_pool *pool);
void page_pool_detached(struct page_pool *pool);
void page_pool_unlist(struct page_pool *pool);
+static inline bool
+page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr)
+{
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
+ netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT);
+
+ /* We assume page alignment to shave off bottom bits,
+ * if this "compression" doesn't work we need to drop.
+ */
+ return addr != (dma_addr_t)netmem_get_dma_addr(netmem)
+ << PAGE_SHIFT;
+ }
+
+ netmem_set_dma_addr(netmem, addr);
+ return false;
+}
+
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
+}
+
+#if defined(CONFIG_PAGE_POOL)
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem);
+void page_pool_clear_pp_info(netmem_ref netmem);
+int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq);
+#else
+static inline void page_pool_set_pp_info(struct page_pool *pool,
+ netmem_ref netmem)
+{
+}
+static inline void page_pool_clear_pp_info(netmem_ref netmem)
+{
+}
+static inline int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq)
+{
+ return 0;
+}
+#endif
+
#endif
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 3a3277ba167b..48335766c1bf 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -4,10 +4,12 @@
#include <linux/netdevice.h>
#include <linux/xarray.h>
#include <net/net_debug.h>
-#include <net/page_pool/types.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/types.h>
#include <net/sock.h>
+#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@@ -212,6 +214,7 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
void *hdr;
@@ -241,6 +244,9 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
+ if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
+ goto err_cancel;
+
genlmsg_end(rsp, hdr);
return 0;
@@ -344,6 +350,30 @@ void page_pool_unlist(struct page_pool *pool)
mutex_unlock(&page_pools_lock);
}
+int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq)
+{
+ struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ struct page_pool *pool;
+ struct hlist_node *n;
+
+ if (!binding)
+ return 0;
+
+ mutex_lock(&page_pools_lock);
+ hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) {
+ if (pool->mp_priv != binding)
+ continue;
+
+ if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) {
+ mutex_unlock(&page_pools_lock);
+ return 0;
+ }
+ }
+ mutex_unlock(&page_pools_lock);
+ return -ENODATA;
+}
+
static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
{
struct page_pool *pool;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 197a50ef8e2e..7e23cacbe66e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,7 +69,7 @@
*
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
- * to /proc gives result code thats should be read be the "writer".
+ * to /proc gives result code that should be read be the "writer".
* For practical use this should be no problem.
*
* Note when adding devices to a specific CPU there good idea to also assign
@@ -2285,7 +2285,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
s64 remaining;
struct hrtimer_sleeper t;
- hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_setup_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_set_expires(&t.timer, spin_until);
remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2371,11 +2371,11 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
if (pkt_dev->spi) {
/* We need as quick as possible to find the right SA
- * Searching with minimum criteria to archieve this.
+ * Searching with minimum criteria to achieve, this.
*/
x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
} else {
- /* slow path: we dont already have xfrm_state */
+ /* slow path: we don't already have xfrm_state */
x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
(xfrm_address_t *)&pkt_dev->cur_daddr,
(xfrm_address_t *)&pkt_dev->cur_saddr,
@@ -3838,8 +3838,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
pkt_dev->ipsproto = IPPROTO_ESP;
- /* xfrm tunnel mode needs additional dst to extract outter
- * ip header protocol/ttl/id field, here creat a phony one.
+ /* xfrm tunnel mode needs additional dst to extract outer
+ * ip header protocol/ttl/id field, here create a phony one.
* instead of looking for a valid rt, which definitely hurting
* performance under such circumstance.
*/
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 73fd7f543fd0..ebcfc2debf1a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -179,6 +179,166 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net)
+{
+ ASSERT_RTNL();
+
+ mutex_lock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_lock);
+
+void __rtnl_net_unlock(struct net *net)
+{
+ ASSERT_RTNL();
+
+ mutex_unlock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_unlock);
+
+void rtnl_net_lock(struct net *net)
+{
+ rtnl_lock();
+ __rtnl_net_lock(net);
+}
+EXPORT_SYMBOL(rtnl_net_lock);
+
+void rtnl_net_unlock(struct net *net)
+{
+ __rtnl_net_unlock(net);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL(rtnl_net_unlock);
+
+int rtnl_net_trylock(struct net *net)
+{
+ int ret = rtnl_trylock();
+
+ if (ret)
+ __rtnl_net_lock(net);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtnl_net_trylock);
+
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ if (net_eq(net_a, net_b))
+ return 0;
+
+ /* always init_net first */
+ if (net_eq(net_a, &init_net))
+ return -1;
+
+ if (net_eq(net_b, &init_net))
+ return 1;
+
+ /* otherwise lock in ascending order */
+ return net_a < net_b ? -1 : 1;
+}
+
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
+{
+ const struct net *net_a, *net_b;
+
+ net_a = container_of(a, struct net, rtnl_mutex.dep_map);
+ net_b = container_of(b, struct net, rtnl_mutex.dep_map);
+
+ return rtnl_net_cmp_locks(net_a, net_b);
+}
+
+bool rtnl_net_is_locked(struct net *net)
+{
+ return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_net_is_locked);
+
+bool lockdep_rtnl_net_is_held(struct net *net)
+{
+ return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
+#else
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ /* No need to swap */
+ return -1;
+}
+#endif
+
+struct rtnl_nets {
+ /* ->newlink() needs to freeze 3 netns at most;
+ * 2 for the new device, 1 for its peer.
+ */
+ struct net *net[3];
+ unsigned char len;
+};
+
+static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
+{
+ memset(rtnl_nets, 0, sizeof(*rtnl_nets));
+}
+
+static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ put_net(rtnl_nets->net[i]);
+ rtnl_nets->net[i] = NULL;
+ }
+
+ rtnl_nets->len = 0;
+}
+
+/**
+ * rtnl_nets_add - Add netns to be locked before ->newlink().
+ *
+ * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
+ * @net: netns pointer with an extra refcnt held.
+ *
+ * The extra refcnt is released in rtnl_nets_destroy().
+ */
+static void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
+{
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
+ case 0:
+ put_net(net);
+ return;
+ case 1:
+ swap(rtnl_nets->net[i], net);
+ }
+ }
+
+ rtnl_nets->net[i] = net;
+ rtnl_nets->len++;
+}
+
+static void rtnl_nets_lock(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ rtnl_lock();
+
+ for (i = 0; i < rtnl_nets->len; i++)
+ __rtnl_net_lock(rtnl_nets->net[i]);
+}
+
+static void rtnl_nets_unlock(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++)
+ __rtnl_net_unlock(rtnl_nets->net[i]);
+
+ rtnl_unlock();
+}
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
@@ -269,64 +429,13 @@ unlock:
}
/**
- * rtnl_register_module - Register a rtnetlink message type
- *
- * @owner: module registering the hook (THIS_MODULE)
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
- *
- * Like rtnl_register, but for use by removable modules.
- */
-int rtnl_register_module(struct module *owner,
- int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
-{
- return rtnl_register_internal(owner, protocol, msgtype,
- doit, dumpit, flags);
-}
-EXPORT_SYMBOL_GPL(rtnl_register_module);
-
-/**
- * rtnl_register - Register a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
- *
- * Registers the specified function pointers (at least one of them has
- * to be non-NULL) to be called whenever a request message for the
- * specified protocol family and message type is received.
- *
- * The special protocol family PF_UNSPEC may be used to define fallback
- * function pointers for the case when no entry for the specific protocol
- * family exists.
- */
-void rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
-{
- int err;
-
- err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
- flags);
- if (err)
- pr_err("Unable to register rtnetlink message handler, "
- "protocol = %d, message type = %d\n", protocol, msgtype);
-}
-
-/**
* rtnl_unregister - Unregister a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
*
* Returns 0 on success or a negative error code.
*/
-int rtnl_unregister(int protocol, int msgtype)
+static int rtnl_unregister(int protocol, int msgtype)
{
struct rtnl_link __rcu **tab;
struct rtnl_link *link;
@@ -349,7 +458,6 @@ int rtnl_unregister(int protocol, int msgtype)
return 0;
}
-EXPORT_SYMBOL_GPL(rtnl_unregister);
/**
* rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
@@ -384,46 +492,86 @@ void rtnl_unregister_all(int protocol)
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
-static LIST_HEAD(link_ops);
-
-static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+/**
+ * __rtnl_register_many - Register rtnetlink message types
+ * @handlers: Array of struct rtnl_msg_handlers
+ * @n: The length of @handlers
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * When one element of @handlers fails to register,
+ * 1) built-in: panics.
+ * 2) modules : the previous successful registrations are unwinded
+ * and an error is returned.
+ *
+ * Use rtnl_register_many().
+ */
+int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n)
{
- const struct rtnl_link_ops *ops;
+ const struct rtnl_msg_handler *handler;
+ int i, err;
+
+ for (i = 0, handler = handlers; i < n; i++, handler++) {
+ err = rtnl_register_internal(handler->owner, handler->protocol,
+ handler->msgtype, handler->doit,
+ handler->dumpit, handler->flags);
+ if (err) {
+ if (!handler->owner)
+ panic("Unable to register rtnetlink message "
+ "handlers, %pS\n", handlers);
- list_for_each_entry(ops, &link_ops, list) {
- if (!strcmp(ops->kind, kind))
- return ops;
+ __rtnl_unregister_many(handlers, i);
+ break;
+ }
}
- return NULL;
+
+ return err;
}
+EXPORT_SYMBOL_GPL(__rtnl_register_many);
-/**
- * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
- * @ops: struct rtnl_link_ops * to register
- *
- * The caller must hold the rtnl_mutex. This function should be used
- * by drivers that create devices during module initialization. It
- * must be called before registering the devices.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_link_register(struct rtnl_link_ops *ops)
+void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n)
{
- if (rtnl_link_ops_get(ops->kind))
- return -EEXIST;
+ const struct rtnl_msg_handler *handler;
+ int i;
- /* The check for alloc/setup is here because if ops
- * does not have that filled up, it is not possible
- * to use the ops for creating device. So do not
- * fill up dellink as well. That disables rtnl_dellink.
- */
- if ((ops->alloc || ops->setup) && !ops->dellink)
- ops->dellink = unregister_netdevice_queue;
+ for (i = n - 1, handler = handlers + n - 1; i >= 0; i--, handler--)
+ rtnl_unregister(handler->protocol, handler->msgtype);
+}
+EXPORT_SYMBOL_GPL(__rtnl_unregister_many);
- list_add_tail(&ops->list, &link_ops);
- return 0;
+static DEFINE_MUTEX(link_ops_mutex);
+static LIST_HEAD(link_ops);
+
+static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index)
+{
+ struct rtnl_link_ops *ops;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(ops, &link_ops, list) {
+ if (!strcmp(ops->kind, kind)) {
+ *srcu_index = srcu_read_lock(&ops->srcu);
+ goto unlock;
+ }
+ }
+
+ ops = NULL;
+unlock:
+ rcu_read_unlock();
+
+ return ops;
+}
+
+static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index)
+{
+ srcu_read_unlock(&ops->srcu, srcu_index);
}
-EXPORT_SYMBOL_GPL(__rtnl_link_register);
/**
* rtnl_link_register - Register rtnl_link_ops with rtnetlink.
@@ -433,6 +581,7 @@ EXPORT_SYMBOL_GPL(__rtnl_link_register);
*/
int rtnl_link_register(struct rtnl_link_ops *ops)
{
+ struct rtnl_link_ops *tmp;
int err;
/* Sanity-check max sizes to avoid stack buffer overflow. */
@@ -440,9 +589,31 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE))
return -EINVAL;
- rtnl_lock();
- err = __rtnl_link_register(ops);
- rtnl_unlock();
+ /* The check for alloc/setup is here because if ops
+ * does not have that filled up, it is not possible
+ * to use the ops for creating device. So do not
+ * fill up dellink as well. That disables rtnl_dellink.
+ */
+ if ((ops->alloc || ops->setup) && !ops->dellink)
+ ops->dellink = unregister_netdevice_queue;
+
+ err = init_srcu_struct(&ops->srcu);
+ if (err)
+ return err;
+
+ mutex_lock(&link_ops_mutex);
+
+ list_for_each_entry(tmp, &link_ops, list) {
+ if (!strcmp(ops->kind, tmp->kind)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ list_add_tail_rcu(&ops->list, &link_ops);
+unlock:
+ mutex_unlock(&link_ops_mutex);
+
return err;
}
EXPORT_SYMBOL_GPL(rtnl_link_register);
@@ -459,25 +630,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
unregister_netdevice_many(&list_kill);
}
-/**
- * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
- * @ops: struct rtnl_link_ops * to unregister
- *
- * The caller must hold the rtnl_mutex and guarantee net_namespace_list
- * integrity (hold pernet_ops_rwsem for writing to close the race
- * with setup_net() and cleanup_net()).
- */
-void __rtnl_link_unregister(struct rtnl_link_ops *ops)
-{
- struct net *net;
-
- for_each_net(net) {
- __rtnl_kill_links(net, ops);
- }
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
-
/* Return with the rtnl_lock held when there are no network
* devices unregistering in any network namespace.
*/
@@ -506,10 +658,22 @@ static void rtnl_lock_unregistering_all(void)
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
+ struct net *net;
+
+ mutex_lock(&link_ops_mutex);
+ list_del_rcu(&ops->list);
+ mutex_unlock(&link_ops_mutex);
+
+ synchronize_srcu(&ops->srcu);
+ cleanup_srcu_struct(&ops->srcu);
+
/* Close the race with setup_net() and cleanup_net() */
down_write(&pernet_ops_rwsem);
rtnl_lock_unregistering_all();
- __rtnl_link_unregister(ops);
+
+ for_each_net(net)
+ __rtnl_kill_links(net, ops);
+
rtnl_unlock();
up_write(&pernet_ops_rwsem);
}
@@ -566,31 +730,51 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
static LIST_HEAD(rtnl_af_ops);
-static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index)
{
- const struct rtnl_af_ops *ops;
+ struct rtnl_af_ops *ops;
ASSERT_RTNL();
- list_for_each_entry(ops, &rtnl_af_ops, list) {
- if (ops->family == family)
- return ops;
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
+ if (ops->family == family) {
+ *srcu_index = srcu_read_lock(&ops->srcu);
+ goto unlock;
+ }
}
- return NULL;
+ ops = NULL;
+unlock:
+ rcu_read_unlock();
+
+ return ops;
+}
+
+static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index)
+{
+ srcu_read_unlock(&ops->srcu, srcu_index);
}
/**
* rtnl_af_register - Register rtnl_af_ops with rtnetlink.
* @ops: struct rtnl_af_ops * to register
*
- * Returns 0 on success or a negative error code.
+ * Return: 0 on success or a negative error code.
*/
-void rtnl_af_register(struct rtnl_af_ops *ops)
+int rtnl_af_register(struct rtnl_af_ops *ops)
{
+ int err = init_srcu_struct(&ops->srcu);
+
+ if (err)
+ return err;
+
rtnl_lock();
list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
+
+ return 0;
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
@@ -605,6 +789,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops)
rtnl_unlock();
synchronize_rcu();
+ synchronize_srcu(&ops->srcu);
+ cleanup_srcu_struct(&ops->srcu);
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -1118,6 +1304,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ rtnl_devlink_port_size(dev)
+ rtnl_dpll_pin_size(dev)
+ + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */
+ 0;
}
@@ -1867,6 +2054,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
READ_ONCE(dev->tso_max_size)) ||
nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
READ_ONCE(dev->tso_max_segs)) ||
+ nla_put_uint(skb, IFLA_MAX_PACING_OFFLOAD_HORIZON,
+ READ_ONCE(dev->max_pacing_offload_horizon)) ||
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
READ_ONCE(dev->num_rx_queues)) ||
@@ -1975,6 +2164,7 @@ nla_put_failure:
}
static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+ [IFLA_UNSPEC] = { .strict_start_type = IFLA_DPLL_PIN },
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
@@ -2003,7 +2193,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
[IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
- [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1),
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
@@ -2028,7 +2218,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
[IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
[IFLA_ALLMULTI] = { .type = NLA_REJECT },
- [IFLA_GSO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_GSO_IPV4_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1),
[IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
};
@@ -2083,10 +2273,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
[IFLA_XDP_PROG_ID] = { .type = NLA_U32 },
};
-static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
+static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla,
+ int *ops_srcu_index)
{
- const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
+ struct rtnl_link_ops *ops = NULL;
if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0)
return NULL;
@@ -2095,7 +2286,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
char kind[MODULE_NAME_LEN];
nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
- ops = rtnl_link_ops_get(kind);
+ ops = rtnl_link_ops_get(kind, ops_srcu_index);
}
return ops;
@@ -2215,8 +2406,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
+ struct rtnl_link_ops *kind_ops = NULL;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
@@ -2227,6 +2418,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net *tgt_net = net;
u32 ext_filter_mask = 0;
struct net_device *dev;
+ int ops_srcu_index;
int master_idx = 0;
int netnsid = -1;
int err, i;
@@ -2250,7 +2442,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
NL_SET_ERR_MSG(extack, "Invalid target network namespace id");
- return PTR_ERR(tgt_net);
+ err = PTR_ERR(tgt_net);
+ netnsid = -1;
+ goto out;
}
break;
case IFLA_EXT_MASK:
@@ -2260,12 +2454,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
master_idx = nla_get_u32(tb[i]);
break;
case IFLA_LINKINFO:
- kind_ops = linkinfo_to_kind_ops(tb[i]);
+ kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index);
break;
default:
if (cb->strict_check) {
NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
}
}
@@ -2286,8 +2481,15 @@ walk_entries:
if (err < 0)
break;
}
+
+
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+out:
+
+ if (kind_ops)
+ rtnl_link_ops_put(kind_ops, ops_srcu_index);
if (netnsid >= 0)
put_net(tgt_net);
@@ -2316,9 +2518,10 @@ int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
}
EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
-struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+static struct net *rtnl_link_get_net_ifla(struct nlattr *tb[])
{
- struct net *net;
+ struct net *net = NULL;
+
/* Examine the link attributes and figure out which
* network namespace we are talking about.
*/
@@ -2326,8 +2529,17 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
else if (tb[IFLA_NET_NS_FD])
net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
- else
+
+ return net;
+}
+
+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+{
+ struct net *net = rtnl_link_get_net_ifla(tb);
+
+ if (!net)
net = get_net(src_net);
+
return net;
}
EXPORT_SYMBOL(rtnl_link_get_net);
@@ -2467,20 +2679,24 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
int rem, err;
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
- const struct rtnl_af_ops *af_ops;
+ struct rtnl_af_ops *af_ops;
+ int af_ops_srcu_index;
- af_ops = rtnl_af_lookup(nla_type(af));
+ af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index);
if (!af_ops)
return -EAFNOSUPPORT;
if (!af_ops->set_link_af)
- return -EOPNOTSUPP;
-
- if (af_ops->validate_link_af) {
+ err = -EOPNOTSUPP;
+ else if (af_ops->validate_link_af)
err = af_ops->validate_link_af(dev, af, extack);
- if (err < 0)
- return err;
- }
+ else
+ err = 0;
+
+ rtnl_af_put(af_ops, af_ops_srcu_index);
+
+ if (err < 0)
+ return err;
}
}
@@ -2724,7 +2940,7 @@ static int do_set_proto_down(struct net_device *dev,
bool proto_down;
int err;
- if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) {
+ if (!dev->change_proto_down) {
NL_SET_ERR_MSG(extack, "Protodown not supported by device");
return -EOPNOTSUPP;
}
@@ -2771,8 +2987,8 @@ static int do_set_proto_down(struct net_device *dev,
#define DO_SETLINK_MODIFIED 0x01
/* notify flag means notify + modified. */
#define DO_SETLINK_NOTIFY 0x03
-static int do_setlink(const struct sk_buff *skb,
- struct net_device *dev, struct ifinfomsg *ifm,
+static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
+ struct net *tgt_net, struct ifinfomsg *ifm,
struct netlink_ext_ack *extack,
struct nlattr **tb, int status)
{
@@ -2780,32 +2996,25 @@ static int do_setlink(const struct sk_buff *skb,
char ifname[IFNAMSIZ];
int err;
+ err = validate_linkmsg(dev, tb, extack);
+ if (err < 0)
+ goto errout;
+
if (tb[IFLA_IFNAME])
nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
- if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
+ if (!net_eq(tgt_net, dev_net(dev))) {
const char *pat = ifname[0] ? ifname : NULL;
- struct net *net;
int new_ifindex;
- net = rtnl_link_get_net_capable(skb, dev_net(dev),
- tb, CAP_NET_ADMIN);
- if (IS_ERR(net)) {
- err = PTR_ERR(net);
- goto errout;
- }
-
- if (tb[IFLA_NEW_IFINDEX])
- new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]);
- else
- new_ifindex = 0;
+ new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);
- err = __dev_change_net_namespace(dev, net, pat, new_ifindex);
- put_net(net);
+ err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex);
if (err)
goto errout;
+
status |= DO_SETLINK_MODIFIED;
}
@@ -3064,11 +3273,18 @@ static int do_setlink(const struct sk_buff *skb,
int rem;
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
- const struct rtnl_af_ops *af_ops;
+ struct rtnl_af_ops *af_ops;
+ int af_ops_srcu_index;
- BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
+ af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index);
+ if (!af_ops) {
+ err = -EAFNOSUPPORT;
+ goto errout;
+ }
err = af_ops->set_link_af(dev, af, extack);
+ rtnl_af_put(af_ops, af_ops_srcu_index);
+
if (err < 0)
goto errout;
@@ -3165,11 +3381,13 @@ static struct net_device *rtnl_dev_get(struct net *net,
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct ifinfomsg *ifm = nlmsg_data(nlh);
struct net *net = sock_net(skb->sk);
- struct ifinfomsg *ifm;
- struct net_device *dev;
- int err;
struct nlattr *tb[IFLA_MAX+1];
+ struct net_device *dev = NULL;
+ struct rtnl_nets rtnl_nets;
+ struct net *tgt_net;
+ int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
ifla_policy, extack);
@@ -3180,25 +3398,31 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- err = -EINVAL;
- ifm = nlmsg_data(nlh);
+ tgt_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(tgt_net)) {
+ err = PTR_ERR(tgt_net);
+ goto errout;
+ }
+
+ rtnl_nets_init(&rtnl_nets);
+ rtnl_nets_add(&rtnl_nets, get_net(net));
+ rtnl_nets_add(&rtnl_nets, tgt_net);
+
+ rtnl_nets_lock(&rtnl_nets);
+
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
dev = rtnl_dev_get(net, tb);
else
- goto errout;
+ err = -EINVAL;
- if (dev == NULL) {
+ if (dev)
+ err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0);
+ else if (!err)
err = -ENODEV;
- goto errout;
- }
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- goto errout;
-
- err = do_setlink(skb, dev, ifm, extack, tb, 0);
+ rtnl_nets_unlock(&rtnl_nets);
errout:
return err;
}
@@ -3258,14 +3482,14 @@ EXPORT_SYMBOL_GPL(rtnl_delete_link);
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct ifinfomsg *ifm = nlmsg_data(nlh);
struct net *net = sock_net(skb->sk);
u32 portid = NETLINK_CB(skb).portid;
- struct net *tgt_net = net;
- struct net_device *dev = NULL;
- struct ifinfomsg *ifm;
struct nlattr *tb[IFLA_MAX+1];
- int err;
+ struct net_device *dev = NULL;
+ struct net *tgt_net = net;
int netnsid = -1;
+ int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
ifla_policy, extack);
@@ -3283,27 +3507,24 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(tgt_net);
}
- err = -EINVAL;
- ifm = nlmsg_data(nlh);
+ rtnl_net_lock(tgt_net);
+
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
dev = rtnl_dev_get(tgt_net, tb);
+
+ if (dev)
+ err = rtnl_delete_link(dev, portid, nlh);
+ else if (ifm->ifi_index > 0 || tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ err = -ENODEV;
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
- goto out;
-
- if (!dev) {
- if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0)
- err = -ENODEV;
-
- goto out;
- }
+ err = -EINVAL;
- err = rtnl_delete_link(dev, portid, nlh);
+ rtnl_net_unlock(tgt_net);
-out:
if (netnsid >= 0)
put_net(tgt_net);
@@ -3430,21 +3651,90 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
}
EXPORT_SYMBOL(rtnl_create_link);
+struct rtnl_newlink_tbs {
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+ struct nlattr *attr[RTNL_MAX_TYPE + 1];
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+};
+
+static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh,
+ const struct rtnl_link_ops *ops,
+ struct net_device *dev, struct net *tgt_net,
+ struct rtnl_newlink_tbs *tbs,
+ struct nlattr **data,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr ** const linkinfo = tbs->linkinfo;
+ struct nlattr ** const tb = tbs->tb;
+ int status = 0;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ if (linkinfo[IFLA_INFO_DATA]) {
+ if (!ops || ops != dev->rtnl_link_ops || !ops->changelink)
+ return -EOPNOTSUPP;
+
+ err = ops->changelink(dev, tb, data, extack);
+ if (err < 0)
+ return err;
+
+ status |= DO_SETLINK_NOTIFY;
+ }
+
+ if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
+ const struct rtnl_link_ops *m_ops = NULL;
+ struct nlattr **slave_data = NULL;
+ struct net_device *master_dev;
+
+ master_dev = netdev_master_upper_dev_get(dev);
+ if (master_dev)
+ m_ops = master_dev->rtnl_link_ops;
+
+ if (!m_ops || !m_ops->slave_changelink)
+ return -EOPNOTSUPP;
+
+ if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
+ return -EINVAL;
+
+ if (m_ops->slave_maxtype) {
+ err = nla_parse_nested_deprecated(tbs->slave_attr,
+ m_ops->slave_maxtype,
+ linkinfo[IFLA_INFO_SLAVE_DATA],
+ m_ops->slave_policy, extack);
+ if (err < 0)
+ return err;
+
+ slave_data = tbs->slave_attr;
+ }
+
+ err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack);
+ if (err < 0)
+ return err;
+
+ status |= DO_SETLINK_NOTIFY;
+ }
+
+ return do_setlink(skb, dev, tgt_net, nlmsg_data(nlh), extack, tb, status);
+}
+
static int rtnl_group_changelink(const struct sk_buff *skb,
- struct net *net, int group,
- struct ifinfomsg *ifm,
- struct netlink_ext_ack *extack,
- struct nlattr **tb)
+ struct net *net, struct net *tgt_net,
+ int group, struct ifinfomsg *ifm,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb)
{
struct net_device *dev, *aux;
int err;
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- return err;
- err = do_setlink(skb, dev, ifm, extack, tb, 0);
+ err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0);
if (err < 0)
return err;
}
@@ -3455,6 +3745,8 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
const struct rtnl_link_ops *ops,
+ struct net *tgt_net, struct net *link_net,
+ struct net *peer_net,
const struct nlmsghdr *nlh,
struct nlattr **tb, struct nlattr **data,
struct netlink_ext_ack *extack)
@@ -3462,7 +3754,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
unsigned char name_assign_type = NET_NAME_USER;
struct net *net = sock_net(skb->sk);
u32 portid = NETLINK_CB(skb).portid;
- struct net *dest_net, *link_net;
struct net_device *dev;
char ifname[IFNAMSIZ];
int err;
@@ -3477,27 +3768,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
name_assign_type = NET_NAME_ENUM;
}
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
-
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
-
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- NL_SET_ERR_MSG(extack, "Unknown network namespace id");
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- } else {
- link_net = NULL;
- }
-
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ dev = rtnl_create_link(link_net ? : tgt_net, ifname,
name_assign_type, ops, tb, extack);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
@@ -3506,8 +3777,13 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
dev->ifindex = ifm->ifi_index;
+ if (link_net)
+ net = link_net;
+ if (peer_net)
+ net = peer_net;
+
if (ops->newlink)
- err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ err = ops->newlink(net, dev, tb, data, extack);
else
err = register_netdevice(dev);
if (err < 0) {
@@ -3519,7 +3795,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
if (err < 0)
goto out_unregister;
if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
+ err = dev_change_net_namespace(dev, tgt_net, ifname);
if (err < 0)
goto out_unregister;
}
@@ -3529,9 +3805,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
goto out_unregister;
}
out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
return err;
out_unregister:
if (ops->newlink) {
@@ -3545,41 +3818,42 @@ out_unregister:
goto out;
}
-struct rtnl_newlink_tbs {
+static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops,
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
struct nlattr *tb[IFLA_MAX + 1];
- struct nlattr *attr[RTNL_MAX_TYPE + 1];
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
-};
+ int err;
+
+ if (!data || !data[ops->peer_type])
+ return NULL;
+
+ err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (ops->validate) {
+ err = ops->validate(tb, NULL, extack);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ return rtnl_link_get_net_ifla(tb);
+}
static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ const struct rtnl_link_ops *ops,
+ struct net *tgt_net, struct net *link_net,
+ struct net *peer_net,
struct rtnl_newlink_tbs *tbs,
+ struct nlattr **data,
struct netlink_ext_ack *extack)
{
- struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
struct nlattr ** const tb = tbs->tb;
- const struct rtnl_link_ops *m_ops;
- struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
- const struct rtnl_link_ops *ops;
- struct nlattr **slave_data;
- char kind[MODULE_NAME_LEN];
struct net_device *dev;
struct ifinfomsg *ifm;
- struct nlattr **data;
bool link_specified;
- int err;
-
-#ifdef CONFIG_MODULES
-replay:
-#endif
- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
- ifla_policy, extack);
- if (err < 0)
- return err;
-
- err = rtnl_ensure_unique_netns(tb, extack, false);
- if (err < 0)
- return err;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0) {
@@ -3596,151 +3870,153 @@ replay:
dev = NULL;
}
- master_dev = NULL;
- m_ops = NULL;
- if (dev) {
- master_dev = netdev_master_upper_dev_get(dev);
- if (master_dev)
- m_ops = master_dev->rtnl_link_ops;
+ if (dev)
+ return rtnl_changelink(skb, nlh, ops, dev, tgt_net, tbs, data, extack);
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
+ * or it's for a group
+ */
+ if (link_specified || !tb[IFLA_GROUP])
+ return -ENODEV;
+
+ return rtnl_group_changelink(skb, net, tgt_net,
+ nla_get_u32(tb[IFLA_GROUP]),
+ ifm, extack, tb);
+ }
+
+ if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
+ return -EOPNOTSUPP;
+
+ if (!ops) {
+ NL_SET_ERR_MSG(extack, "Unknown device type");
+ return -EOPNOTSUPP;
}
+ return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, peer_net, nlh,
+ tb, data, extack);
+}
+
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *tgt_net, *link_net = NULL, *peer_net = NULL;
+ struct nlattr **tb, **linkinfo, **data = NULL;
+ struct rtnl_link_ops *ops = NULL;
+ struct rtnl_newlink_tbs *tbs;
+ struct rtnl_nets rtnl_nets;
+ int ops_srcu_index;
+ int ret;
+
+ tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
+ if (!tbs)
+ return -ENOMEM;
+
+ tb = tbs->tb;
+ ret = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, extack);
+ if (ret < 0)
+ goto free;
+
+ ret = rtnl_ensure_unique_netns(tb, extack, false);
+ if (ret < 0)
+ goto free;
+
+ linkinfo = tbs->linkinfo;
if (tb[IFLA_LINKINFO]) {
- err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
+ ret = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
tb[IFLA_LINKINFO],
ifla_info_policy, NULL);
- if (err < 0)
- return err;
- } else
- memset(linkinfo, 0, sizeof(linkinfo));
+ if (ret < 0)
+ goto free;
+ } else {
+ memset(linkinfo, 0, sizeof(tbs->linkinfo));
+ }
if (linkinfo[IFLA_INFO_KIND]) {
+ char kind[MODULE_NAME_LEN];
+
nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
- ops = rtnl_link_ops_get(kind);
- } else {
- kind[0] = '\0';
- ops = NULL;
+ ops = rtnl_link_ops_get(kind, &ops_srcu_index);
+#ifdef CONFIG_MODULES
+ if (!ops) {
+ request_module("rtnl-link-%s", kind);
+ ops = rtnl_link_ops_get(kind, &ops_srcu_index);
+ }
+#endif
}
- data = NULL;
+ rtnl_nets_init(&rtnl_nets);
+
if (ops) {
- if (ops->maxtype > RTNL_MAX_TYPE)
- return -EINVAL;
+ if (ops->maxtype > RTNL_MAX_TYPE) {
+ ret = -EINVAL;
+ goto put_ops;
+ }
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
+ ret = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
ops->policy, extack);
- if (err < 0)
- return err;
+ if (ret < 0)
+ goto put_ops;
+
data = tbs->attr;
}
+
if (ops->validate) {
- err = ops->validate(tb, data, extack);
- if (err < 0)
- return err;
+ ret = ops->validate(tb, data, extack);
+ if (ret < 0)
+ goto put_ops;
}
- }
-
- slave_data = NULL;
- if (m_ops) {
- if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
- return -EINVAL;
- if (m_ops->slave_maxtype &&
- linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested_deprecated(tbs->slave_attr,
- m_ops->slave_maxtype,
- linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy,
- extack);
- if (err < 0)
- return err;
- slave_data = tbs->slave_attr;
+ if (ops->peer_type) {
+ peer_net = rtnl_get_peer_net(ops, data, extack);
+ if (IS_ERR(peer_net)) {
+ ret = PTR_ERR(peer_net);
+ goto put_ops;
+ }
+ if (peer_net)
+ rtnl_nets_add(&rtnl_nets, peer_net);
}
}
- if (dev) {
- int status = 0;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
-
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- return err;
-
- if (linkinfo[IFLA_INFO_DATA]) {
- if (!ops || ops != dev->rtnl_link_ops ||
- !ops->changelink)
- return -EOPNOTSUPP;
+ tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN);
+ if (IS_ERR(tgt_net)) {
+ ret = PTR_ERR(tgt_net);
+ goto put_net;
+ }
- err = ops->changelink(dev, tb, data, extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
- }
+ rtnl_nets_add(&rtnl_nets, tgt_net);
- if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
- if (!m_ops || !m_ops->slave_changelink)
- return -EOPNOTSUPP;
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
- err = m_ops->slave_changelink(master_dev, dev, tb,
- slave_data, extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
+ link_net = get_net_ns_by_id(tgt_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ ret = -EINVAL;
+ goto put_net;
}
- return do_setlink(skb, dev, ifm, extack, tb, status);
- }
-
- if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
- * or it's for a group
- */
- if (link_specified)
- return -ENODEV;
- if (tb[IFLA_GROUP])
- return rtnl_group_changelink(skb, net,
- nla_get_u32(tb[IFLA_GROUP]),
- ifm, extack, tb);
- return -ENODEV;
- }
-
- if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
- return -EOPNOTSUPP;
+ rtnl_nets_add(&rtnl_nets, link_net);
- if (!ops) {
-#ifdef CONFIG_MODULES
- if (kind[0]) {
- __rtnl_unlock();
- request_module("rtnl-link-%s", kind);
- rtnl_lock();
- ops = rtnl_link_ops_get(kind);
- if (ops)
- goto replay;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ goto put_net;
}
-#endif
- NL_SET_ERR_MSG(extack, "Unknown device type");
- return -EOPNOTSUPP;
}
- return rtnl_newlink_create(skb, ifm, ops, nlh, tb, data, extack);
-}
-
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct rtnl_newlink_tbs *tbs;
- int ret;
-
- tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
- if (!tbs)
- return -ENOMEM;
+ rtnl_nets_lock(&rtnl_nets);
+ ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, peer_net, tbs, data, extack);
+ rtnl_nets_unlock(&rtnl_nets);
- ret = __rtnl_newlink(skb, nlh, tbs, extack);
+put_net:
+ rtnl_nets_destroy(&rtnl_nets);
+put_ops:
+ if (ops)
+ rtnl_link_ops_put(ops, ops_srcu_index);
+free:
kfree(tbs);
return ret;
}
@@ -4087,8 +4363,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
}
return skb;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return NULL;
}
@@ -4313,9 +4588,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops = br_dev->netdev_ops;
+ bool notified = false;
err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
- nlh->nlmsg_flags, extack);
+ nlh->nlmsg_flags, &notified, extack);
if (err)
goto out;
else
@@ -4324,16 +4600,18 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if ((ndm->ndm_flags & NTF_SELF)) {
+ bool notified = false;
+
if (dev->netdev_ops->ndo_fdb_add)
err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
vid,
nlh->nlmsg_flags,
- extack);
+ &notified, extack);
else
err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
nlh->nlmsg_flags);
- if (!err) {
+ if (!err && !notified) {
rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH,
ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
@@ -4433,11 +4711,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+ bool notified = false;
ops = br_dev->netdev_ops;
if (!del_bulk) {
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid,
+ &notified, extack);
} else {
if (ops->ndo_fdb_del_bulk)
err = ops->ndo_fdb_del_bulk(nlh, dev, extack);
@@ -4451,10 +4731,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
+ bool notified = false;
+
ops = dev->netdev_ops;
if (!del_bulk) {
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid,
+ &notified, extack);
else
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
} else {
@@ -4465,7 +4748,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (!err) {
- if (!del_bulk)
+ if (!del_bulk && !notified)
rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
@@ -6170,7 +6453,7 @@ static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int idx, s_idx;
int err;
- NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx);
if (cb->strict_check) {
err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack);
@@ -6737,6 +7020,41 @@ static struct pernet_operations rtnetlink_net_ops = {
.exit = rtnetlink_net_exit,
};
+static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.msgtype = RTM_DELLINK, .doit = rtnl_dellink,
+ .flags = RTNL_FLAG_DOIT_PERNET_WIP},
+ {.msgtype = RTM_GETLINK, .doit = rtnl_getlink,
+ .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+ {.msgtype = RTM_SETLINK, .doit = rtnl_setlink,
+ .flags = RTNL_FLAG_DOIT_PERNET_WIP},
+ {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all},
+ {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all},
+ {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all},
+ {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get,
+ .dumpit = rtnl_stats_dump},
+ {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set},
+ {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop},
+ {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK,
+ .dumpit = rtnl_bridge_getlink},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK,
+ .doit = rtnl_bridge_dellink},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK,
+ .doit = rtnl_bridge_setlink},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_NEWNEIGH, .doit = rtnl_fdb_add},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del,
+ .flags = RTNL_FLAG_BULK_DEL_SUPPORTED},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get,
+ .dumpit = rtnl_fdb_dump},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del,
+ .flags = RTNL_FLAG_BULK_DEL_SUPPORTED},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get,
+ .dumpit = rtnl_mdb_dump},
+};
+
void __init rtnetlink_init(void)
{
if (register_pernet_subsys(&rtnetlink_net_ops))
@@ -6744,34 +7062,5 @@ void __init rtnetlink_init(void)
register_netdevice_notifier(&rtnetlink_dev_notifier);
- rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
- rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
- rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
-
- rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
-
- rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
- RTNL_FLAG_BULK_DEL_SUPPORTED);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
-
- rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
- rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
- 0);
- rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
-
- rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0);
- rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL,
- RTNL_FLAG_BULK_DEL_SUPPORTED);
+ rtnl_register_many(rtnetlink_rtnl_msg_handlers);
}
diff --git a/net/core/rtnl_net_debug.c b/net/core/rtnl_net_debug.c
new file mode 100644
index 000000000000..f406045cbd0e
--- /dev/null
+++ b/net/core/rtnl_net_debug.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+static int rtnl_net_debug_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ enum netdev_cmd cmd = event;
+
+ /* Keep enum and don't add default to trigger -Werror=switch */
+ switch (cmd) {
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGE:
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_PRE_CHANGEADDR:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_PRE_UP:
+ case NETDEV_PRE_TYPE_CHANGE:
+ case NETDEV_POST_TYPE_CHANGE:
+ case NETDEV_POST_INIT:
+ case NETDEV_PRE_UNINIT:
+ case NETDEV_RELEASE:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_JOIN:
+ case NETDEV_CHANGEUPPER:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_PRECHANGEMTU:
+ case NETDEV_CHANGEINFODATA:
+ case NETDEV_BONDING_INFO:
+ case NETDEV_PRECHANGEUPPER:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_UDP_TUNNEL_PUSH_INFO:
+ case NETDEV_UDP_TUNNEL_DROP_INFO:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
+ case NETDEV_CVLAN_FILTER_PUSH_INFO:
+ case NETDEV_CVLAN_FILTER_DROP_INFO:
+ case NETDEV_SVLAN_FILTER_PUSH_INFO:
+ case NETDEV_SVLAN_FILTER_DROP_INFO:
+ case NETDEV_OFFLOAD_XSTATS_ENABLE:
+ case NETDEV_OFFLOAD_XSTATS_DISABLE:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
+ case NETDEV_XDP_FEAT_CHANGE:
+ ASSERT_RTNL();
+ break;
+
+ /* Once an event fully supports RTNL_NET, move it here
+ * and remove "if (0)" below.
+ *
+ * case NETDEV_XXX:
+ * ASSERT_RTNL_NET(net);
+ * break;
+ */
+ }
+
+ /* Just to avoid unused-variable error for dev and net. */
+ if (0)
+ ASSERT_RTNL_NET(net);
+
+ return NOTIFY_DONE;
+}
+
+static int rtnl_net_debug_net_id;
+
+static int __net_init rtnl_net_debug_net_init(struct net *net)
+{
+ struct notifier_block *nb;
+
+ nb = net_generic(net, rtnl_net_debug_net_id);
+ nb->notifier_call = rtnl_net_debug_event;
+
+ return register_netdevice_notifier_net(net, nb);
+}
+
+static void __net_exit rtnl_net_debug_net_exit(struct net *net)
+{
+ struct notifier_block *nb;
+
+ nb = net_generic(net, rtnl_net_debug_net_id);
+ unregister_netdevice_notifier_net(net, nb);
+}
+
+static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
+ .init = rtnl_net_debug_net_init,
+ .exit = rtnl_net_debug_net_exit,
+ .id = &rtnl_net_debug_net_id,
+ .size = sizeof(struct notifier_block),
+};
+
+static struct notifier_block rtnl_net_debug_block = {
+ .notifier_call = rtnl_net_debug_event,
+};
+
+static int __init rtnl_net_debug_init(void)
+{
+ int ret;
+
+ ret = register_pernet_device(&rtnl_net_debug_net_ops);
+ if (ret)
+ return ret;
+
+ ret = register_netdevice_notifier(&rtnl_net_debug_block);
+ if (ret)
+ unregister_pernet_subsys(&rtnl_net_debug_net_ops);
+
+ return ret;
+}
+
+subsys_initcall(rtnl_net_debug_init);
diff --git a/net/core/skb_fault_injection.c b/net/core/skb_fault_injection.c
new file mode 100644
index 000000000000..4235db6bdfad
--- /dev/null
+++ b/net/core/skb_fault_injection.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+static struct {
+ struct fault_attr attr;
+ char devname[IFNAMSIZ];
+ bool filtered;
+} skb_realloc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+ .filtered = false,
+};
+
+static bool should_fail_net_realloc_skb(struct sk_buff *skb)
+{
+ struct net_device *net = skb->dev;
+
+ if (skb_realloc.filtered &&
+ strncmp(net->name, skb_realloc.devname, IFNAMSIZ))
+ /* device name filter set, but names do not match */
+ return false;
+
+ if (!should_fail(&skb_realloc.attr, 1))
+ return false;
+
+ return true;
+}
+ALLOW_ERROR_INJECTION(should_fail_net_realloc_skb, TRUE);
+
+void skb_might_realloc(struct sk_buff *skb)
+{
+ if (!should_fail_net_realloc_skb(skb))
+ return;
+
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(skb_might_realloc);
+
+static int __init fail_skb_realloc_setup(char *str)
+{
+ return setup_fault_attr(&skb_realloc.attr, str);
+}
+__setup("fail_skb_realloc=", fail_skb_realloc_setup);
+
+static void reset_settings(void)
+{
+ skb_realloc.filtered = false;
+ memset(&skb_realloc.devname, 0, IFNAMSIZ);
+}
+
+static ssize_t devname_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ ssize_t ret;
+
+ reset_settings();
+ ret = simple_write_to_buffer(&skb_realloc.devname, IFNAMSIZ,
+ ppos, buffer, count);
+ if (ret < 0)
+ return ret;
+
+ skb_realloc.devname[IFNAMSIZ - 1] = '\0';
+ /* Remove a possible \n at the end of devname */
+ strim(skb_realloc.devname);
+
+ if (strnlen(skb_realloc.devname, IFNAMSIZ))
+ skb_realloc.filtered = true;
+
+ return count;
+}
+
+static ssize_t devname_read(struct file *file,
+ char __user *buffer,
+ size_t size, loff_t *ppos)
+{
+ if (!skb_realloc.filtered)
+ return 0;
+
+ return simple_read_from_buffer(buffer, size, ppos, &skb_realloc.devname,
+ strlen(skb_realloc.devname));
+}
+
+static const struct file_operations devname_ops = {
+ .write = devname_write,
+ .read = devname_read,
+};
+
+static int __init fail_skb_realloc_debugfs(void)
+{
+ umode_t mode = S_IFREG | 0600;
+ struct dentry *dir;
+
+ dir = fault_create_debugfs_attr("fail_skb_realloc", NULL,
+ &skb_realloc.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ debugfs_create_file("devname", mode, dir, NULL, &devname_ops);
+
+ return 0;
+}
+
+late_initcall(fail_skb_realloc_debugfs);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 83f8cd8aa2d1..6841e61a6bd0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -88,6 +88,7 @@
#include <linux/textsearch.h>
#include "dev.h"
+#include "netmem_priv.h"
#include "sock_destructor.h"
#ifdef CONFIG_SKB_EXTENSIONS
@@ -314,8 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
fragsz = SKB_DATA_ALIGN(fragsz);
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
- data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
- align_mask);
+ data = __page_frag_alloc_align(&nc->page, fragsz,
+ GFP_ATOMIC | __GFP_NOWARN, align_mask);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
return data;
@@ -330,7 +331,8 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
fragsz = SKB_DATA_ALIGN(fragsz);
- data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
+ data = __page_frag_alloc_align(nc, fragsz,
+ GFP_ATOMIC | __GFP_NOWARN,
align_mask);
} else {
local_bh_disable();
@@ -349,7 +351,7 @@ static struct sk_buff *napi_skb_cache_get(void)
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC,
+ GFP_ATOMIC | __GFP_NOWARN,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
if (unlikely(!nc->skb_count)) {
@@ -418,7 +420,8 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -469,7 +472,8 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -749,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc(nc, len, gfp_mask);
- pfmemalloc = nc->pfmemalloc;
+ pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
} else {
local_bh_disable();
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache.page);
data = page_frag_alloc(nc, len, gfp_mask);
- pfmemalloc = nc->pfmemalloc;
+ pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
local_bh_enable();
@@ -846,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
len = SKB_HEAD_ALIGN(len);
data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = nc->page.pfmemalloc;
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
}
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
@@ -917,9 +921,9 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool is_pp_page(struct page *page)
+static bool is_pp_netmem(netmem_ref netmem)
{
- return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+ return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
@@ -1017,9 +1021,7 @@ EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(netmem_ref netmem)
{
- struct page *page = netmem_to_page(netmem);
-
- page = compound_head(page);
+ netmem = netmem_compound_head(netmem);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
* in order to preserve any existing bits, such as bit 0 for the
@@ -1028,10 +1030,10 @@ bool napi_pp_put_page(netmem_ref netmem)
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
- if (unlikely(!is_pp_page(page)))
+ if (unlikely(!is_pp_netmem(netmem)))
return false;
- page_pool_put_full_netmem(page->pp, page_to_netmem(page), false);
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
return true;
}
@@ -1058,7 +1060,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
static int skb_pp_frag_ref(struct sk_buff *skb)
{
struct skb_shared_info *shinfo;
- struct page *head_page;
+ netmem_ref head_netmem;
int i;
if (!skb->pp_recycle)
@@ -1067,11 +1069,11 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
shinfo = skb_shinfo(skb);
for (i = 0; i < shinfo->nr_frags; i++) {
- head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
- if (likely(is_pp_page(head_page)))
- page_pool_ref_page(head_page);
+ head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
+ if (likely(is_pp_netmem(head_netmem)))
+ page_pool_ref_netmem(head_netmem);
else
- page_ref_inc(head_page);
+ page_ref_inc(netmem_to_page(head_netmem));
}
return 0;
}
@@ -1369,6 +1371,14 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
struct page *p;
u8 *vaddr;
+ if (skb_frag_is_net_iov(frag)) {
+ printk("%sskb frag %d: not readable\n", level, i);
+ len -= skb_frag_size(frag);
+ if (!len)
+ break;
+ continue;
+ }
+
skb_frag_foreach_page(frag, skb_frag_off(frag),
skb_frag_size(frag), p, p_off, p_len,
copied) {
@@ -1962,6 +1972,9 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
return -EINVAL;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
if (!num_frags)
goto release;
@@ -2135,6 +2148,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
unsigned int size;
int headerlen;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
return NULL;
@@ -2473,6 +2489,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
struct sk_buff *n;
int oldheadroom;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
return NULL;
@@ -2817,6 +2836,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
*/
int i, k, eat = (skb->tail + delta) - skb->end;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (eat > 0 || skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
GFP_ATOMIC))
@@ -2970,6 +2992,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
to += copy;
}
+ if (!skb_frags_readable(skb))
+ goto fault;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
skb_frag_t *f = &skb_shinfo(skb)->frags[i];
@@ -3158,9 +3183,15 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
/*
* then map the fragments
*/
+ if (!skb_frags_readable(skb))
+ return false;
+
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+ if (WARN_ON_ONCE(!skb_frag_page(f)))
+ return false;
+
if (__splice_segment(skb_frag_page(f),
skb_frag_off(f), skb_frag_size(f),
offset, len, spd, false, sk, pipe))
@@ -3378,6 +3409,9 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
from += copy;
}
+ if (!skb_frags_readable(skb))
+ goto fault;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
int end;
@@ -3457,6 +3491,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
pos = copy;
}
+ if (WARN_ON_ONCE(!skb_frags_readable(skb)))
+ return 0;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -3557,6 +3594,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
pos = copy;
}
+ if (!skb_frags_readable(skb))
+ return 0;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
@@ -4048,6 +4088,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+ skb1->unreadable = skb->unreadable;
skb_shinfo(skb)->nr_frags = 0;
skb1->data_len = skb->data_len;
skb1->len += skb1->data_len;
@@ -4095,6 +4136,8 @@ static inline void skb_split_no_header(struct sk_buff *skb,
pos += size;
}
skb_shinfo(skb1)->nr_frags = k;
+
+ skb1->unreadable = skb->unreadable;
}
/**
@@ -4169,8 +4212,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
/* Actual merge is delayed until the point when we know we can
* commit all, so that we don't have to undo partial changes
*/
- if (!to ||
- !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+ if (!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
skb_frag_off(fragfrom))) {
merge = -1;
} else {
@@ -4333,6 +4375,9 @@ next_skb:
return block_limit - abs_offset;
}
+ if (!skb_frags_readable(st->cur_skb))
+ return 0;
+
if (st->frag_idx == 0 && !st->frag_data)
st->stepped_offset += skb_headlen(st->cur_skb);
@@ -4409,6 +4454,41 @@ void skb_abort_seq_read(struct skb_seq_state *st)
}
EXPORT_SYMBOL(skb_abort_seq_read);
+/**
+ * skb_copy_seq_read() - copy from a skb_seq_state to a buffer
+ * @st: source skb_seq_state
+ * @offset: offset in source
+ * @to: destination buffer
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @offset bytes into the source @st to the destination
+ * buffer @to. `offset` should increase (or be unchanged) with each subsequent
+ * call to this function. If offset needs to decrease from the previous use `st`
+ * should be reset first.
+ *
+ * Return: 0 on success or -EINVAL if the copy ended early
+ */
+int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len)
+{
+ const u8 *data;
+ u32 sqlen;
+
+ for (;;) {
+ sqlen = skb_seq_read(offset, &data, st);
+ if (sqlen == 0)
+ return -EINVAL;
+ if (sqlen >= len) {
+ memcpy(to, data, len);
+ return 0;
+ }
+ memcpy(to, data, sqlen);
+ to += sqlen;
+ offset += sqlen;
+ len -= sqlen;
+ }
+}
+EXPORT_SYMBOL(skb_copy_seq_read);
+
#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
@@ -5161,7 +5241,7 @@ EXPORT_SYMBOL_GPL(skb_to_sgvec);
* 3. sg_unmark_end
* 4. skb_to_sgvec(payload2)
*
- * When mapping mutilple payload conditionally, skb_to_sgvec_nomark
+ * When mapping multiple payload conditionally, skb_to_sgvec_nomark
* is more preferable.
*/
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -5426,7 +5506,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
+ if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data)))
return true;
read_lock_bh(&sk->sk_callback_lock);
@@ -5945,7 +6025,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (to->pp_recycle != from->pp_recycle)
return false;
- if (len <= skb_tailroom(to)) {
+ if (skb_frags_readable(from) != skb_frags_readable(to))
+ return false;
+
+ if (len <= skb_tailroom(to) && skb_frags_readable(from)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
@@ -6019,7 +6102,7 @@ EXPORT_SYMBOL(skb_try_coalesce);
* @skb: buffer to clean
* @xnet: packet is crossing netns
*
- * skb_scrub_packet can be used after encapsulating or decapsulting a packet
+ * skb_scrub_packet can be used after encapsulating or decapsulating a packet
* into/from a tunnel. Some information have to be cleared during these
* operations.
* skb_scrub_packet can also be used to clean a skb before injecting it in
@@ -6122,6 +6205,9 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
@@ -6241,7 +6327,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
return err;
skb->protocol = skb->vlan_proto;
- skb->mac_len += VLAN_HLEN;
+ skb->network_header -= VLAN_HLEN;
skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
}
@@ -6801,7 +6887,7 @@ void skb_condense(struct sk_buff *skb)
{
if (skb->data_len) {
if (skb->data_len > skb->end - skb->tail ||
- skb_cloned(skb))
+ skb_cloned(skb) || !skb_frags_readable(skb))
return;
/* Nice, we can free page frag(s) right now */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index bbf40b999713..e90fbab703b2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -293,7 +293,7 @@ out:
/* If we trim data a full sg elem before curr pointer update
* copybreak and current so that any future copy operations
* start at new copy location.
- * However trimed data that has not yet been used in a copy op
+ * However trimmed data that has not yet been used in a copy op
* does not require an update.
*/
if (!msg->sg.size) {
@@ -1117,9 +1117,9 @@ static void sk_psock_strp_data_ready(struct sock *sk)
if (tls_sw_has_ctx_rx(sk)) {
psock->saved_data_ready(sk);
} else {
- write_lock_bh(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
strp_data_ready(&psock->strp);
- write_unlock_bh(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
}
rcu_read_unlock();
diff --git a/net/core/sock.c b/net/core/sock.c
index 9abc4fe25953..74729d20cd00 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -85,7 +85,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
@@ -124,6 +124,7 @@
#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
@@ -285,8 +286,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_tstamp_allow_data __read_mostly = 1;
-
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);
@@ -821,14 +820,11 @@ EXPORT_SYMBOL(sock_set_sndtimeo);
static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
{
+ sock_valbool_flag(sk, SOCK_RCVTSTAMP, val);
+ sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, val && ns);
if (val) {
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
- sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
- sock_set_flag(sk, SOCK_RCVTSTAMP);
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- } else {
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
}
}
@@ -1049,6 +1045,75 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
return 0;
}
+#ifdef CONFIG_PAGE_POOL
+
+/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
+ * in 1 syscall. The limit exists to limit the amount of memory the kernel
+ * allocates to copy these tokens, and to prevent looping over the frags for
+ * too long.
+ */
+#define MAX_DONTNEED_TOKENS 128
+#define MAX_DONTNEED_FRAGS 1024
+
+static noinline_for_stack int
+sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ unsigned int num_tokens, i, j, k, netmem_num = 0;
+ struct dmabuf_token *tokens;
+ int ret = 0, num_frags = 0;
+ netmem_ref netmems[16];
+
+ if (!sk_is_tcp(sk))
+ return -EBADF;
+
+ if (optlen % sizeof(*tokens) ||
+ optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
+ return -EINVAL;
+
+ num_tokens = optlen / sizeof(*tokens);
+ tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
+ if (!tokens)
+ return -ENOMEM;
+
+ if (copy_from_sockptr(tokens, optval, optlen)) {
+ kvfree(tokens);
+ return -EFAULT;
+ }
+
+ xa_lock_bh(&sk->sk_user_frags);
+ for (i = 0; i < num_tokens; i++) {
+ for (j = 0; j < tokens[i].token_count; j++) {
+ if (++num_frags > MAX_DONTNEED_FRAGS)
+ goto frag_limit_reached;
+
+ netmem_ref netmem = (__force netmem_ref)__xa_erase(
+ &sk->sk_user_frags, tokens[i].token_start + j);
+
+ if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ continue;
+
+ netmems[netmem_num++] = netmem;
+ if (netmem_num == ARRAY_SIZE(netmems)) {
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+ netmem_num = 0;
+ xa_lock_bh(&sk->sk_user_frags);
+ }
+ ret++;
+ }
+ }
+
+frag_limit_reached:
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+
+ kvfree(tokens);
+ return ret;
+}
+#endif
+
void sockopt_lock_sock(struct sock *sk)
{
/* When current->bpf_ctx is set, the setsockopt is called from
@@ -1211,6 +1276,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
ret = -EOPNOTSUPP;
return ret;
}
+#ifdef CONFIG_PAGE_POOL
+ case SO_DEVMEM_DONTNEED:
+ return sock_devmem_dontneed(sk, optval, optlen);
+#endif
}
sockopt_lock_sock(sk);
@@ -2048,7 +2117,7 @@ static inline void sock_lock_init(struct sock *sk)
/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
- * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * even temporarily, because of RCU lookups. sk_node should also be left as is.
* We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
@@ -2526,19 +2595,16 @@ void __sock_wfree(struct sk_buff *skb)
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
- skb->sk = sk;
#ifdef CONFIG_INET
- if (unlikely(!sk_fullsock(sk))) {
- skb->destructor = sock_edemux;
- sock_hold(sk);
- return;
- }
+ if (unlikely(!sk_fullsock(sk)))
+ return skb_set_owner_edemux(skb, sk);
#endif
+ skb->sk = sk;
skb->destructor = sock_wfree;
skb_set_hash_from_sk(skb, sk);
/*
* We used to take a refcount on sk, but following operation
- * is enough to guarantee sk_free() wont free this sock until
+ * is enough to guarantee sk_free() won't free this sock until
* all in-flight packets are completed
*/
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
@@ -2831,6 +2897,8 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
{
u32 tsflags;
+ BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (1 << 31));
+
switch (cmsg->cmsg_type) {
case SO_MARK:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
@@ -2859,6 +2927,17 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
return -EINVAL;
sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
break;
+ case SCM_TS_OPT_ID:
+ if (sk_is_tcp(sk))
+ return -EINVAL;
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
+ return -EINVAL;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
+ sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID;
+ break;
/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
case SCM_RIGHTS:
case SCM_CREDENTIALS:
@@ -3429,7 +3508,7 @@ static void sock_def_destruct(struct sock *sk)
void sk_send_sigurg(struct sock *sk)
{
if (sk->sk_socket && sk->sk_socket->file)
- if (send_sigurg(&sk->sk_socket->file->f_owner))
+ if (send_sigurg(sk->sk_socket->file))
sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);
@@ -3697,7 +3776,7 @@ EXPORT_SYMBOL(sock_recv_errqueue);
*
* FIX: POSIX 1003.1g is very ambiguous here. It states that
* asynchronous errors should be reported by getsockopt. We assume
- * this means if you specify SO_ERROR (otherwise whats the point of it).
+ * this means if you specify SO_ERROR (otherwise what is the point of it).
*/
int sock_common_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
@@ -3751,9 +3830,6 @@ void sk_common_release(struct sock *sk)
sk->sk_prot->unhash(sk);
- if (sk->sk_socket)
- sk->sk_socket->sk = NULL;
-
/*
* In this point socket cannot receive new packets, but it is possible
* that some packets are in flight because some CPU runs receiver and
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d3dbb92153f2..f1b9b3958792 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -67,46 +67,39 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
{
- u32 ufd = attr->target_fd;
struct bpf_map *map;
- struct fd f;
int ret;
if (attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->target_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
mutex_lock(&sockmap_mutex);
ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
mutex_unlock(&sockmap_mutex);
- fdput(f);
return ret;
}
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
- u32 ufd = attr->target_fd;
struct bpf_prog *prog;
struct bpf_map *map;
- struct fd f;
int ret;
if (attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->target_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
prog = bpf_prog_get(attr->attach_bpf_fd);
- if (IS_ERR(prog)) {
- ret = PTR_ERR(prog);
- goto put_map;
- }
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
if (prog->type != ptype) {
ret = -EINVAL;
@@ -118,8 +111,6 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
mutex_unlock(&sockmap_mutex);
put_prog:
bpf_prog_put(prog);
-put_map:
- fdput(f);
return ret;
}
@@ -168,6 +159,7 @@ static void sock_map_del_link(struct sock *sk,
verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
+ break;
}
}
spin_unlock_bh(&psock->link_lock);
@@ -420,12 +412,11 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock **psk)
{
- struct sock *sk;
+ struct sock *sk = NULL;
int err = 0;
spin_lock_bh(&stab->lock);
- sk = *psk;
- if (!sk_test || sk_test == sk)
+ if (!sk_test || sk_test == *psk)
sk = xchg(psk, NULL);
if (likely(sk))
@@ -656,6 +647,8 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if ((flags & BPF_F_INGRESS) && sk_is_vsock(sk))
+ return SK_DROP;
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
return SK_PASS;
@@ -684,6 +677,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
return SK_DROP;
if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
return SK_DROP;
+ if (sk_is_vsock(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
@@ -1183,6 +1178,7 @@ static void sock_hash_free(struct bpf_map *map)
sock_put(elem->sk);
sock_hash_free_elem(htab, elem);
}
+ cond_resched();
}
/* wait for psock readers accessing its map link */
@@ -1257,6 +1253,8 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if ((flags & BPF_F_INGRESS) && sk_is_vsock(sk))
+ return SK_DROP;
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
return SK_PASS;
@@ -1285,6 +1283,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
return SK_DROP;
if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
return SK_DROP;
+ if (sk_is_vsock(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
@@ -1550,18 +1550,17 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
- u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ u32 prog_cnt = 0, flags = 0;
struct bpf_prog **pprog;
struct bpf_prog *prog;
struct bpf_map *map;
- struct fd f;
u32 id = 0;
int ret;
if (attr->query.query_flags)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->target_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -1593,7 +1592,6 @@ end:
copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
ret = -EFAULT;
- fdput(f);
return ret;
}
@@ -1762,6 +1760,10 @@ static int sock_map_link_update_prog(struct bpf_link *link,
ret = -EINVAL;
goto out;
}
+ if (!sockmap_link->map) {
+ ret = -ENOLINK;
+ goto out;
+ }
ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
sockmap_link->attach_type);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5a165286e4d8..4211710393a8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -173,10 +173,9 @@ static bool __reuseport_detach_closed_sock(struct sock *sk,
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- unsigned int size = sizeof(struct sock_reuseport) +
- sizeof(struct sock *) * max_socks;
- struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
+ struct sock_reuseport *reuse;
+ reuse = kzalloc(struct_size(reuse, socks, max_socks), GFP_ATOMIC);
if (!reuse)
return NULL;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 86a2476678c4..cb8d32e5c14e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -51,29 +51,45 @@ int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
-static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
- struct cpumask *mask)
+static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
+ struct cpumask *mask)
{
- char kbuf[128];
+ char *kbuf;
int len;
if (*ppos || !*lenp) {
*lenp = 0;
- return;
+ return 0;
+ }
+
+ /* CPUs are displayed as a hex bitmap + a comma between each groups of 8
+ * nibbles (except the last one which has a newline instead).
+ * Guesstimate the buffer size at the group granularity level.
+ */
+ len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
+ kbuf = kmalloc(len, GFP_KERNEL);
+ if (!kbuf) {
+ *lenp = 0;
+ return -ENOMEM;
}
- len = min(sizeof(kbuf) - 1, *lenp);
len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
if (!len) {
*lenp = 0;
- return;
+ goto free_buf;
}
- if (len < *lenp)
- kbuf[len++] = '\n';
+ /* scnprintf writes a trailing null char not counted in the returned
+ * length, override it with a newline.
+ */
+ kbuf[len++] = '\n';
memcpy(buffer, kbuf, len);
*lenp = len;
*ppos += len;
+
+free_buf:
+ kfree(kbuf);
+ return 0;
}
#endif
@@ -117,8 +133,8 @@ static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
if (err)
goto done;
} else {
- dump_cpumask(buffer, lenp, ppos,
- net->core.rps_default_mask ? : cpu_none_mask);
+ err = dump_cpumask(buffer, lenp, ppos,
+ net->core.rps_default_mask ? : cpu_none_mask);
}
done:
@@ -247,7 +263,7 @@ write_unlock:
}
rcu_read_unlock();
- dump_cpumask(buffer, lenp, ppos, mask);
+ ret = dump_cpumask(buffer, lenp, ppos, mask);
}
done:
@@ -491,15 +507,6 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "tstamp_allow_data",
- .data = &sysctl_tstamp_allow_data,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE
- },
#ifdef CONFIG_RPS
{
.procname = "rps_sock_flow_entries",
@@ -665,6 +672,15 @@ static struct ctl_table netns_core_table[] = {
.extra2 = SYSCTL_ONE,
.proc_handler = proc_dou8vec_minmax,
},
+ {
+ .procname = "tstamp_allow_data",
+ .data = &init_net.core.sysctl_tstamp_allow_data,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
/* sysctl_core_net_init() will set the values after this
* to readonly in network namespaces
*/
diff --git a/net/core/tso.c b/net/core/tso.c
index e00796e3b146..6df997b9076e 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -3,7 +3,7 @@
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/tso.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
diff --git a/net/core/utils.c b/net/core/utils.c
index c994e95172ac..27f4cffaae05 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Generic address resultion entity
+ * Generic address resolution entity
*
* Authors:
* net_random Alan Cox
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 2e6b8c8fd2de..03eb1d941fca 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2408,6 +2408,11 @@ static struct notifier_block dcbnl_nb __read_mostly = {
.notifier_call = dcbnl_netdevice_event,
};
+static const struct rtnl_msg_handler dcbnl_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_GETDCB, .doit = dcb_doit},
+ {.msgtype = RTM_SETDCB, .doit = dcb_doit},
+};
+
static int __init dcbnl_init(void)
{
int err;
@@ -2416,8 +2421,7 @@ static int __init dcbnl_init(void)
if (err)
return err;
- rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
+ rtnl_register_many(dcbnl_rtnl_msg_handlers);
return 0;
}
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index ca8670f78ac6..f349d16dd8f6 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -22,7 +22,7 @@
#include "../dccp.h"
#include "ccid3.h"
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static bool ccid3_debug;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 54086bb05c42..f7554dcdaaba 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1166,8 +1166,12 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
goto not_valid_or_not_known;
}
- return dccp_feat_push_confirm(fn, feat, local, &fval);
+ if (dccp_feat_push_confirm(fn, feat, local, &fval)) {
+ kfree(fval.sp.vec);
+ return DCCP_RESET_CODE_TOO_BUSY;
+ }
+ return 0;
} else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */
return 0;
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index da5dba120bc9..d6649246188d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -618,7 +618,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != DCCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d24cad05001e..db62d4767024 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -10,7 +10,7 @@
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 13c73f50da3d..d6e3db300acb 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -531,10 +531,8 @@ int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info)
return err;
}
- if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
- action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
- else
- action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+ action = nla_get_u8_default(info->attrs[DEVLINK_ATTR_RELOAD_ACTION],
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
if (!devlink_reload_action_is_supported(devlink, action)) {
NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver");
@@ -971,14 +969,14 @@ static int devlink_nl_flash_update_fill(struct sk_buff *msg,
nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
params->component))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- params->done, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ params->done))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- params->total, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ params->total))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
- params->timeout, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout))
goto nla_put_failure;
out:
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index c7a8e13f917c..14eaad9cfe35 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -166,7 +166,7 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
static inline struct devlink_nl_dump_state *
devlink_dump_state(struct netlink_callback *cb)
{
- NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state);
+ NL_ASSERT_CTX_FITS(struct devlink_nl_dump_state);
return (struct devlink_nl_dump_state *)cb->ctx;
}
@@ -181,6 +181,11 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+static inline int devlink_nl_put_u64(struct sk_buff *msg, int attrtype, u64 val)
+{
+ return nla_put_u64_64bit(msg, attrtype, val, DEVLINK_ATTR_PAD);
+}
+
int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype);
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
index 55009b377447..e55701b007f0 100644
--- a/net/devlink/dpipe.c
+++ b/net/devlink/dpipe.c
@@ -165,18 +165,17 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size))
goto nla_put_failure;
if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
table->counters_enabled))
goto nla_put_failure;
if (table->resource_valid) {
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
- table->resource_id, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
- table->resource_units, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units))
goto nla_put_failure;
}
if (devlink_dpipe_matches_put(table, skb))
@@ -403,12 +402,11 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb,
if (!entry_attr)
return -EMSGSIZE;
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index))
goto nla_put_failure;
if (entry->counter_valid)
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
- entry->counter, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter))
goto nla_put_failure;
matches_attr = nla_nest_start_noflag(skb,
diff --git a/net/devlink/health.c b/net/devlink/health.c
index acb8c0e174bb..b8d3084e6fe0 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -287,29 +287,27 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
reporter->health_state))
goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
- reporter->error_count, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+ reporter->error_count))
goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
- reporter->recovery_count, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+ reporter->recovery_count))
goto reporter_nest_cancel;
if (reporter->ops->recover &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
- reporter->graceful_period,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period))
goto reporter_nest_cancel;
if (reporter->ops->recover &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
reporter->auto_recover))
goto reporter_nest_cancel;
if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
- jiffies_to_msecs(reporter->dump_ts),
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts)))
goto reporter_nest_cancel;
if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
- reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts))
goto reporter_nest_cancel;
if (reporter->ops->dump &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
@@ -963,8 +961,7 @@ devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
case NLA_U32:
return nla_put_u32(skb, attrtype, *(u32 *)msg->value);
case NLA_U64:
- return nla_put_u64_64bit(skb, attrtype, *(u64 *)msg->value,
- DEVLINK_ATTR_PAD);
+ return devlink_nl_put_u64(skb, attrtype, *(u64 *)msg->value);
case NLA_NUL_STRING:
return nla_put_string(skb, attrtype, (char *)&msg->value);
case NLA_BINARY:
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 7139e67e93ae..8828ffaf6cbc 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -108,12 +108,12 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
goto nla_put_failure;
}
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
- devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+ devlink_rate->tx_share))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
- devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_MAX,
+ devlink_rate->tx_max))
goto nla_put_failure;
if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
diff --git a/net/devlink/region.c b/net/devlink/region.c
index 7319127c5913..63fb297f6d67 100644
--- a/net/devlink/region.c
+++ b/net/devlink/region.c
@@ -77,7 +77,7 @@ static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
if (!snap_attr)
- return -EINVAL;
+ return -EMSGSIZE;
err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
if (err)
@@ -102,7 +102,7 @@ static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
snapshots_attr = nla_nest_start_noflag(msg,
DEVLINK_ATTR_REGION_SNAPSHOTS);
if (!snapshots_attr)
- return -EINVAL;
+ return -EMSGSIZE;
list_for_each_entry(snapshot, &region->snapshot_list, list) {
err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
@@ -145,9 +145,7 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size,
- DEVLINK_ATTR_PAD);
+ err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE, region->size);
if (err)
goto nla_put_failure;
@@ -210,8 +208,8 @@ devlink_nl_region_notify_build(struct devlink_region *region,
if (err)
goto out_cancel_msg;
} else {
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size, DEVLINK_ATTR_PAD);
+ err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE,
+ region->size);
if (err)
goto out_cancel_msg;
}
@@ -773,8 +771,7 @@ static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
if (err)
goto nla_put_failure;
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
- DEVLINK_ATTR_PAD);
+ err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr);
if (err)
goto nla_put_failure;
diff --git a/net/devlink/resource.c b/net/devlink/resource.c
index 594c8aeb3bfa..2d6324f3d91f 100644
--- a/net/devlink/resource.c
+++ b/net/devlink/resource.c
@@ -141,12 +141,12 @@ devlink_resource_size_params_put(struct devlink_resource *resource,
struct devlink_resource_size_params *size_params;
size_params = &resource->size_params;
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
- size_params->size_granularity, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
- size_params->size_max, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
- size_params->size_min, DEVLINK_ATTR_PAD) ||
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min) ||
nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
return -EMSGSIZE;
return 0;
@@ -157,9 +157,8 @@ static int devlink_resource_occ_put(struct devlink_resource *resource,
{
if (!resource->occ_get)
return 0;
- return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
- resource->occ_get(resource->occ_get_priv),
- DEVLINK_ATTR_PAD);
+ return devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->occ_get(resource->occ_get_priv));
}
static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
@@ -174,14 +173,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
- DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id))
goto nla_put_failure;
if (resource->size != resource->size_new &&
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
- resource->size_new, DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new))
goto nla_put_failure;
if (devlink_resource_occ_put(resource, skb))
goto nla_put_failure;
@@ -348,7 +345,7 @@ int devl_resource_register(struct devlink *devlink,
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource)
- return -EINVAL;
+ return -EEXIST;
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
if (!resource)
@@ -384,39 +381,6 @@ int devl_resource_register(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devl_resource_register);
-/**
- * devlink_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-int devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_resource_register(devlink, resource_name, resource_size,
- resource_id, parent_resource_id, size_params);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_resource_register);
-
static void devlink_resource_unregister(struct devlink *devlink,
struct devlink_resource *resource)
{
@@ -517,28 +481,6 @@ void devl_resource_occ_get_register(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
/**
- * devlink_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
- devl_lock(devlink);
- devl_resource_occ_get_register(devlink, resource_id,
- occ_get, occ_get_priv);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
-
-/**
* devl_resource_occ_get_unregister - unregister occupancy getter
*
* @devlink: devlink
@@ -560,20 +502,3 @@ void devl_resource_occ_get_unregister(struct devlink *devlink,
resource->occ_get_priv = NULL;
}
EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
-
-/**
- * devlink_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
- devl_lock(devlink);
- devl_resource_occ_get_unregister(devlink, resource_id);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
index 5d18c7424df1..f36087f90db5 100644
--- a/net/devlink/trap.c
+++ b/net/devlink/trap.c
@@ -189,14 +189,12 @@ devlink_trap_group_stats_put(struct sk_buff *msg,
if (!attr)
return -EMSGSIZE;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets)))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes)))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -231,18 +229,15 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
return -EMSGSIZE;
if (devlink->ops->trap_drop_counter_get &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets)))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes)))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -750,8 +745,7 @@ devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
if (!attr)
return -EMSGSIZE;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -783,12 +777,12 @@ devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
policer_item->policer->id))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
- policer_item->rate, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
+ policer_item->rate))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
- policer_item->burst, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
+ policer_item->burst))
goto nla_put_failure;
err = devlink_trap_policer_stats_put(msg, devlink,
diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c
index 0aac887d0098..f41f9fc2194e 100644
--- a/net/dsa/devlink.c
+++ b/net/dsa/devlink.c
@@ -229,10 +229,15 @@ int dsa_devlink_resource_register(struct dsa_switch *ds,
u64 parent_resource_id,
const struct devlink_resource_size_params *size_params)
{
- return devlink_resource_register(ds->devlink, resource_name,
- resource_size, resource_id,
- parent_resource_id,
- size_params);
+ int ret;
+
+ devl_lock(ds->devlink);
+ ret = devl_resource_register(ds->devlink, resource_name, resource_size,
+ resource_id, parent_resource_id,
+ size_params);
+ devl_unlock(ds->devlink);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
@@ -247,15 +252,19 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
devlink_resource_occ_get_t *occ_get,
void *occ_get_priv)
{
- return devlink_resource_occ_get_register(ds->devlink, resource_id,
- occ_get, occ_get_priv);
+ devl_lock(ds->devlink);
+ devl_resource_occ_get_register(ds->devlink, resource_id, occ_get,
+ occ_get_priv);
+ devl_unlock(ds->devlink);
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register);
void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
u64 resource_id)
{
- devlink_resource_occ_get_unregister(ds->devlink, resource_id);
+ devl_lock(ds->devlink);
+ devl_resource_occ_get_unregister(ds->devlink, resource_id);
+ devl_unlock(ds->devlink);
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 668c729946ea..5a7c0e565a89 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1505,14 +1505,6 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (!ds->num_ports)
return -EINVAL;
- if (ds->phylink_mac_ops) {
- if (ds->ops->phylink_mac_select_pcs ||
- ds->ops->phylink_mac_config ||
- ds->ops->phylink_mac_link_down ||
- ds->ops->phylink_mac_link_up)
- return -EINVAL;
- }
-
if (np) {
err = dsa_switch_parse_of(ds, np);
if (err)
@@ -1577,6 +1569,7 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch);
void dsa_switch_shutdown(struct dsa_switch *ds)
{
struct net_device *conduit, *user_dev;
+ LIST_HEAD(close_list);
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
@@ -1586,10 +1579,16 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
rtnl_lock();
+ dsa_switch_for_each_cpu_port(dp, ds)
+ list_add(&dp->conduit->close_list, &close_list);
+
+ dev_close_many(&close_list, true);
+
dsa_switch_for_each_user_port(dp, ds) {
conduit = dsa_port_to_conduit(dp);
user_dev = dp->user;
+ netif_device_detach(user_dev);
netdev_upper_dev_unlink(conduit, user_dev);
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 25258b33e59e..ee0aaec4c8e0 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1575,44 +1575,16 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
-static struct phylink_pcs *
-dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
- phy_interface_t interface)
-{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
- struct dsa_switch *ds = dp->ds;
-
- if (ds->ops->phylink_mac_select_pcs)
- pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface);
-
- return pcs;
-}
-
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_config)
- return;
-
- ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_link_down)
- return;
-
- ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
@@ -1622,18 +1594,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
int speed, int duplex,
bool tx_pause, bool rx_pause)
{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_link_up)
- return;
-
- ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
- speed, duplex, tx_pause, rx_pause);
}
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
- .mac_select_pcs = dsa_port_phylink_mac_select_pcs,
.mac_config = dsa_port_phylink_mac_config,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
@@ -1871,9 +1834,6 @@ static void dsa_shared_port_link_down(struct dsa_port *dp)
if (ds->phylink_mac_ops && ds->phylink_mac_ops->mac_link_down)
ds->phylink_mac_ops->mac_link_down(&dp->pl_config, MLO_AN_FIXED,
PHY_INTERFACE_MODE_NA);
- else if (ds->ops->phylink_mac_link_down)
- ds->ops->phylink_mac_link_down(ds, dp->index, MLO_AN_FIXED,
- PHY_INTERFACE_MODE_NA);
}
int dsa_shared_port_link_register_of(struct dsa_port *dp)
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index ee7b272ab715..281bbac5539d 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -111,9 +111,10 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
* DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
* tag0 : zero-based value represents port
- * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ * (eg, 0x0=port1, 0x2=port3, 0x3=port4)
*/
+#define KSZ8795_TAIL_TAG_EG_PORT_M GENMASK(1, 0)
#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6)
#define KSZ8795_TAIL_TAG_LOOKUP BIT(7)
@@ -141,7 +142,8 @@ static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN);
+ return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M,
+ KSZ_EGRESS_TAG_LEN);
}
static const struct dsa_device_ops ksz8795_netdev_ops = {
@@ -176,8 +178,9 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
#define KSZ9477_INGRESS_TAG_LEN 2
#define KSZ9477_PTP_TAG_LEN 4
-#define KSZ9477_PTP_TAG_INDICATION 0x80
+#define KSZ9477_PTP_TAG_INDICATION BIT(7)
+#define KSZ9477_TAIL_TAG_EG_PORT_M GENMASK(2, 0)
#define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7)
#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
@@ -310,7 +313,7 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- unsigned int port = tag[0] & 7;
+ unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
unsigned int len = KSZ_EGRESS_TAG_LEN;
/* Extra 4-bytes PTP timestamp */
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 8e8b1bef6af6..11ea8cfd6266 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -79,7 +79,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int src_port, switch_id;
+ int src_port = -1, switch_id = -1;
dsa_8021q_rcv(skb, &src_port, &switch_id, NULL, NULL);
diff --git a/net/dsa/user.c b/net/dsa/user.c
index f5adfa1d978a..06c30a9e29ff 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -1042,15 +1042,12 @@ static void dsa_user_get_strings(struct net_device *dev,
struct dsa_switch *ds = dp->ds;
if (stringset == ETH_SS_STATS) {
- int len = ETH_GSTRING_LEN;
-
- strscpy_pad(data, "tx_packets", len);
- strscpy_pad(data + len, "tx_bytes", len);
- strscpy_pad(data + 2 * len, "rx_packets", len);
- strscpy_pad(data + 3 * len, "rx_bytes", len);
+ ethtool_puts(&data, "tx_packets");
+ ethtool_puts(&data, "tx_bytes");
+ ethtool_puts(&data, "rx_packets");
+ ethtool_puts(&data, "rx_bytes");
if (ds->ops->get_strings)
- ds->ops->get_strings(ds, dp->index, stringset,
- data + 4 * len);
+ ds->ops->get_strings(ds, dp->index, stringset, data);
} else if (stringset == ETH_SS_TEST) {
net_selftest_get_strings(data);
}
@@ -1308,8 +1305,7 @@ static int dsa_user_set_pauseparam(struct net_device *dev,
}
#ifdef CONFIG_NET_POLL_CONTROLLER
-static int dsa_user_netpoll_setup(struct net_device *dev,
- struct netpoll_info *ni)
+static int dsa_user_netpoll_setup(struct net_device *dev)
{
struct net_device *conduit = dsa_user_to_conduit(dev);
struct dsa_user_priv *p = netdev_priv(dev);
@@ -1365,7 +1361,7 @@ dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
static int
dsa_user_add_cls_matchall_mirred(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
- bool ingress)
+ bool ingress, bool ingress_target)
{
struct netlink_ext_ack *extack = cls->common.extack;
struct dsa_port *dp = dsa_user_to_port(dev);
@@ -1377,11 +1373,19 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev,
struct dsa_port *to_dp;
int err;
- if (!ds->ops->port_mirror_add)
+ if (cls->common.protocol != htons(ETH_P_ALL)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only offload \"protocol all\" matchall filter");
return -EOPNOTSUPP;
+ }
- if (!flow_action_basic_hw_stats_check(&cls->rule->action,
- cls->common.extack))
+ if (!ds->ops->port_mirror_add) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Switch does not support mirroring operation");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack))
return -EOPNOTSUPP;
act = &cls->rule->action.entries[0];
@@ -1389,8 +1393,36 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev,
if (!act->dev)
return -EINVAL;
- if (!dsa_user_dev_check(act->dev))
+ if (dsa_user_dev_check(act->dev)) {
+ if (ingress_target) {
+ /* We can only fulfill this using software assist */
+ if (cls->common.skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only mirred to ingress of DSA user port if filter also runs in software");
+ return -EOPNOTSUPP;
+ }
+ to_dp = dp->cpu_dp;
+ } else {
+ to_dp = dsa_user_to_port(act->dev);
+ }
+ } else {
+ /* Handle mirroring to foreign target ports as a mirror towards
+ * the CPU. The software tc rule will take the packets from
+ * there.
+ */
+ if (cls->common.skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only mirred to CPU if filter also runs in software");
+ return -EOPNOTSUPP;
+ }
+ to_dp = dp->cpu_dp;
+ }
+
+ if (dp->ds != to_dp->ds) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cross-chip mirroring not implemented");
return -EOPNOTSUPP;
+ }
mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
if (!mall_tc_entry)
@@ -1399,9 +1431,6 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev,
mall_tc_entry->cookie = cls->cookie;
mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
mirror = &mall_tc_entry->mirror;
-
- to_dp = dsa_user_to_port(act->dev);
-
mirror->to_local_port = to_dp->index;
mirror->ingress = ingress;
@@ -1442,8 +1471,7 @@ dsa_user_add_cls_matchall_police(struct net_device *dev,
return -EOPNOTSUPP;
}
- if (!flow_action_basic_hw_stats_check(&cls->rule->action,
- cls->common.extack))
+ if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack))
return -EOPNOTSUPP;
list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) {
@@ -1481,17 +1509,30 @@ static int dsa_user_add_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
- int err = -EOPNOTSUPP;
+ const struct flow_action *action = &cls->rule->action;
+ struct netlink_ext_ack *extack = cls->common.extack;
- if (cls->common.protocol == htons(ETH_P_ALL) &&
- flow_offload_has_one_action(&cls->rule->action) &&
- cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED)
- err = dsa_user_add_cls_matchall_mirred(dev, cls, ingress);
- else if (flow_offload_has_one_action(&cls->rule->action) &&
- cls->rule->action.entries[0].id == FLOW_ACTION_POLICE)
- err = dsa_user_add_cls_matchall_police(dev, cls, ingress);
+ if (!flow_offload_has_one_action(action)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload matchall filter with more than one action");
+ return -EOPNOTSUPP;
+ }
- return err;
+ switch (action->entries[0].id) {
+ case FLOW_ACTION_MIRRED:
+ return dsa_user_add_cls_matchall_mirred(dev, cls, ingress,
+ false);
+ case FLOW_ACTION_MIRRED_INGRESS:
+ return dsa_user_add_cls_matchall_mirred(dev, cls, ingress,
+ true);
+ case FLOW_ACTION_POLICE:
+ return dsa_user_add_cls_matchall_police(dev, cls, ingress);
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unknown action");
+ break;
+ }
+
+ return -EOPNOTSUPP;
}
static void dsa_user_del_cls_matchall(struct net_device *dev,
@@ -2642,11 +2683,12 @@ void dsa_user_setup_tagger(struct net_device *user)
user->features = conduit->vlan_features | NETIF_F_HW_TC;
user->hw_features |= NETIF_F_HW_TC;
- user->features |= NETIF_F_LLTX;
if (user->needed_tailroom)
user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
if (ds->needs_standalone_vlan_filtering)
user->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ user->lltx = true;
}
int dsa_user_suspend(struct net_device *user_dev)
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 9a190635fe95..9b540644ba31 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -8,4 +8,5 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
- module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o
+ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \
+ phy.o
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index 0515d6604b3b..f0883357d12e 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -425,12 +425,32 @@ static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits,
return 0;
}
+/**
+ * ethnl_bitmap32_equal() - Compare two bitmaps
+ * @map1: first bitmap
+ * @map2: second bitmap
+ * @nbits: bit size to compare
+ *
+ * Return: true if first @nbits are equal, false if not
+ */
+static bool ethnl_bitmap32_equal(const u32 *map1, const u32 *map2,
+ unsigned int nbits)
+{
+ if (memcmp(map1, map2, nbits / 32 * sizeof(u32)))
+ return false;
+ if (nbits % 32 == 0)
+ return true;
+ return !((map1[nbits / 32] ^ map2[nbits / 32]) &
+ ethnl_lower_bits(nbits % 32));
+}
+
static int
ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
const struct nlattr *attr, struct nlattr **tb,
ethnl_string_array_t names,
struct netlink_ext_ack *extack, bool *mod)
{
+ u32 *saved_bitmap = NULL;
struct nlattr *bit_attr;
bool no_mask;
int rem;
@@ -448,8 +468,20 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
}
no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
- if (no_mask)
- ethnl_bitmap32_clear(bitmap, 0, nbits, mod);
+ if (no_mask) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+ unsigned int nbytes = nwords * sizeof(u32);
+ bool dummy;
+
+ /* The bitmap size is only the size of the map part without
+ * its mask part.
+ */
+ saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL);
+ if (!saved_bitmap)
+ return -ENOMEM;
+ memcpy(saved_bitmap, bitmap, nbytes);
+ ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy);
+ }
nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
bool old_val, new_val;
@@ -458,22 +490,30 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) {
NL_SET_ERR_MSG_ATTR(extack, bit_attr,
"only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS");
+ kfree(saved_bitmap);
return -EINVAL;
}
ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask,
names, extack);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(saved_bitmap);
return ret;
+ }
old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32));
if (new_val != old_val) {
if (new_val)
bitmap[idx / 32] |= ((u32)1 << (idx % 32));
else
bitmap[idx / 32] &= ~((u32)1 << (idx % 32));
- *mod = true;
+ if (!no_mask)
+ *mod = true;
}
}
+ if (no_mask && !ethnl_bitmap32_equal(saved_bitmap, bitmap, nbits))
+ *mod = true;
+
+ kfree(saved_bitmap);
return 0;
}
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index f6f136ec7ddf..f22051f33868 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -13,7 +13,7 @@
const struct nla_policy ethnl_cable_test_act_policy[] = {
[ETHTOOL_A_CABLE_TEST_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd)
@@ -58,6 +58,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info req_info = {};
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
struct net_device *dev;
int ret;
@@ -69,12 +70,16 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
return ret;
dev = req_info.dev;
- if (!dev->phydev) {
+
+ rtnl_lock();
+ phydev = ethnl_req_get_phydev(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_HEADER],
+ info->extack);
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
- goto out_dev_put;
+ goto out_rtnl;
}
- rtnl_lock();
ops = ethtool_phy_ops;
if (!ops || !ops->start_cable_test) {
ret = -EOPNOTSUPP;
@@ -85,17 +90,15 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_rtnl;
- ret = ops->start_cable_test(dev->phydev, info->extack);
+ ret = ops->start_cable_test(phydev, info->extack);
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev,
- ETHTOOL_MSG_CABLE_TEST_NTF);
+ ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF);
out_rtnl:
rtnl_unlock();
-out_dev_put:
ethnl_parse_header_dev_put(&req_info);
return ret;
}
@@ -160,7 +163,8 @@ void ethnl_cable_test_finished(struct phy_device *phydev)
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_finished);
-int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
+int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair,
+ u8 result, u32 src)
{
struct nlattr *nest;
int ret = -EMSGSIZE;
@@ -173,6 +177,10 @@ int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
goto err;
if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_CODE, result))
goto err;
+ if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_RESULT_SRC, src))
+ goto err;
+ }
nla_nest_end(phydev->skb, nest);
return 0;
@@ -181,9 +189,10 @@ err:
nla_nest_cancel(phydev->skb, nest);
return ret;
}
-EXPORT_SYMBOL_GPL(ethnl_cable_test_result);
+EXPORT_SYMBOL_GPL(ethnl_cable_test_result_with_src);
-int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
+int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair,
+ u32 cm, u32 src)
{
struct nlattr *nest;
int ret = -EMSGSIZE;
@@ -197,6 +206,11 @@ int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
goto err;
if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_CM, cm))
goto err;
+ if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
+ src))
+ goto err;
+ }
nla_nest_end(phydev->skb, nest);
return 0;
@@ -205,7 +219,7 @@ err:
nla_nest_cancel(phydev->skb, nest);
return ret;
}
-EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
+EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length_with_src);
static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 },
@@ -216,7 +230,7 @@ static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
const struct nla_policy ethnl_cable_test_tdr_act_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .type = NLA_NESTED },
};
@@ -305,6 +319,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info req_info = {};
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
struct phy_tdr_config cfg;
struct net_device *dev;
int ret;
@@ -317,10 +332,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
return ret;
dev = req_info.dev;
- if (!dev->phydev) {
- ret = -EOPNOTSUPP;
- goto out_dev_put;
- }
ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG],
info, &cfg);
@@ -328,6 +339,14 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
goto out_dev_put;
rtnl_lock();
+ phydev = ethnl_req_get_phydev(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
+ info->extack);
+ if (IS_ERR_OR_NULL(phydev)) {
+ ret = -EOPNOTSUPP;
+ goto out_rtnl;
+ }
+
ops = ethtool_phy_ops;
if (!ops || !ops->start_cable_test_tdr) {
ret = -EOPNOTSUPP;
@@ -338,12 +357,12 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_rtnl;
- ret = ops->start_cable_test_tdr(dev->phydev, info->extack, &cfg);
+ ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg);
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev,
+ ethnl_cable_test_started(phydev,
ETHTOOL_MSG_CABLE_TEST_TDR_NTF);
out_rtnl:
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index cee188da54f8..ca4f80282448 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -114,8 +114,7 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
struct net_device *dev = req_info->dev;
struct ethtool_channels channels = {};
struct nlattr **tb = info->attrs;
- u32 err_attr, max_rxfh_in_use;
- u64 max_rxnfc_in_use;
+ u32 err_attr;
int ret;
dev->ethtool_ops->get_channels(dev, &channels);
@@ -166,20 +165,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
return -EINVAL;
}
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table/rxnfc settings
- */
- if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
- max_rxnfc_in_use = 0;
- max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
- if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
- return -EINVAL;
- }
- if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
- GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
- return -EINVAL;
- }
+ ret = ethtool_check_max_channel(dev, channels, info);
+ if (ret)
+ return ret;
/* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
index e71cc3e1b7eb..1e790413db0e 100644
--- a/net/ethtool/cmis.h
+++ b/net/ethtool/cmis.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120
+#define ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH 2048
#define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F
#define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50
@@ -23,6 +24,7 @@ enum ethtool_cmis_cdb_cmd_id {
ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041,
ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101,
ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_EPL = 0x0104,
ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107,
ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109,
ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A,
@@ -38,6 +40,7 @@ enum ethtool_cmis_cdb_cmd_id {
* @resv1: Added to match the CMIS standard request continuity.
* @resv2: Added to match the CMIS standard request continuity.
* @payload: Payload for the CDB commands.
+ * @epl: Extended payload for the CDB commands.
*/
struct ethtool_cmis_cdb_request {
__be16 id;
@@ -49,6 +52,7 @@ struct ethtool_cmis_cdb_request {
u8 resv2;
u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
);
+ u8 *epl; /* Everything above this field checksummed. */
};
#define CDB_F_COMPLETION_VALID BIT(0)
@@ -96,19 +100,20 @@ struct ethtool_cmis_cdb_rpl {
u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
};
-u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs);
+u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs);
+u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs);
void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
- enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
- u8 lpl_len, u16 max_duration,
- u8 read_write_len_ext, u16 msleep_pre_rpl,
- u8 rpl_exp_len, u8 flags);
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl,
+ u8 lpl_len, u8 *epl, u16 epl_len,
+ u16 max_duration, u8 read_write_len_ext,
+ u16 msleep_pre_rpl, u8 rpl_exp_len,
+ u8 flags);
void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags);
void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
u8 page, u32 offset, u32 length);
-void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data);
struct ethtool_cmis_cdb *
ethtool_cmis_cdb_init(struct net_device *dev,
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
index 1bb08783b60d..d159dc121bde 100644
--- a/net/ethtool/cmis_cdb.c
+++ b/net/ethtool/cmis_cdb.c
@@ -11,25 +11,41 @@
* min(i, 15) byte octets where i specifies the allowable additional number of
* byte octets in a READ or a WRITE.
*/
-u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs)
+u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs)
{
return 8 * (1 + min_t(u8, num_of_byte_octs, 15));
}
+/* For accessing the EPL field on page 9Fh, the allowable length extension is
+ * min(i, 255) byte octets where i specifies the allowable additional number of
+ * byte octets in a READ or a WRITE.
+ */
+u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs)
+{
+ return 8 * (1 + min_t(u8, num_of_byte_octs, 255));
+}
+
void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
- enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
- u8 lpl_len, u16 max_duration,
- u8 read_write_len_ext, u16 msleep_pre_rpl,
- u8 rpl_exp_len, u8 flags)
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl,
+ u8 lpl_len, u8 *epl, u16 epl_len,
+ u16 max_duration, u8 read_write_len_ext,
+ u16 msleep_pre_rpl, u8 rpl_exp_len, u8 flags)
{
args->req.id = cpu_to_be16(cmd);
args->req.lpl_len = lpl_len;
- if (pl)
- memcpy(args->req.payload, pl, args->req.lpl_len);
+ if (lpl) {
+ memcpy(args->req.payload, lpl, args->req.lpl_len);
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_lpl_size(read_write_len_ext);
+ }
+ if (epl) {
+ args->req.epl_len = cpu_to_be16(epl_len);
+ args->req.epl = epl;
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_epl_size(read_write_len_ext);
+ }
args->max_duration = max_duration;
- args->read_write_len_ext =
- ethtool_cmis_get_max_payload_size(read_write_len_ext);
args->msleep_pre_rpl = msleep_pre_rpl;
args->rpl_exp_len = rpl_exp_len;
args->flags = flags;
@@ -100,7 +116,8 @@ static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl)
}
static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
- struct net_device *dev)
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_module_eeprom page_data = {};
@@ -119,8 +136,12 @@ static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
return err;
}
- if (!cmis_cdb_advert_rpl_inst_supported(&rpl))
+ if (!cmis_cdb_advert_rpl_inst_supported(&rpl)) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "CDB functionality is not supported",
+ NULL);
return -EOPNOTSUPP;
+ }
cdb->read_write_len_ext = rpl.read_write_len_ext;
@@ -178,7 +199,7 @@ cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb,
}
ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS,
- (u8 *)&qs_pl, sizeof(qs_pl), 0,
+ (u8 *)&qs_pl, sizeof(qs_pl), NULL, 0, 0,
cdb->read_write_len_ext, 1000,
sizeof(*rpl),
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -240,8 +261,9 @@ static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES,
- NULL, 0, 0, cdb->read_write_len_ext,
- 1000, sizeof(*rpl), flags);
+ NULL, 0, NULL, 0, 0,
+ cdb->read_write_len_ext, 1000,
+ sizeof(*rpl), flags);
err = ethtool_cmis_cdb_execute_cmd(dev, &args);
if (err < 0) {
@@ -282,7 +304,7 @@ ethtool_cmis_cdb_init(struct net_device *dev,
goto err;
}
- err = cmis_cdb_advertisement_get(cdb, dev);
+ err = cmis_cdb_advertisement_get(cdb, dev, ntf_params);
if (err < 0)
goto err;
@@ -444,6 +466,9 @@ static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg)
case 0b01000101:
*err_msg = "CDB status failed: CdbChkCode error";
break;
+ case 0b01000110:
+ *err_msg = "CDB status failed: Password error";
+ break;
default:
*err_msg = "Unknown failure reason";
}
@@ -538,6 +563,49 @@ __ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
return err;
}
+#define CMIS_CDB_EPL_PAGE_START 0xA0
+#define CMIS_CDB_EPL_PAGE_END 0xAF
+#define CMIS_CDB_EPL_FW_BLOCK_OFFSET_START 128
+#define CMIS_CDB_EPL_FW_BLOCK_OFFSET_END 255
+
+static int
+ethtool_cmis_cdb_execute_epl_cmd(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args,
+ struct ethtool_module_eeprom *page_data)
+{
+ u16 epl_len = be16_to_cpu(args->req.epl_len);
+ u32 bytes_written = 0;
+ u8 page;
+ int err;
+
+ for (page = CMIS_CDB_EPL_PAGE_START;
+ page <= CMIS_CDB_EPL_PAGE_END && bytes_written < epl_len; page++) {
+ u16 offset = CMIS_CDB_EPL_FW_BLOCK_OFFSET_START;
+
+ while (offset <= CMIS_CDB_EPL_FW_BLOCK_OFFSET_END &&
+ bytes_written < epl_len) {
+ u32 bytes_left = epl_len - bytes_written;
+ u16 space_left, bytes_to_write;
+
+ space_left = CMIS_CDB_EPL_FW_BLOCK_OFFSET_END - offset + 1;
+ bytes_to_write = min_t(u16, bytes_left,
+ min_t(u16, space_left,
+ args->read_write_len_ext));
+
+ err = __ethtool_cmis_cdb_execute_cmd(dev, page_data,
+ page, offset,
+ bytes_to_write,
+ args->req.epl + bytes_written);
+ if (err < 0)
+ return err;
+
+ offset += bytes_to_write;
+ bytes_written += bytes_to_write;
+ }
+ }
+ return 0;
+}
+
static u8 cmis_cdb_calc_checksum(const void *data, size_t size)
{
const u8 *bytes = (const u8 *)data;
@@ -559,7 +627,9 @@ int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
int err;
args->req.chk_code =
- cmis_cdb_calc_checksum(&args->req, sizeof(args->req));
+ cmis_cdb_calc_checksum(&args->req,
+ offsetof(struct ethtool_cmis_cdb_request,
+ epl));
if (args->req.lpl_len > args->read_write_len_ext) {
args->err_msg = "LPL length is longer than CDB read write length extension allows";
@@ -581,6 +651,12 @@ int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
if (err < 0)
return err;
+ if (args->req.epl_len) {
+ err = ethtool_cmis_cdb_execute_epl_cmd(dev, args, &page_data);
+ if (err < 0)
+ return err;
+ }
+
offset = CMIS_CDB_CMD_ID_OFFSET +
offsetof(struct ethtool_cmis_cdb_request, id);
err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data,
diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c
index 655ff5224ffa..48aef6220f00 100644
--- a/net/ethtool/cmis_fw_update.c
+++ b/net/ethtool/cmis_fw_update.c
@@ -9,6 +9,7 @@
struct cmis_fw_update_fw_mng_features {
u8 start_cmd_payload_size;
+ u8 write_mechanism;
u16 max_duration_start;
u16 max_duration_write;
u16 max_duration_complete;
@@ -36,7 +37,9 @@ struct cmis_cdb_fw_mng_features_rpl {
};
enum cmis_cdb_fw_write_mechanism {
+ CMIS_CDB_FW_WRITE_MECHANISM_NONE = 0x00,
CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01,
+ CMIS_CDB_FW_WRITE_MECHANISM_EPL = 0x10,
CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11,
};
@@ -54,7 +57,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES,
- NULL, 0, cdb->max_completion_time,
+ NULL, 0, NULL, 0,
+ cdb->max_completion_time,
cdb->read_write_len_ext, 1000,
sizeof(*rpl), flags);
@@ -67,10 +71,9 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
}
rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload;
- if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ||
- rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) {
+ if (rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_NONE) {
ethnl_module_fw_flash_ntf_err(dev, ntf_params,
- "Write LPL is not supported",
+ "CDB write mechanism is not supported",
NULL);
return -EOPNOTSUPP;
}
@@ -82,6 +85,10 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
*/
cdb->read_write_len_ext = rpl->read_write_len_ext;
fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size;
+ fw_mng->write_mechanism =
+ rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ?
+ CMIS_CDB_FW_WRITE_MECHANISM_LPL :
+ CMIS_CDB_FW_WRITE_MECHANISM_EPL;
fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start);
fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write);
fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete);
@@ -122,7 +129,7 @@ cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD,
- (u8 *)&pl, lpl_len,
+ (u8 *)&pl, lpl_len, NULL, 0,
fw_mng->max_duration_start,
cdb->read_write_len_ext, 1000, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -148,9 +155,9 @@ struct cmis_cdb_write_fw_block_lpl_pl {
};
static int
-cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
- struct ethtool_cmis_fw_update_params *fw_update,
- struct cmis_fw_update_fw_mng_features *fw_mng)
+cmis_fw_update_write_image_lpl(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
{
u8 start = fw_mng->start_cmd_payload_size;
u32 offset, max_block_size, max_lpl_len;
@@ -158,7 +165,7 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
int err;
max_lpl_len = min_t(u32,
- ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext),
+ ethtool_cmis_get_max_lpl_size(cdb->read_write_len_ext),
ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH);
max_block_size =
max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
@@ -183,7 +190,7 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL,
- (u8 *)&pl, lpl_len,
+ (u8 *)&pl, lpl_len, NULL, 0,
fw_mng->max_duration_write,
cdb->read_write_len_ext, 1, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -201,6 +208,67 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
return 0;
}
+struct cmis_cdb_write_fw_block_epl_pl {
+ u8 fw_block[ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH];
+};
+
+static int
+cmis_fw_update_write_image_epl(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ u8 start = fw_mng->start_cmd_payload_size;
+ u32 image_size = fw_update->fw->size;
+ u32 offset, lpl_len;
+ int err;
+
+ lpl_len = sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
+ block_address);
+
+ for (offset = start; offset < image_size;
+ offset += ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH) {
+ struct cmis_cdb_write_fw_block_lpl_pl lpl = {
+ .block_address = cpu_to_be32(offset - start),
+ };
+ struct cmis_cdb_write_fw_block_epl_pl *epl;
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ u32 epl_len;
+
+ ethnl_module_fw_flash_ntf_in_progress(fw_update->dev,
+ &fw_update->ntf_params,
+ offset - start,
+ image_size);
+
+ epl_len = min_t(u32, ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH,
+ image_size - offset);
+ epl = kmalloc_array(epl_len, sizeof(u8), GFP_KERNEL);
+ if (!epl)
+ return -ENOMEM;
+
+ memcpy(epl->fw_block, &fw_update->fw->data[offset], epl_len);
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_EPL,
+ (u8 *)&lpl, lpl_len, (u8 *)epl,
+ epl_len,
+ fw_mng->max_duration_write,
+ cdb->read_write_len_ext, 1, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args);
+ kfree(epl);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(fw_update->dev,
+ &fw_update->ntf_params,
+ "Write FW block EPL command failed",
+ args.err_msg);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int
cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb,
struct net_device *dev,
@@ -212,7 +280,8 @@ cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD,
- NULL, 0, fw_mng->max_duration_complete,
+ NULL, 0, NULL, 0,
+ fw_mng->max_duration_complete,
cdb->read_write_len_ext, 1000, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -236,9 +305,15 @@ cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb,
if (err < 0)
return err;
- err = cmis_fw_update_write_image(cdb, fw_update, fw_mng);
- if (err < 0)
- return err;
+ if (fw_mng->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL) {
+ err = cmis_fw_update_write_image_lpl(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+ } else {
+ err = cmis_fw_update_write_image_epl(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+ }
err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng,
&fw_update->ntf_params);
@@ -294,7 +369,7 @@ cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev,
int err;
ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE,
- (u8 *)&pl, sizeof(pl),
+ (u8 *)&pl, sizeof(pl), NULL, 0,
cdb->max_completion_time,
cdb->read_write_len_ext, 1000, 0,
CDB_F_MODULE_STATE_VALID);
@@ -326,7 +401,8 @@ cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE,
- NULL, 0, cdb->max_completion_time,
+ NULL, 0, NULL, 0,
+ cdb->max_completion_time,
cdb->read_write_len_ext, 1000, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 07032babd1b6..05ce4f8080b3 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -6,6 +6,7 @@
#include <linux/rtnetlink.h>
#include <linux/ptp_clock_kernel.h>
+#include "netlink.h"
#include "common.h"
const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
@@ -24,8 +25,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
[NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
[NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
[NETIF_F_GRO_BIT] = "rx-gro",
[NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
[NETIF_F_LRO_BIT] = "rx-lro",
@@ -51,7 +50,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
[NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
[NETIF_F_RXHASH_BIT] = "rx-hashing",
[NETIF_F_RXCSUM_BIT] = "rx-checksum",
@@ -429,6 +427,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
[const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
[const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp",
+ [const_ilog2(SOF_TIMESTAMPING_OPT_RX_FILTER)] = "option-rx-filter",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
@@ -539,7 +538,21 @@ static int ethtool_get_rxnfc_rule_count(struct net_device *dev)
return info.rule_cnt;
}
-int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
+/* Max offset for one RSS context */
+static u32 ethtool_get_rss_ctx_max_channel(struct ethtool_rxfh_context *ctx)
+{
+ u32 max_ring = 0;
+ u32 i, *tbl;
+
+ if (WARN_ON_ONCE(!ctx))
+ return 0;
+ tbl = ethtool_rxfh_context_indir(ctx);
+ for (i = 0; i < ctx->indir_size; i++)
+ max_ring = max(max_ring, tbl[i]);
+ return max_ring;
+}
+
+static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxnfc *info;
@@ -575,10 +588,18 @@ int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
if (rule_info.fs.ring_cookie != RX_CLS_FLOW_DISC &&
rule_info.fs.ring_cookie != RX_CLS_FLOW_WAKE &&
- !(rule_info.flow_type & FLOW_RSS) &&
- !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie))
- max_ring =
- max_t(u64, max_ring, rule_info.fs.ring_cookie);
+ !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie)) {
+ u64 ring = rule_info.fs.ring_cookie;
+
+ if (rule_info.flow_type & FLOW_RSS) {
+ struct ethtool_rxfh_context *ctx;
+
+ ctx = xa_load(&dev->ethtool->rss_ctx,
+ rule_info.rss_context);
+ ring += ethtool_get_rss_ctx_max_channel(ctx);
+ }
+ max_ring = max_t(u64, max_ring, ring);
+ }
}
kvfree(info);
@@ -590,6 +611,7 @@ err_free_info:
return err;
}
+/* Max offset across all of a device's RSS contexts */
static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
{
struct ethtool_rxfh_context *ctx;
@@ -597,22 +619,17 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
u32 max_ring = 0;
mutex_lock(&dev->ethtool->rss_lock);
- xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
- u32 i, *tbl;
-
- tbl = ethtool_rxfh_context_indir(ctx);
- for (i = 0; i < ctx->indir_size; i++)
- max_ring = max(max_ring, tbl[i]);
- }
+ xa_for_each(&dev->ethtool->rss_ctx, context, ctx)
+ max_ring = max(max_ring, ethtool_get_rss_ctx_max_channel(ctx));
mutex_unlock(&dev->ethtool->rss_lock);
return max_ring;
}
-u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
+static u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
{
struct ethtool_rxfh_param rxfh = {};
- u32 dev_size, current_max;
+ u32 dev_size, current_max = 0;
int ret;
/* While we do track whether RSS context has an indirection
@@ -650,10 +667,95 @@ out_free:
return current_max;
}
+int ethtool_check_max_channel(struct net_device *dev,
+ struct ethtool_channels channels,
+ struct genl_info *info)
+{
+ u64 max_rxnfc_in_use;
+ u32 max_rxfh_in_use;
+ int max_mp_in_use;
+
+ /* ensure the new Rx count fits within the configured Rx flow
+ * indirection table/rxnfc settings
+ */
+ if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
+ max_rxnfc_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
+ if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
+ return -EINVAL;
+ }
+ if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
+ return -EINVAL;
+ }
+
+ max_mp_in_use = dev_get_min_mp_channel_count(dev);
+ if (channels.combined_count + channels.rx_count <= max_mp_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing memory provider setting (%d)", max_mp_in_use);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxnfc *info;
+ int rc, i, rule_cnt;
+
+ if (!ops->get_rxnfc)
+ return 0;
+
+ rule_cnt = ethtool_get_rxnfc_rule_count(dev);
+ if (!rule_cnt)
+ return 0;
+
+ if (rule_cnt < 0)
+ return -EINVAL;
+
+ info = kvzalloc(struct_size(info, rule_locs, rule_cnt), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->cmd = ETHTOOL_GRXCLSRLALL;
+ info->rule_cnt = rule_cnt;
+ rc = ops->get_rxnfc(dev, info, info->rule_locs);
+ if (rc)
+ goto out_free;
+
+ for (i = 0; i < rule_cnt; i++) {
+ struct ethtool_rxnfc rule_info = {
+ .cmd = ETHTOOL_GRXCLSRULE,
+ .fs.location = info->rule_locs[i],
+ };
+
+ rc = ops->get_rxnfc(dev, &rule_info, NULL);
+ if (rc)
+ goto out_free;
+
+ if (rule_info.fs.flow_type & FLOW_RSS &&
+ rule_info.rss_context == rss_context) {
+ rc = -EBUSY;
+ goto out_free;
+ }
+ }
+
+out_free:
+ kvfree(info);
+ return rc;
+}
+
int ethtool_check_ops(const struct ethtool_ops *ops)
{
if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params))
return -EINVAL;
+ if (WARN_ON(ops->rxfh_max_num_contexts == 1))
+ return -EINVAL;
/* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime,
* the fact that ops are checked at registration time does not
* mean the ops attached to a netdev later on are sane.
@@ -665,20 +767,21 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ int err = 0;
memset(info, 0, sizeof(*info));
info->cmd = ETHTOOL_GET_TS_INFO;
+ info->phc_index = -1;
if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev))
- return phy_ts_info(phydev, info);
- if (ops->get_ts_info)
- return ops->get_ts_info(dev, info);
+ err = phy_ts_info(phydev, info);
+ else if (ops->get_ts_info)
+ err = ops->get_ts_info(dev, info);
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
+ info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
- return 0;
+ return err;
}
int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 863806fcf01a..4a2de3ce7354 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -20,6 +20,8 @@ struct link_mode_info {
u8 duplex;
};
+struct genl_info;
+
extern const char
netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
extern const char
@@ -42,8 +44,10 @@ int __ethtool_get_link(struct net_device *dev);
bool convert_legacy_settings_to_link_ksettings(
struct ethtool_link_ksettings *link_ksettings,
const struct ethtool_cmd *legacy_settings);
-u32 ethtool_get_max_rxfh_channel(struct net_device *dev);
-int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max);
+int ethtool_check_max_channel(struct net_device *dev,
+ struct ethtool_channels channels,
+ struct genl_info *info);
+int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context);
int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index ae041f51cd2d..7bb94875a7ec 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -992,6 +992,12 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (rc)
return rc;
+ /* Nonzero ring with RSS only makes sense if NIC adds them together */
+ if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS &&
+ !ops->cap_rss_rxnfc_adds &&
+ ethtool_get_flow_spec_ring(info.fs.ring_cookie))
+ return -EINVAL;
+
if (ops->get_rxfh) {
struct ethtool_rxfh_param rxfh = {};
@@ -1230,7 +1236,8 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
+ ops->create_rxfh_context))
return -EOPNOTSUPP;
rxfh.indir_size = rxfh_dev.indir_size;
@@ -1263,10 +1270,15 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh_dev.indir)
memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx),
indir_bytes);
- if (rxfh_dev.key)
- memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx),
- user_key_size);
- rxfh_dev.hfunc = ctx->hfunc;
+ if (!ops->rxfh_per_ctx_key) {
+ rxfh_dev.key_size = 0;
+ } else {
+ if (rxfh_dev.key)
+ memcpy(rxfh_dev.key,
+ ethtool_rxfh_context_key(ctx),
+ user_key_size);
+ rxfh_dev.hfunc = ctx->hfunc;
+ }
rxfh_dev.input_xfrm = ctx->input_xfrm;
ret = 0;
} else {
@@ -1284,6 +1296,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
sizeof(rxfh.input_xfrm))) {
ret = -EFAULT;
} else if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, key_size),
+ &rxfh_dev.key_size,
+ sizeof(rxfh.key_size))) {
+ ret = -EFAULT;
+ } else if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh, rss_config[0]),
rss_config, total_size)) {
ret = -EFAULT;
@@ -1360,7 +1377,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
+ ops->create_rxfh_context))
return -EOPNOTSUPP;
/* Check input data transformation capabilities */
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
@@ -1390,6 +1408,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
+ /* Check settings which may be global rather than per RSS-context */
+ if (rxfh.rss_context && !ops->rxfh_per_ctx_key)
+ if (rxfh.key_size ||
+ (rxfh.hfunc && rxfh.hfunc != ETH_RSS_HASH_NO_CHANGE) ||
+ (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_NO_CHANGE))
+ return -EOPNOTSUPP;
+
rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
@@ -1443,6 +1468,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
mutex_lock(&dev->ethtool->rss_lock);
locked = true;
}
+
+ if (rxfh.rss_context && rxfh_dev.rss_delete) {
+ ret = ethtool_check_rss_ctx_busy(dev, rxfh.rss_context);
+ if (ret)
+ goto out;
+ }
+
if (create) {
if (rxfh_dev.rss_delete) {
ret = -EINVAL;
@@ -1486,6 +1518,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
extack);
/* Make sure driver populates defaults */
WARN_ON_ONCE(!ret && !rxfh_dev.key &&
+ ops->rxfh_per_ctx_key &&
!memchr_inv(ethtool_rxfh_context_key(ctx),
0, ctx->key_size));
} else if (rxfh_dev.rss_delete) {
@@ -2072,8 +2105,6 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
{
struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
u16 from_channel, to_channel;
- u64 max_rxnfc_in_use;
- u32 max_rxfh_in_use;
unsigned int i;
int ret;
@@ -2103,14 +2134,9 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
(!channels.rx_count || !channels.tx_count))
return -EINVAL;
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table/rxnfc settings */
- if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
- max_rxnfc_in_use = 0;
- max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
- if (channels.combined_count + channels.rx_count <=
- max_t(u64, max_rxnfc_in_use, max_rxfh_in_use))
- return -EINVAL;
+ ret = ethtool_check_max_channel(dev, channels, NULL);
+ if (ret)
+ return ret;
/* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 5c317d23787b..30b8ce275159 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -35,7 +35,7 @@ static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
- if (ret < 0 && info)
+ if (ret < 0)
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
ethnl_ops_complete(dev);
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index b2591db49f7d..259cd9ef1f2a 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -40,7 +40,7 @@ static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
return ret;
ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
- if (ret < 0 && info) {
+ if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
goto out;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index cb1eea00e349..e3f0ef6b851b 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -2,6 +2,7 @@
#include <net/sock.h>
#include <linux/ethtool_netlink.h>
+#include <linux/phy_link_topology.h>
#include <linux/pm_runtime.h>
#include "netlink.h"
#include "module_fw.h"
@@ -31,6 +32,24 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+const struct nla_policy ethnl_header_policy_phy[] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ ETHTOOL_FLAGS_BASIC),
+ [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+const struct nla_policy ethnl_header_policy_phy_stats[] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ ETHTOOL_FLAGS_STATS),
+ [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
enum ethnl_sock_type type)
{
@@ -119,7 +138,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
const struct nlattr *header, struct net *net,
struct netlink_ext_ack *extack, bool require_dev)
{
- struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
+ struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)];
const struct nlattr *devname_attr;
struct net_device *dev = NULL;
u32 flags = 0;
@@ -134,7 +153,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
/* No validation here, command policy should have a nested policy set
* for the header, therefore validation should have already been done.
*/
- ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
+ ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header,
NULL, extack);
if (ret < 0)
return ret;
@@ -175,11 +194,45 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
return -EINVAL;
}
+ if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
+ if (dev) {
+ req_info->phy_index = nla_get_u32(tb[ETHTOOL_A_HEADER_PHY_INDEX]);
+ } else {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "phy_index set without a netdev");
+ return -EINVAL;
+ }
+ }
+
req_info->dev = dev;
req_info->flags = flags;
return 0;
}
+struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
+ const struct nlattr *header,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_device *phydev;
+
+ ASSERT_RTNL();
+
+ if (!req_info->dev)
+ return NULL;
+
+ if (!req_info->phy_index)
+ return req_info->dev->phydev;
+
+ phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index);
+ if (!phydev) {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "no phy matching phyindex");
+ return ERR_PTR(-ENODEV);
+ }
+
+ return phydev;
+}
+
/**
* ethnl_fill_reply_header() - Put common header into a reply message
* @skb: skb with the message
@@ -1128,6 +1181,8 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_RSS_GET,
.doit = ethnl_default_doit,
+ .start = ethnl_rss_dump_start,
+ .dumpit = ethnl_rss_dumpit,
.policy = ethnl_rss_get_policy,
.maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
},
@@ -1179,6 +1234,15 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_module_fw_flash_act_policy,
.maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PHY_GET,
+ .doit = ethnl_phy_doit,
+ .start = ethnl_phy_start,
+ .dumpit = ethnl_phy_dumpit,
+ .done = ethnl_phy_done,
+ .policy = ethnl_phy_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 46ec273a87c5..203b08eb6c6f 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -251,6 +251,9 @@ static inline unsigned int ethnl_reply_header_size(void)
* @dev: network device the request is for (may be null)
* @dev_tracker: refcount tracker for @dev reference
* @flags: request flags common for all request types
+ * @phy_index: phy_device index connected to @dev this request is for. Can be
+ * 0 if the request doesn't target a phy, or if the @dev's attached
+ * phy is targeted.
*
* This is a common base for request specific structures holding data from
* parsed userspace request. These always embed struct ethnl_req_info at
@@ -260,6 +263,7 @@ struct ethnl_req_info {
struct net_device *dev;
netdevice_tracker dev_tracker;
u32 flags;
+ u32 phy_index;
};
static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
@@ -268,6 +272,27 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
}
/**
+ * ethnl_req_get_phydev() - Gets the phy_device targeted by this request,
+ * if any. Must be called under rntl_lock().
+ * @req_info: The ethnl request to get the phy from.
+ * @header: The netlink header, used for error reporting.
+ * @extack: The netlink extended ACK, for error reporting.
+ *
+ * The caller must hold RTNL, until it's done interacting with the returned
+ * phy_device.
+ *
+ * Return: A phy_device pointer corresponding either to the passed phy_index
+ * if one is provided. If not, the phy_device attached to the
+ * net_device targeted by this request is returned. If there's no
+ * targeted net_device, or no phy_device is attached, NULL is
+ * returned. If the provided phy_index is invalid, an error pointer
+ * is returned.
+ */
+struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
+ const struct nlattr *header,
+ struct netlink_ext_ack *extack);
+
+/**
* struct ethnl_reply_data - base type of reply data for GET requests
* @dev: device for current reply message; in single shot requests it is
* equal to &ethnl_req_info.dev; in dumps it's different for each
@@ -409,9 +434,12 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops;
extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
extern const struct ethnl_request_ops ethnl_mm_request_ops;
+extern const struct ethnl_request_ops ethnl_phy_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
+extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1];
+extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1];
extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1];
extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
@@ -449,13 +477,14 @@ extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER +
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
-extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
+extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_START_CONTEXT + 1];
extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1];
extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
+extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1];
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
@@ -464,6 +493,12 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info);
+int ethnl_rss_dump_start(struct netlink_callback *cb);
+int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_phy_start(struct netlink_callback *cb);
+int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info);
+int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_phy_done(struct netlink_callback *cb);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c
new file mode 100644
index 000000000000..ed8f690f6bac
--- /dev/null
+++ b/net/ethtool/phy.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Bootlin
+ *
+ */
+#include "common.h"
+#include "netlink.h"
+
+#include <linux/phy.h>
+#include <linux/phy_link_topology.h>
+#include <linux/sfp.h>
+
+struct phy_req_info {
+ struct ethnl_req_info base;
+ struct phy_device_node *pdn;
+};
+
+#define PHY_REQINFO(__req_base) \
+ container_of(__req_base, struct phy_req_info, base)
+
+const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1] = {
+ [ETHTOOL_A_PHY_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+/* Caller holds rtnl */
+static ssize_t
+ethnl_phy_reply_size(const struct ethnl_req_info *req_base,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device_node *pdn = req_info->pdn;
+ struct phy_device *phydev = pdn->phy;
+ size_t size = 0;
+
+ ASSERT_RTNL();
+
+ /* ETHTOOL_A_PHY_INDEX */
+ size += nla_total_size(sizeof(u32));
+
+ /* ETHTOOL_A_DRVNAME */
+ if (phydev->drv)
+ size += nla_total_size(strlen(phydev->drv->name) + 1);
+
+ /* ETHTOOL_A_NAME */
+ size += nla_total_size(strlen(dev_name(&phydev->mdio.dev)) + 1);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_TYPE */
+ size += nla_total_size(sizeof(u32));
+
+ if (phy_on_sfp(phydev)) {
+ const char *upstream_sfp_name = sfp_get_name(pdn->parent_sfp_bus);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */
+ if (upstream_sfp_name)
+ size += nla_total_size(strlen(upstream_sfp_name) + 1);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_INDEX */
+ size += nla_total_size(sizeof(u32));
+ }
+
+ /* ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME */
+ if (phydev->sfp_bus) {
+ const char *sfp_name = sfp_get_name(phydev->sfp_bus);
+
+ if (sfp_name)
+ size += nla_total_size(strlen(sfp_name) + 1);
+ }
+
+ return size;
+}
+
+static int
+ethnl_phy_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb)
+{
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device_node *pdn = req_info->pdn;
+ struct phy_device *phydev = pdn->phy;
+ enum phy_upstream ptype;
+
+ ptype = pdn->upstream_type;
+
+ if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, phydev->phyindex) ||
+ nla_put_string(skb, ETHTOOL_A_PHY_NAME, dev_name(&phydev->mdio.dev)) ||
+ nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, ptype))
+ return -EMSGSIZE;
+
+ if (phydev->drv &&
+ nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, phydev->drv->name))
+ return -EMSGSIZE;
+
+ if (ptype == PHY_UPSTREAM_PHY) {
+ struct phy_device *upstream = pdn->upstream.phydev;
+ const char *sfp_upstream_name;
+
+ /* Parent index */
+ if (nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, upstream->phyindex))
+ return -EMSGSIZE;
+
+ if (pdn->parent_sfp_bus) {
+ sfp_upstream_name = sfp_get_name(pdn->parent_sfp_bus);
+ if (sfp_upstream_name &&
+ nla_put_string(skb, ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,
+ sfp_upstream_name))
+ return -EMSGSIZE;
+ }
+ }
+
+ if (phydev->sfp_bus) {
+ const char *sfp_name = sfp_get_name(phydev->sfp_bus);
+
+ if (sfp_name &&
+ nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,
+ sfp_name))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int ethnl_phy_parse_request(struct ethnl_req_info *req_base,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_link_topology *topo = req_base->dev->link_topo;
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device *phydev;
+
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER],
+ extack);
+ if (!phydev)
+ return 0;
+
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
+ if (!topo)
+ return 0;
+
+ req_info->pdn = xa_load(&topo->phys, phydev->phyindex);
+
+ return 0;
+}
+
+int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct phy_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct sk_buff *rskb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info.base,
+ tb[ETHTOOL_A_PHY_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ rtnl_lock();
+
+ ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+
+ /* No PHY, return early */
+ if (!req_info.pdn)
+ goto err_unlock_rtnl;
+
+ ret = ethnl_phy_reply_size(&req_info.base, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+ reply_len = ret + ethnl_reply_header_size();
+
+ rskb = ethnl_reply_init(reply_len, req_info.base.dev,
+ ETHTOOL_MSG_PHY_GET_REPLY,
+ ETHTOOL_A_PHY_HEADER,
+ info, &reply_payload);
+ if (!rskb) {
+ ret = -ENOMEM;
+ goto err_unlock_rtnl;
+ }
+
+ ret = ethnl_phy_fill_reply(&req_info.base, rskb);
+ if (ret)
+ goto err_free_msg;
+
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info.base);
+ genlmsg_end(rskb, reply_payload);
+
+ return genlmsg_reply(rskb, info);
+
+err_free_msg:
+ nlmsg_free(rskb);
+err_unlock_rtnl:
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info.base);
+ return ret;
+}
+
+struct ethnl_phy_dump_ctx {
+ struct phy_req_info *phy_req_info;
+ unsigned long ifindex;
+ unsigned long phy_index;
+};
+
+int ethnl_phy_start(struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ ctx->phy_req_info = kzalloc(sizeof(*ctx->phy_req_info), GFP_KERNEL);
+ if (!ctx->phy_req_info)
+ return -ENOMEM;
+
+ ret = ethnl_parse_header_dev_get(&ctx->phy_req_info->base,
+ info->attrs[ETHTOOL_A_PHY_HEADER],
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ ctx->ifindex = 0;
+ ctx->phy_index = 0;
+
+ if (ret)
+ kfree(ctx->phy_req_info);
+
+ return ret;
+}
+
+int ethnl_phy_done(struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+
+ if (ctx->phy_req_info->base.dev)
+ ethnl_parse_header_dev_put(&ctx->phy_req_info->base);
+
+ kfree(ctx->phy_req_info);
+
+ return 0;
+}
+
+static int ethnl_phy_dump_one_dev(struct sk_buff *skb, struct net_device *dev,
+ struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ struct phy_req_info *pri = ctx->phy_req_info;
+ struct phy_device_node *pdn;
+ int ret = 0;
+ void *ehdr;
+
+ if (!dev->link_topo)
+ return 0;
+
+ xa_for_each_start(&dev->link_topo->phys, ctx->phy_index, pdn, ctx->phy_index) {
+ ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_PHY_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_PHY_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ pri->pdn = pdn;
+ ret = ethnl_phy_fill_reply(&pri->base, skb);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ genlmsg_end(skb, ehdr);
+ }
+
+ return ret;
+}
+
+int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+
+ if (ctx->phy_req_info->base.dev) {
+ ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb);
+ } else {
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ret = ethnl_phy_dump_one_dev(skb, dev, cb);
+ if (ret)
+ break;
+
+ ctx->phy_index = 0;
+ }
+ }
+ rtnl_unlock();
+
+ return ret;
+}
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index b1e2e3b5027f..d95d92f173a6 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -25,7 +25,7 @@ struct plca_reply_data {
const struct nla_policy ethnl_plca_get_cfg_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid,
@@ -58,10 +58,14 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -80,7 +84,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
memset(&data->plca_cfg, 0xff,
sizeof_field(struct plca_reply_data, plca_cfg));
- ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg);
+ ret = ops->get_plca_cfg(phydev, &data->plca_cfg);
ethnl_ops_complete(dev);
out:
@@ -129,7 +133,7 @@ static int plca_get_cfg_fill_reply(struct sk_buff *skb,
const struct nla_policy ethnl_plca_set_cfg_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255),
[ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255),
@@ -141,15 +145,17 @@ const struct nla_policy ethnl_plca_set_cfg_policy[] = {
static int
ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
struct phy_plca_cfg plca_cfg;
+ struct phy_device *phydev;
bool mod = false;
int ret;
+ phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev)
+ if (IS_ERR_OR_NULL(phydev))
return -EOPNOTSUPP;
ops = ethtool_phy_ops;
@@ -168,7 +174,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
if (!mod)
return 0;
- ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack);
+ ret = ops->set_plca_cfg(phydev, &plca_cfg, info->extack);
return ret < 0 ? ret : 1;
}
@@ -191,7 +197,7 @@ const struct ethnl_request_ops ethnl_plca_cfg_request_ops = {
const struct nla_policy ethnl_plca_get_status_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
@@ -201,10 +207,14 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -223,7 +233,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
memset(&data->plca_st, 0xff,
sizeof_field(struct plca_reply_data, plca_st));
- ret = ops->get_plca_status(dev->phydev, &data->plca_st);
+ ret = ops->get_plca_status(phydev, &data->plca_st);
ethnl_ops_complete(dev);
out:
return ret;
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index ff81aa749784..a0705edca22a 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -28,17 +28,15 @@ struct pse_reply_data {
/* PSE_GET */
const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1] = {
- [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
-static int pse_get_pse_attributes(struct net_device *dev,
+static int pse_get_pse_attributes(struct phy_device *phydev,
struct netlink_ext_ack *extack,
struct pse_reply_data *data)
{
- struct phy_device *phydev = dev->phydev;
-
if (!phydev) {
- NL_SET_ERR_MSG(extack, "No PHY is attached");
+ NL_SET_ERR_MSG(extack, "No PHY found");
return -EOPNOTSUPP;
}
@@ -58,13 +56,20 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
{
struct pse_reply_data *data = PSE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = pse_get_pse_attributes(dev, info->extack, data);
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER],
+ info->extack);
+ if (IS_ERR(phydev))
+ return -ENODEV;
+
+ ret = pse_get_pse_attributes(phydev, info->extack, data);
ethnl_ops_complete(dev);
@@ -206,7 +211,7 @@ static void pse_cleanup_data(struct ethnl_reply_data *reply_base)
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
- [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
@@ -217,14 +222,11 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
};
static int
-ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+ethnl_set_pse_validate(struct phy_device *phydev, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct phy_device *phydev;
- phydev = dev->phydev;
- if (!phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
NL_SET_ERR_MSG(info->extack, "No PHY is attached");
return -EOPNOTSUPP;
}
@@ -249,18 +251,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
}
- return 1;
+ return 0;
}
static int
ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
struct phy_device *phydev;
- int ret = 0;
+ int ret;
- phydev = dev->phydev;
+ phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
+ info->extack);
+ ret = ethnl_set_pse_validate(phydev, info);
+ if (ret)
+ return ret;
if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
unsigned int pw_limit;
@@ -307,7 +312,6 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
.fill_reply = pse_fill_reply,
.cleanup_data = pse_cleanup_data,
- .set_validate = ethnl_set_pse_validate,
.set = ethnl_set_pse,
/* PSE has no notification */
};
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 5c4c4505ab9a..7cb106b590ab 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -10,6 +10,7 @@ struct rss_req_info {
struct rss_reply_data {
struct ethnl_reply_data base;
+ bool no_key_fields;
u32 indir_size;
u32 hkey_size;
u32 hfunc;
@@ -27,6 +28,7 @@ struct rss_reply_data {
const struct nla_policy ethnl_rss_get_policy[] = {
[ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
[ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 },
+ [ETHTOOL_A_RSS_START_CONTEXT] = { .type = NLA_U32 },
};
static int
@@ -37,18 +39,18 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
if (tb[ETHTOOL_A_RSS_CONTEXT])
request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
+ if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
+ NL_SET_BAD_ATTR(extack, tb[ETHTOOL_A_RSS_START_CONTEXT]);
+ return -EINVAL;
+ }
return 0;
}
static int
-rss_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base,
- const struct genl_info *info)
+rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
{
- struct rss_reply_data *data = RSS_REPDATA(reply_base);
- struct rss_req_info *request = RSS_REQINFO(req_base);
- struct net_device *dev = reply_base->dev;
struct ethtool_rxfh_param rxfh = {};
const struct ethtool_ops *ops;
u32 total_size, indir_bytes;
@@ -56,12 +58,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
int ret;
ops = dev->ethtool_ops;
- if (!ops->get_rxfh)
- return -EOPNOTSUPP;
-
- /* Some drivers don't handle rss_context */
- if (request->rss_context && !ops->cap_rss_ctx_supported)
- return -EOPNOTSUPP;
ret = ethnl_ops_begin(dev);
if (ret < 0)
@@ -91,7 +87,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
rxfh.indir = data->indir_table;
rxfh.key_size = data->hkey_size;
rxfh.key = data->hkey;
- rxfh.rss_context = request->rss_context;
ret = ops->get_rxfh(dev, &rxfh);
if (ret)
@@ -105,6 +100,67 @@ out_ops:
}
static int
+rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
+{
+ struct ethtool_rxfh_context *ctx;
+ u32 total_size, indir_bytes;
+ u8 *rss_config;
+
+ ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
+ if (!ctx)
+ return -ENOENT;
+
+ data->indir_size = ctx->indir_size;
+ data->hkey_size = ctx->key_size;
+ data->hfunc = ctx->hfunc;
+ data->input_xfrm = ctx->input_xfrm;
+
+ indir_bytes = data->indir_size * sizeof(u32);
+ total_size = indir_bytes + data->hkey_size;
+ rss_config = kzalloc(total_size, GFP_KERNEL);
+ if (!rss_config)
+ return -ENOMEM;
+
+ data->indir_table = (u32 *)rss_config;
+ memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes);
+
+ if (data->hkey_size) {
+ data->hkey = rss_config + indir_bytes;
+ memcpy(data->hkey, ethtool_rxfh_context_key(ctx),
+ data->hkey_size);
+ }
+
+ return 0;
+}
+
+static int
+rss_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
+{
+ struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+
+ ops = dev->ethtool_ops;
+ if (!ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ /* Some drivers don't handle rss_context */
+ if (request->rss_context) {
+ if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
+ return -EOPNOTSUPP;
+
+ data->no_key_fields = !ops->rxfh_per_ctx_key;
+ return rss_prepare_ctx(request, dev, data, info);
+ }
+
+ return rss_prepare_get(request, dev, data, info);
+}
+
+static int
rss_reply_size(const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
@@ -131,13 +187,18 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context))
return -EMSGSIZE;
+ if ((data->indir_size &&
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table)))
+ return -EMSGSIZE;
+
+ if (data->no_key_fields)
+ return 0;
+
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
(data->input_xfrm &&
nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
- (data->indir_size &&
- nla_put(skb, ETHTOOL_A_RSS_INDIR,
- sizeof(u32) * data->indir_size, data->indir_table)) ||
(data->hkey_size &&
nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
return -EMSGSIZE;
@@ -152,6 +213,146 @@ static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
kfree(data->indir_table);
}
+struct rss_nl_dump_ctx {
+ unsigned long ifindex;
+ unsigned long ctx_idx;
+
+ /* User wants to only dump contexts from given ifindex */
+ unsigned int match_ifindex;
+ unsigned int start_ctx;
+};
+
+static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb)
+{
+ NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx);
+
+ return (struct rss_nl_dump_ctx *)cb->ctx;
+}
+
+int ethnl_rss_dump_start(struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ int ret;
+
+ /* Filtering by context not supported */
+ if (tb[ETHTOOL_A_RSS_CONTEXT]) {
+ NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT]);
+ return -EINVAL;
+ }
+ if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
+ ctx->start_ctx = nla_get_u32(tb[ETHTOOL_A_RSS_START_CONTEXT]);
+ ctx->ctx_idx = ctx->start_ctx;
+ }
+
+ ret = ethnl_parse_header_dev_get(&req_info,
+ tb[ETHTOOL_A_RSS_HEADER],
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ if (req_info.dev) {
+ ctx->match_ifindex = req_info.dev->ifindex;
+ ctx->ifindex = ctx->match_ifindex;
+ ethnl_parse_header_dev_put(&req_info);
+ req_info.dev = NULL;
+ }
+
+ return ret;
+}
+
+static int
+rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, u32 rss_context)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct rss_reply_data data = {};
+ struct rss_req_info req = {};
+ void *ehdr;
+ int ret;
+
+ req.rss_context = rss_context;
+
+ ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_RSS_GET_REPLY);
+ if (!ehdr)
+ return -EMSGSIZE;
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_RSS_HEADER);
+ if (ret < 0)
+ goto err_cancel;
+
+ /* Context 0 is not currently storred or cached in the XArray */
+ if (!rss_context)
+ ret = rss_prepare_get(&req, dev, &data, info);
+ else
+ ret = rss_prepare_ctx(&req, dev, &data, info);
+ if (ret)
+ goto err_cancel;
+
+ ret = rss_fill_reply(skb, &req.base, &data.base);
+ if (ret)
+ goto err_cleanup;
+ genlmsg_end(skb, ehdr);
+
+ rss_cleanup_data(&data.base);
+ return 0;
+
+err_cleanup:
+ rss_cleanup_data(&data.base);
+err_cancel:
+ genlmsg_cancel(skb, ehdr);
+ return ret;
+}
+
+static int
+rss_dump_one_dev(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev)
+{
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ int ret;
+
+ if (!dev->ethtool_ops->get_rxfh)
+ return 0;
+
+ if (!ctx->ctx_idx) {
+ ret = rss_dump_one_ctx(skb, cb, dev, 0);
+ if (ret)
+ return ret;
+ ctx->ctx_idx++;
+ }
+
+ for (; xa_find(&dev->ethtool->rss_ctx, &ctx->ctx_idx,
+ ULONG_MAX, XA_PRESENT); ctx->ctx_idx++) {
+ ret = rss_dump_one_ctx(skb, cb, dev, ctx->ctx_idx);
+ if (ret)
+ return ret;
+ }
+ ctx->ctx_idx = ctx->start_ctx;
+
+ return 0;
+}
+
+int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex)
+ break;
+
+ ret = rss_dump_one_dev(skb, cb, dev);
+ if (ret)
+ break;
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+
const struct ethnl_request_ops ethnl_rss_request_ops = {
.request_cmd = ETHTOOL_MSG_RSS_GET,
.reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY,
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index c678b484a079..b3382b3cf325 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -126,7 +126,7 @@ struct strset_reply_data {
const struct nla_policy ethnl_strset_get_policy[] = {
[ETHTOOL_A_STRSET_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_STRSET_STRINGSETS] = { .type = NLA_NESTED },
[ETHTOOL_A_STRSET_COUNTS_ONLY] = { .type = NLA_FLAG },
};
@@ -233,17 +233,18 @@ static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
}
static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
- unsigned int id, bool counts_only)
+ struct phy_device *phydev, unsigned int id,
+ bool counts_only)
{
const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
void *strings;
int count, ret;
- if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ if (id == ETH_SS_PHY_STATS && phydev &&
!ops->get_ethtool_phy_stats && phy_ops &&
phy_ops->get_sset_count)
- ret = phy_ops->get_sset_count(dev->phydev);
+ ret = phy_ops->get_sset_count(phydev);
else if (ops->get_sset_count && ops->get_strings)
ret = ops->get_sset_count(dev, id);
else
@@ -258,10 +259,10 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL);
if (!strings)
return -ENOMEM;
- if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ if (id == ETH_SS_PHY_STATS && phydev &&
!ops->get_ethtool_phy_stats && phy_ops &&
phy_ops->get_strings)
- phy_ops->get_strings(dev->phydev, strings);
+ phy_ops->get_strings(phydev, strings);
else
ops->get_strings(dev, id, strings);
info->strings = strings;
@@ -279,6 +280,8 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
struct strset_reply_data *data = STRSET_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
unsigned int i;
int ret;
@@ -289,14 +292,20 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
for (i = 0; i < ETH_SS_COUNT; i++) {
if ((req_info->req_ids & (1U << i)) &&
data->sets[i].per_dev) {
- if (info)
- GENL_SET_ERR_MSG(info, "requested per device strings without dev");
+ GENL_SET_ERR_MSG(info, "requested per device strings without dev");
return -EINVAL;
}
}
return 0;
}
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS],
+ info->extack);
+
+ /* phydev can be NULL, check for errors only */
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto err_strset;
@@ -305,7 +314,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
!data->sets[i].per_dev)
continue;
- ret = strset_prepare_set(&data->sets[i], dev, i,
+ ret = strset_prepare_set(&data->sets[i], dev, phydev, i,
req_info->counts_only);
if (ret < 0)
goto err_ops;
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 34fd1d9b2db8..55442b2f518a 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -17,7 +17,7 @@
#include <uapi/linux/handshake.h>
#include "handshake.h"
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
static int test_accept_func(struct handshake_req *req, struct genl_info *info,
int fd)
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 89637e732866..7e46d130dce2 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -153,7 +153,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
if (!req) {
err = -EBUSY;
trace_handshake_cmd_done_err(net, req, sock->sk, err);
- fput(sock->file);
+ sockfd_put(sock);
return err;
}
@@ -164,7 +164,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]);
handshake_complete(req, status, info);
- fput(sock->file);
+ sockfd_put(sock);
return 0;
}
diff --git a/net/handshake/request.c b/net/handshake/request.c
index 94d5cef3e048..274d2c89b6b2 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet.h>
-#include <linux/fdtable.h>
#include <linux/rhashtable.h>
#include <net/sock.h>
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e4cc6b78dcfc..03eadd6c51fd 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -246,20 +246,22 @@ static const struct header_ops hsr_header_ops = {
.parse = eth_header_parse,
};
-static struct sk_buff *hsr_init_skb(struct hsr_port *master)
+static struct sk_buff *hsr_init_skb(struct hsr_port *master, int extra)
{
struct hsr_priv *hsr = master->hsr;
struct sk_buff *skb;
int hlen, tlen;
+ int len;
hlen = LL_RESERVED_SPACE(master->dev);
tlen = master->dev->needed_tailroom;
+ len = sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload);
/* skb size is same for PRP/HSR frames, only difference
* being, for PRP it is a trailer and for HSR it is a
- * header
+ * header.
+ * RedBox might use @extra more bytes.
*/
- skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) +
- sizeof(struct hsr_sup_payload) + hlen + tlen);
+ skb = dev_alloc_skb(len + extra + hlen + tlen);
if (!skb)
return skb;
@@ -268,6 +270,8 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
skb->dev = master->dev;
skb->priority = TC_PRIO_CONTROL;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
if (dev_hard_header(skb, skb->dev, ETH_P_PRP,
hsr->sup_multicast_addr,
skb->dev->dev_addr, skb->len) <= 0)
@@ -275,8 +279,6 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
return skb;
out:
@@ -295,6 +297,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
struct hsr_sup_tlv *hsr_stlv;
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
+ int extra = 0;
*interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
if (hsr->announce_count < 3 && hsr->prot_version == 0) {
@@ -303,7 +306,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
hsr->announce_count++;
}
- skb = hsr_init_skb(port);
+ if (hsr->redbox)
+ extra = sizeof(struct hsr_sup_tlv) +
+ sizeof(struct hsr_sup_payload);
+
+ skb = hsr_init_skb(port, extra);
if (!skb) {
netdev_warn_once(port->dev, "HSR: Could not send supervision frame\n");
return;
@@ -362,7 +369,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
struct hsr_sup_tag *hsr_stag;
struct sk_buff *skb;
- skb = hsr_init_skb(master);
+ skb = hsr_init_skb(master, 0);
if (!skb) {
netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
@@ -427,6 +434,9 @@ static void hsr_proxy_announce(struct timer_list *t)
* of SAN nodes stored in ProxyNodeTable.
*/
interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ if (!interlink)
+ goto done;
+
list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
if (hsr_addr_is_redbox(hsr, node->macaddress_A))
continue;
@@ -441,6 +451,7 @@ static void hsr_proxy_announce(struct timer_list *t)
mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
}
+done:
rcu_read_unlock();
}
@@ -511,6 +522,77 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
}
}
+static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
+ __be16 proto, u16 vid)
+{
+ bool is_slave_a_added = false;
+ bool is_slave_b_added = false;
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+ int ret = 0;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER ||
+ port->type == HSR_PT_INTERLINK)
+ continue;
+
+ ret = vlan_vid_add(port->dev, proto, vid);
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ if (ret) {
+ /* clean up Slave-B */
+ netdev_err(dev, "add vid failed for Slave-A\n");
+ if (is_slave_b_added)
+ vlan_vid_del(port->dev, proto, vid);
+ return ret;
+ }
+
+ is_slave_a_added = true;
+ break;
+
+ case HSR_PT_SLAVE_B:
+ if (ret) {
+ /* clean up Slave-A */
+ netdev_err(dev, "add vid failed for Slave-B\n");
+ if (is_slave_a_added)
+ vlan_vid_del(port->dev, proto, vid);
+ return ret;
+ }
+
+ is_slave_b_added = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
+ __be16 proto, u16 vid)
+{
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ vlan_vid_del(port->dev, proto, vid);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
@@ -519,6 +601,8 @@ static const struct net_device_ops hsr_device_ops = {
.ndo_change_rx_flags = hsr_change_rx_flags,
.ndo_fix_features = hsr_fix_features,
.ndo_set_rx_mode = hsr_set_rx_mode,
+ .ndo_vlan_rx_add_vid = hsr_ndo_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = hsr_ndo_vlan_rx_kill_vid,
};
static const struct device_type hsr_type = {
@@ -554,25 +638,21 @@ void hsr_dev_setup(struct net_device *dev)
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
+ /* Prevent recursive tx locking */
+ dev->lltx = true;
+ /* Not sure about this. Taken from bridge code. netdevice.h says
+ * it means "Does not change network namespaces".
+ */
+ dev->netns_local = true;
dev->needs_free_netdev = true;
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
- NETIF_F_HW_VLAN_CTAG_TX;
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
dev->features = dev->hw_features;
-
- /* Prevent recursive tx locking */
- dev->features |= NETIF_F_LLTX;
- /* VLAN on top of HSR needs testing and probably some work on
- * hsr_header_create() etc.
- */
- dev->features |= NETIF_F_VLAN_CHALLENGED;
- /* Not sure about this. Taken from bridge code. netdev_features.h says
- * it means "Does not change network namespaces".
- */
- dev->features |= NETIF_F_NETNS_LOCAL;
}
/* Return true if dev is a HSR master; return false otherwise.
@@ -625,7 +705,6 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
/* Overflow soon to find bugs easier: */
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
- hsr->interlink_sequence_nr = HSR_SEQNR_START;
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
@@ -649,6 +728,10 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
(slave[1]->features & NETIF_F_HW_HSR_FWD))
hsr->fwd_offloaded = true;
+ if ((slave[0]->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ (slave[1]->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ hsr_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
res = register_netdevice(hsr_dev);
if (res)
goto err_unregister;
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index b38060246e62..87bb3a91598e 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -280,6 +280,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
struct hsr_port *port, u8 proto_version)
{
struct hsr_ethhdr *hsr_ethhdr;
+ unsigned char *pc;
int lsdu_size;
/* pad to minimum packet size which is 60 + 6 (HSR tag) */
@@ -290,7 +291,18 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
if (frame->is_vlan)
lsdu_size -= 4;
- hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb);
+ pc = skb_mac_header(skb);
+ if (frame->is_vlan)
+ /* This 4-byte shift (size of a vlan tag) does not
+ * mean that the ethhdr starts there. But rather it
+ * provides the proper environment for accessing
+ * the fields, such as hsr_tag etc., just like
+ * when the vlan tag is not there. This is because
+ * the hsr tag is after the vlan tag.
+ */
+ hsr_ethhdr = (struct hsr_ethhdr *)(pc + VLAN_HLEN);
+ else
+ hsr_ethhdr = (struct hsr_ethhdr *)pc;
hsr_set_path_id(hsr_ethhdr, port);
set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size);
@@ -368,7 +380,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
- skb = skb_copy_expand(frame->skb_std, 0,
+ skb = skb_copy_expand(frame->skb_std, skb_headroom(frame->skb_std),
skb_tailroom(frame->skb_std) + HSR_HLEN,
GFP_ATOMIC);
return prp_fill_rct(skb, frame, port);
@@ -688,11 +700,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
frame->is_vlan = true;
if (frame->is_vlan) {
+ if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr))
+ return -EINVAL;
vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
- /* FIXME: */
- netdev_warn_once(skb->dev, "VLAN not yet supported");
- return -EINVAL;
}
frame->is_from_san = false;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index ab1f8d35d9dc..fcfeb79bb040 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -203,7 +203,6 @@ struct hsr_priv {
struct timer_list prune_proxy_timer;
int announce_count;
u16 sequence_nr;
- u16 interlink_sequence_nr; /* Interlink port seq_nr */
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
spinlock_t seqnr_lock; /* locking for sequence_nr */
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f6ff0b61e08a..b68f2f71d0e1 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -82,10 +82,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
}
- if (!data[IFLA_HSR_MULTICAST_SPEC])
- multicast_spec = 0;
- else
- multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]);
+ multicast_spec = nla_get_u8_default(data[IFLA_HSR_MULTICAST_SPEC], 0);
if (data[IFLA_HSR_PROTOCOL])
proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]);
@@ -128,9 +125,9 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
{
struct hsr_priv *hsr = netdev_priv(dev);
- del_timer_sync(&hsr->prune_timer);
- del_timer_sync(&hsr->prune_proxy_timer);
- del_timer_sync(&hsr->announce_timer);
+ timer_delete_sync(&hsr->prune_timer);
+ timer_delete_sync(&hsr->prune_proxy_timer);
+ timer_delete_sync(&hsr->announce_timer);
timer_delete_sync(&hsr->announce_proxy_timer);
hsr_debugfs_term(hsr);
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index af6cf64a00e0..464f683e016d 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -67,7 +67,16 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
skb_set_network_header(skb, ETH_HLEN + HSR_HLEN);
skb_reset_mac_len(skb);
- hsr_forward_skb(skb, port);
+ /* Only the frames received over the interlink port will assign a
+ * sequence number and require synchronisation vs other sender.
+ */
+ if (port->type == HSR_PT_INTERLINK) {
+ spin_lock_bh(&hsr->seqnr_lock);
+ hsr_forward_skb(skb, port);
+ spin_unlock_bh(&hsr->seqnr_lock);
+ } else {
+ hsr_forward_skb(skb, port);
+ }
finish_consume:
return RX_HANDLER_CONSUMED;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 77b4e92027c5..175efd860f7b 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -116,7 +116,7 @@ static void lowpan_setup(struct net_device *ldev)
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
ldev->needs_free_netdev = true;
- ldev->features |= NETIF_F_NETNS_LOCAL;
+ ldev->netns_local = true;
}
static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 60e8fff1347e..88adb04e4072 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -226,11 +226,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) {
if (!wpan_dev->netdev)
continue;
- wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = false;
err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
if (err)
break;
- wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = true;
}
if (err) {
@@ -242,11 +242,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
list) {
if (!wpan_dev->netdev)
continue;
- wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = false;
err = dev_change_net_namespace(wpan_dev->netdev, net,
"wpan%d");
WARN_ON(err);
- wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = true;
}
return err;
@@ -291,7 +291,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
switch (state) {
/* TODO NETDEV_DEVTYPE */
case NETDEV_REGISTER:
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
wpan_dev->identifier = ++rdev->wpan_dev_id;
list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list);
rdev->devlist_generation++;
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 29bf97640166..74ef0a310afb 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -202,10 +202,7 @@ int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info)
addr.pan_id = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
- if (info->attrs[IEEE802154_ATTR_PAGE])
- page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
- else
- page = 0;
+ page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0);
ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr,
nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]),
@@ -338,10 +335,7 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]);
coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]);
- if (info->attrs[IEEE802154_ATTR_PAGE])
- page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
- else
- page = 0;
+ page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0);
if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) {
ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS);
@@ -388,10 +382,7 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]);
- if (info->attrs[IEEE802154_ATTR_PAGE])
- page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
- else
- page = 0;
+ page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0);
ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels,
page, duration);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 7eb37de3add2..5a024ca60d35 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1438,22 +1438,18 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info)
}
/* Use current page by default */
- if (info->attrs[NL802154_ATTR_PAGE])
- request->page = nla_get_u8(info->attrs[NL802154_ATTR_PAGE]);
- else
- request->page = wpan_phy->current_page;
+ request->page = nla_get_u8_default(info->attrs[NL802154_ATTR_PAGE],
+ wpan_phy->current_page);
/* Scan all supported channels by default */
- if (info->attrs[NL802154_ATTR_SCAN_CHANNELS])
- request->channels = nla_get_u32(info->attrs[NL802154_ATTR_SCAN_CHANNELS]);
- else
- request->channels = wpan_phy->supported.channels[request->page];
+ request->channels =
+ nla_get_u32_default(info->attrs[NL802154_ATTR_SCAN_CHANNELS],
+ wpan_phy->supported.channels[request->page]);
/* Use maximum duration order by default */
- if (info->attrs[NL802154_ATTR_SCAN_DURATION])
- request->duration = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_DURATION]);
- else
- request->duration = IEEE802154_MAX_SCAN_DURATION;
+ request->duration =
+ nla_get_u8_default(info->attrs[NL802154_ATTR_SCAN_DURATION],
+ IEEE802154_MAX_SCAN_DURATION);
err = rdev_trigger_scan(rdev, request);
if (err) {
@@ -1598,10 +1594,8 @@ nl802154_send_beacons(struct sk_buff *skb, struct genl_info *info)
request->wpan_phy = wpan_phy;
/* Use maximum duration order by default */
- if (info->attrs[NL802154_ATTR_BEACON_INTERVAL])
- request->interval = nla_get_u8(info->attrs[NL802154_ATTR_BEACON_INTERVAL]);
- else
- request->interval = IEEE802154_MAX_SCAN_DURATION;
+ request->interval = nla_get_u8_default(info->attrs[NL802154_ATTR_BEACON_INTERVAL],
+ IEEE802154_MAX_SCAN_DURATION);
err = rdev_send_beacons(rdev, request);
if (err) {
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 990a83455dcf..18d267921bb5 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1043,19 +1043,21 @@ static int ieee802154_create(struct net *net, struct socket *sock,
if (sk->sk_prot->hash) {
rc = sk->sk_prot->hash(sk);
- if (rc) {
- sk_common_release(sk);
- goto out;
- }
+ if (rc)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
rc = sk->sk_prot->init(sk);
if (rc)
- sk_common_release(sk);
+ goto out_sk_release;
}
out:
return rc;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
static const struct net_proto_family ieee802154_family_ops = {
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8e94ed7c56a0..6d2c97f8e9ef 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -661,7 +661,8 @@ config TCP_CONG_CDG
For further details see:
D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
- delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
+ delay gradients." In Networking 2011. Preprint:
+ http://caia.swin.edu.au/cv/dahayes/content/networking2011-cdg-preprint.pdf
config TCP_CONG_BBR
tristate "BBR TCP"
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b24d74616637..8095e82de808 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -376,32 +376,30 @@ lookup_protocol:
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
err = sk->sk_prot->hash(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (!kern) {
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 11c1519b3699..cb9a7ed8abd3 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1215,7 +1215,7 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
NEIGH_UPDATE_F_ADMIN, 0);
write_lock_bh(&tbl->lock);
neigh_release(neigh);
- neigh_remove_one(neigh, tbl);
+ neigh_remove_one(neigh);
write_unlock_bh(&tbl->lock);
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 3f88d0961e5b..554804774628 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -14,10 +14,6 @@
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
static struct bpf_struct_ops bpf_tcp_congestion_ops;
-static u32 unsupported_ops[] = {
- offsetof(struct tcp_congestion_ops, get_info),
-};
-
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
static const struct btf_type *tcp_congestion_ops_type;
@@ -45,18 +41,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
return 0;
}
-static bool is_unsupported(u32 member_offset)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(unsupported_ops); i++) {
- if (member_offset == unsupported_ops[i])
- return true;
- }
-
- return false;
-}
-
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -251,15 +235,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
return 0;
}
-static int bpf_tcp_ca_check_member(const struct btf_type *t,
- const struct btf_member *member,
- const struct bpf_prog *prog)
-{
- if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
- return -ENOTSUPP;
- return 0;
-}
-
static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link)
{
return tcp_register_congestion_control(kdata);
@@ -354,7 +329,6 @@ static struct bpf_struct_ops bpf_tcp_congestion_ops = {
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
.update = bpf_tcp_ca_update,
- .check_member = bpf_tcp_ca_check_member,
.init_member = bpf_tcp_ca_init_member,
.init = bpf_tcp_ca_init,
.validate = bpf_tcp_ca_validate,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8cc0e2f4159d..740af8541d2f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -37,7 +37,7 @@
#include <net/cipso_ipv4.h>
#include <linux/atomic.h>
#include <linux/bug.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* List of available DOI definitions */
/* XXX - This currently assumes a minimal number of different DOIs in use,
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index cc6d0bd7b0a9..4aca1f05edd3 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -61,15 +61,17 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
err = -EACCES;
goto out;
}
+
+ /* Update addresses before rehashing */
+ inet->inet_daddr = fl4->daddr;
+ inet->inet_dport = usin->sin_port;
if (!inet->inet_saddr)
- inet->inet_saddr = fl4->saddr; /* Update source address */
+ inet->inet_saddr = fl4->saddr;
if (!inet->inet_rcv_saddr) {
inet->inet_rcv_saddr = fl4->saddr;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
- inet->inet_daddr = fl4->daddr;
- inet->inet_dport = usin->sin_port;
reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d96f3e452fef..c8b3cf5fba4c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -119,11 +119,9 @@ struct inet_fill_args {
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
-static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-
static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
- u32 val = (__force u32) addr ^ net_hash_mix(net);
+ u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}
@@ -133,13 +131,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
u32 hash = inet_addr_hash(net, ifa->ifa_local);
ASSERT_RTNL();
- hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
+ hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
}
static void inet_hash_remove(struct in_ifaddr *ifa)
{
ASSERT_RTNL();
- hlist_del_init_rcu(&ifa->hash);
+ hlist_del_init_rcu(&ifa->addr_lst);
}
/**
@@ -186,9 +184,8 @@ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
u32 hash = inet_addr_hash(net, addr);
struct in_ifaddr *ifa;
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
- if (ifa->ifa_local == addr &&
- net_eq(dev_net(ifa->ifa_dev->dev), net))
+ hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
+ if (ifa->ifa_local == addr)
return ifa;
return NULL;
@@ -216,17 +213,27 @@ static void devinet_sysctl_unregister(struct in_device *idev)
/* Locks all the inet devices. */
-static struct in_ifaddr *inet_alloc_ifa(void)
+static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
+ struct in_ifaddr *ifa;
+
+ ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
+ if (!ifa)
+ return NULL;
+
+ in_dev_hold(in_dev);
+ ifa->ifa_dev = in_dev;
+
+ INIT_HLIST_NODE(&ifa->addr_lst);
+
+ return ifa;
}
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
- if (ifa->ifa_dev)
- in_dev_put(ifa->ifa_dev);
+ in_dev_put(ifa->ifa_dev);
kfree(ifa);
}
@@ -288,17 +295,19 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* Account for reference dev->ip_ptr (below) */
refcount_set(&in_dev->refcnt, 1);
- err = devinet_sysctl_register(in_dev);
- if (err) {
- in_dev->dead = 1;
- neigh_parms_release(&arp_tbl, in_dev->arp_parms);
- in_dev_put(in_dev);
- in_dev = NULL;
- goto out;
+ if (dev != blackhole_netdev) {
+ err = devinet_sysctl_register(in_dev);
+ if (err) {
+ in_dev->dead = 1;
+ neigh_parms_release(&arp_tbl, in_dev->arp_parms);
+ in_dev_put(in_dev);
+ in_dev = NULL;
+ goto out;
+ }
+ ip_mc_init_dev(in_dev);
+ if (dev->flags & IFF_UP)
+ ip_mc_up(in_dev);
}
- ip_mc_init_dev(in_dev);
- if (dev->flags & IFF_UP)
- ip_mc_up(in_dev);
/* we can receive as soon as ip_ptr is set -- do this last */
rcu_assign_pointer(dev->ip_ptr, in_dev);
@@ -337,6 +346,19 @@ static void inetdev_destroy(struct in_device *in_dev)
in_dev_put(in_dev);
}
+static int __init inet_blackhole_dev_init(void)
+{
+ int err = 0;
+
+ rtnl_lock();
+ if (!inetdev_init(blackhole_netdev))
+ err = -ENOMEM;
+ rtnl_unlock();
+
+ return err;
+}
+late_initcall(inet_blackhole_dev_init);
+
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
const struct in_ifaddr *ifa;
@@ -474,26 +496,18 @@ static void inet_del_ifa(struct in_device *in_dev,
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
-static void check_lifetime(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
-
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid, struct netlink_ext_ack *extack)
{
struct in_ifaddr __rcu **last_primary, **ifap;
struct in_device *in_dev = ifa->ifa_dev;
+ struct net *net = dev_net(in_dev->dev);
struct in_validator_info ivi;
struct in_ifaddr *ifa1;
int ret;
ASSERT_RTNL();
- if (!ifa->ifa_local) {
- inet_free_ifa(ifa);
- return 0;
- }
-
ifa->ifa_flags &= ~IFA_F_SECONDARY;
last_primary = &in_dev->ifa_list;
@@ -551,8 +565,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
inet_hash_insert(dev_net(in_dev->dev), ifa);
- cancel_delayed_work(&check_lifetime_work);
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
+ cancel_delayed_work(&net->ipv4.addr_chk_work);
+ queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
@@ -565,26 +579,21 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
+ if (!ifa->ifa_local) {
+ inet_free_ifa(ifa);
+ return 0;
+ }
+
return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
-
- ASSERT_RTNL();
+ struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
- if (!in_dev) {
- inet_free_ifa(ifa);
- return -ENOBUFS;
- }
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
- if (ifa->ifa_dev != in_dev) {
- WARN_ON(ifa->ifa_dev);
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
- }
+
if (ipv4_is_loopback(ifa->ifa_local))
ifa->ifa_scope = RT_SCOPE_HOST;
return inet_insert_ifa(ifa);
@@ -634,7 +643,7 @@ static int ip_mc_autojoin_config(struct net *net, bool join,
struct sock *sk = net->ipv4.mc_autojoin_sk;
int ret;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
lock_sock(sk);
if (join)
@@ -660,22 +669,24 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in_ifaddr *ifa;
int err;
- ASSERT_RTNL();
-
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv4_policy, extack);
if (err < 0)
- goto errout;
+ goto out;
ifm = nlmsg_data(nlh);
+
+ rtnl_net_lock(net);
+
in_dev = inetdev_by_index(net, ifm->ifa_index);
if (!in_dev) {
NL_SET_ERR_MSG(extack, "ipv4: Device not found");
err = -ENODEV;
- goto errout;
+ goto unlock;
}
- for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
+ for (ifap = &in_dev->ifa_list;
+ (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (tb[IFA_LOCAL] &&
ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
@@ -691,33 +702,37 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ipv4_is_multicast(ifa->ifa_address))
ip_mc_autojoin_config(net, false, ifa);
+
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
- return 0;
+ goto unlock;
}
NL_SET_ERR_MSG(extack, "ipv4: Address not found");
err = -EADDRNOTAVAIL;
-errout:
+unlock:
+ rtnl_net_unlock(net);
+out:
return err;
}
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
static void check_lifetime(struct work_struct *work)
{
unsigned long now, next, next_sec, next_sched;
struct in_ifaddr *ifa;
struct hlist_node *n;
+ struct net *net;
int i;
+ net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+ struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
bool change_needed = false;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
+ hlist_for_each_entry_rcu(ifa, head, addr_lst) {
unsigned long age, tstamp;
u32 preferred_lft;
u32 valid_lft;
@@ -754,8 +769,9 @@ static void check_lifetime(struct work_struct *work)
rcu_read_unlock();
if (!change_needed)
continue;
- rtnl_lock();
- hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
+
+ rtnl_net_lock(net);
+ hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
unsigned long age;
if (ifa->ifa_flags & IFA_F_PERMANENT)
@@ -771,7 +787,7 @@ static void check_lifetime(struct work_struct *work)
struct in_ifaddr *tmp;
ifap = &ifa->ifa_dev->ifa_list;
- tmp = rtnl_dereference(*ifap);
+ tmp = rtnl_net_dereference(net, *ifap);
while (tmp) {
if (tmp == ifa) {
inet_del_ifa(ifa->ifa_dev,
@@ -779,7 +795,7 @@ static void check_lifetime(struct work_struct *work)
break;
}
ifap = &tmp->ifa_next;
- tmp = rtnl_dereference(*ifap);
+ tmp = rtnl_net_dereference(net, *ifap);
}
} else if (ifa->ifa_preferred_lft !=
INFINITY_LIFE_TIME &&
@@ -789,7 +805,7 @@ static void check_lifetime(struct work_struct *work)
rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
}
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
next_sec = round_jiffies_up(next);
@@ -804,8 +820,8 @@ static void check_lifetime(struct work_struct *work)
if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
- next_sched - now);
+ queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
+ next_sched - now);
}
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
@@ -834,35 +850,54 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
}
-static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
- __u32 *pvalid_lft, __u32 *pprefered_lft,
- struct netlink_ext_ack *extack)
+static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
+ struct netlink_ext_ack *extack,
+ __u32 *valid_lft, __u32 *prefered_lft)
{
- struct nlattr *tb[IFA_MAX+1];
- struct in_ifaddr *ifa;
- struct ifaddrmsg *ifm;
- struct net_device *dev;
- struct in_device *in_dev;
+ struct ifaddrmsg *ifm = nlmsg_data(nlh);
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv4_policy, extack);
if (err < 0)
- goto errout;
-
- ifm = nlmsg_data(nlh);
- err = -EINVAL;
+ return err;
if (ifm->ifa_prefixlen > 32) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
- goto errout;
+ return -EINVAL;
}
if (!tb[IFA_LOCAL]) {
NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
- goto errout;
+ return -EINVAL;
+ }
+
+ if (tb[IFA_CACHEINFO]) {
+ struct ifa_cacheinfo *ci;
+
+ ci = nla_data(tb[IFA_CACHEINFO]);
+ if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+ NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
+ return -EINVAL;
+ }
+
+ *valid_lft = ci->ifa_valid;
+ *prefered_lft = ci->ifa_prefered;
}
+ return 0;
+}
+
+static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct ifaddrmsg *ifm = nlmsg_data(nlh);
+ struct in_device *in_dev;
+ struct net_device *dev;
+ struct in_ifaddr *ifa;
+ int err;
+
dev = __dev_get_by_index(net, ifm->ifa_index);
err = -ENODEV;
if (!dev) {
@@ -870,12 +905,12 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
goto errout;
}
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
err = -ENOBUFS;
if (!in_dev)
goto errout;
- ifa = inet_alloc_ifa();
+ ifa = inet_alloc_ifa(in_dev);
if (!ifa)
/*
* A potential indev allocation can be left alive, it stays
@@ -885,19 +920,14 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
- in_dev_hold(in_dev);
if (!tb[IFA_ADDRESS])
tb[IFA_ADDRESS] = tb[IFA_LOCAL];
- INIT_HLIST_NODE(&ifa->hash);
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
- ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
- ifm->ifa_flags;
+ ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
ifa->ifa_scope = ifm->ifa_scope;
- ifa->ifa_dev = in_dev;
-
ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
@@ -915,76 +945,69 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (tb[IFA_PROTO])
ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
- if (tb[IFA_CACHEINFO]) {
- struct ifa_cacheinfo *ci;
-
- ci = nla_data(tb[IFA_CACHEINFO]);
- if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
- NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
- err = -EINVAL;
- goto errout_free;
- }
- *pvalid_lft = ci->ifa_valid;
- *pprefered_lft = ci->ifa_prefered;
- }
-
return ifa;
-errout_free:
- inet_free_ifa(ifa);
errout:
return ERR_PTR(err);
}
-static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
+static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1;
- if (!ifa->ifa_local)
- return NULL;
-
- in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
if (ifa1->ifa_mask == ifa->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, ifa) &&
ifa1->ifa_local == ifa->ifa_local)
return ifa1;
}
+
return NULL;
}
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ __u32 prefered_lft = INFINITY_LIFE_TIME;
+ __u32 valid_lft = INFINITY_LIFE_TIME;
struct net *net = sock_net(skb->sk);
- struct in_ifaddr *ifa;
struct in_ifaddr *ifa_existing;
- __u32 valid_lft = INFINITY_LIFE_TIME;
- __u32 prefered_lft = INFINITY_LIFE_TIME;
+ struct nlattr *tb[IFA_MAX + 1];
+ struct in_ifaddr *ifa;
+ int ret;
- ASSERT_RTNL();
+ ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
+ if (ret < 0)
+ return ret;
+
+ if (!nla_get_in_addr(tb[IFA_LOCAL]))
+ return 0;
- ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
- if (IS_ERR(ifa))
- return PTR_ERR(ifa);
+ rtnl_net_lock(net);
- ifa_existing = find_matching_ifa(ifa);
+ ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
+ if (IS_ERR(ifa)) {
+ ret = PTR_ERR(ifa);
+ goto unlock;
+ }
+
+ ifa_existing = find_matching_ifa(net, ifa);
if (!ifa_existing) {
/* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
- int ret = ip_mc_autojoin_config(net, true, ifa);
-
+ ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
inet_free_ifa(ifa);
- return ret;
+ goto unlock;
}
}
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
- extack);
+
+ ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
} else {
u32 new_metric = ifa->ifa_rt_priority;
u8 new_proto = ifa->ifa_proto;
@@ -994,7 +1017,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
- return -EEXIST;
+ ret = -EEXIST;
+ goto unlock;
}
ifa = ifa_existing;
@@ -1006,12 +1030,16 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa->ifa_proto = new_proto;
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
- cancel_delayed_work(&check_lifetime_work);
+ cancel_delayed_work(&net->ipv4.addr_chk_work);
queue_delayed_work(system_power_efficient_wq,
- &check_lifetime_work, 0);
+ &net->ipv4.addr_chk_work, 0);
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
}
- return 0;
+
+unlock:
+ rtnl_net_unlock(net);
+
+ return ret;
}
/*
@@ -1098,7 +1126,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
goto out;
}
- rtnl_lock();
+ rtnl_net_lock(net);
ret = -ENODEV;
dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -1108,7 +1136,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
if (colon)
*colon = ':';
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (in_dev) {
if (tryaddrmatch) {
/* Matthias Andree */
@@ -1118,7 +1146,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
This is checked above. */
for (ifap = &in_dev->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
+ (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
@@ -1132,7 +1160,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
comparing just the label */
if (!ifa) {
for (ifap = &in_dev->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
+ (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next)
if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
@@ -1174,6 +1202,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
inet_del_ifa(in_dev, ifap, 1);
break;
}
+
+ /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
+ ASSERT_RTNL();
ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
break;
@@ -1184,10 +1215,12 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
if (!ifa) {
ret = -ENOBUFS;
- ifa = inet_alloc_ifa();
+ if (!in_dev)
+ break;
+ ifa = inet_alloc_ifa(in_dev);
if (!ifa)
break;
- INIT_HLIST_NODE(&ifa->hash);
+
if (colon)
memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
@@ -1273,14 +1306,14 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
break;
}
done:
- rtnl_unlock();
+ rtnl_net_unlock(net);
out:
return ret;
}
int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
const struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
@@ -1291,7 +1324,7 @@ int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
if (!in_dev)
goto out;
- in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
if (!buf) {
done += size;
continue;
@@ -1586,16 +1619,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (!inetdev_valid_mtu(dev->mtu))
break;
if (dev->flags & IFF_LOOPBACK) {
- struct in_ifaddr *ifa = inet_alloc_ifa();
+ struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
if (ifa) {
- INIT_HLIST_NODE(&ifa->hash);
ifa->ifa_local =
ifa->ifa_address = htonl(INADDR_LOOPBACK);
ifa->ifa_prefixlen = 8;
ifa->ifa_mask = inet_make_mask(8);
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
ifa->ifa_scope = RT_SCOPE_HOST;
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
@@ -1948,8 +1978,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
static size_t inet_get_link_af_size(const struct net_device *dev,
@@ -2145,8 +2174,7 @@ void inet_netconf_notify_devconf(struct net *net, int event, int type,
rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
@@ -2367,7 +2395,7 @@ static void inet_forward_change(struct net *net)
if (on)
dev_disable_lro(dev);
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -2458,7 +2486,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
if (write && *valp != val) {
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
- if (!rtnl_trylock()) {
+ if (!rtnl_net_trylock(net)) {
/* Restore the original values before restarting */
*valp = val;
*ppos = pos;
@@ -2477,7 +2505,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
idev->dev->ifindex,
cnf);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
rt_cache_flush(net);
} else
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -2670,14 +2698,21 @@ static struct ctl_table ctl_forward_entry[] = {
static __net_init int devinet_init_net(struct net *net)
{
- int err;
- struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
struct ctl_table_header *forw_hdr;
+ struct ctl_table *tbl;
#endif
+ struct ipv4_devconf *all, *dflt;
+ int err;
+ int i;
err = -ENOMEM;
+ net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->ipv4.inet_addr_lst)
+ goto err_alloc_hash;
+
all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@@ -2738,6 +2773,11 @@ static __net_init int devinet_init_net(struct net *net)
net->ipv4.forw_hdr = forw_hdr;
#endif
+ for (i = 0; i < IN4_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
+
+ INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
+
net->ipv4.devconf_all = all;
net->ipv4.devconf_dflt = dflt;
return 0;
@@ -2755,6 +2795,8 @@ err_alloc_ctl:
err_alloc_dflt:
kfree(all);
err_alloc_all:
+ kfree(net->ipv4.inet_addr_lst);
+err_alloc_hash:
return err;
}
@@ -2762,7 +2804,11 @@ static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
const struct ctl_table *tbl;
+#endif
+ cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
+
+#ifdef CONFIG_SYSCTL
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
@@ -2773,6 +2819,7 @@ static __net_exit void devinet_exit_net(struct net *net)
#endif
kfree(net->ipv4.devconf_dflt);
kfree(net->ipv4.devconf_all);
+ kfree(net->ipv4.inet_addr_lst);
}
static __net_initdata struct pernet_operations devinet_ops = {
@@ -2788,25 +2835,25 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = {
.set_link_af = inet_set_link_af,
};
+static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+ {.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
+ .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
void __init devinet_init(void)
{
- int i;
-
- for (i = 0; i < IN4_ADDR_HSIZE; i++)
- INIT_HLIST_HEAD(&inet_addr_lst[i]);
-
register_pernet_subsys(&devinet_ops);
register_netdevice_notifier(&ip_netdev_notifier);
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
-
- rtnl_af_register(&inet_af_ops);
+ if (rtnl_af_register(&inet_af_ops))
+ panic("Unable to register inet_af_ops\n");
- rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
- rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
- rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
- RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
- rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
+ rtnl_register_many(devinet_rtnl_msg_handlers);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 47378ca41904..f3281312eb5e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -115,7 +115,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(sg_page(sg), skb->pp_recycle);
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET_ESPINTCP
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 80c4ea0e12f4..e0d94270da28 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ip_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ip_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7ad2cafb9276..272e42d81323 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -293,7 +293,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))),
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -342,10 +342,11 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
* called with rcu_read_lock()
*/
static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- u8 tos, int oif, struct net_device *dev,
+ dscp_t dscp, int oif, struct net_device *dev,
int rpf, struct in_device *idev, u32 *itag)
{
struct net *net = dev_net(dev);
+ enum skb_drop_reason reason;
struct flow_keys flkeys;
int ret, no_addr;
struct fib_result res;
@@ -357,7 +358,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
- fl4.flowi4_tos = tos;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_flags = 0;
@@ -377,9 +378,15 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
- if (res.type != RTN_UNICAST &&
- (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
- goto e_inval;
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
+ goto e_inval;
+ } else if (!IN_DEV_ACCEPT_LOCAL(idev)) {
+ reason = SKB_DROP_REASON_IP_LOCAL_SOURCE;
+ goto e_inval;
+ }
+ }
fib_combine_itag(itag, &res);
dev_match = fib_info_nh_uses_dev(res.fi, dev);
@@ -412,14 +419,14 @@ last_resort:
return 0;
e_inval:
- return -EINVAL;
+ return -reason;
e_rpf:
- return -EXDEV;
+ return -SKB_DROP_REASON_IP_RPFILTER;
}
/* Ignore rp_filter for packets protected by IPsec. */
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- u8 tos, int oif, struct net_device *dev,
+ dscp_t dscp, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag)
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
@@ -440,7 +447,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
* and the same host but different containers are not.
*/
if (inet_lookup_ifaddr_rcu(net, src))
- return -EINVAL;
+ return -SKB_DROP_REASON_IP_LOCAL_SOURCE;
ok:
*itag = 0;
@@ -448,7 +455,8 @@ ok:
}
full_check:
- return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
+ return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev,
+ itag);
}
static inline __be32 sk_extract_addr(struct sockaddr *addr)
@@ -1343,7 +1351,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
struct flowi4 fl4 = {
.flowi4_mark = frn->fl_mark,
.daddr = frn->fl_addr,
- .flowi4_tos = frn->fl_tos,
+ .flowi4_tos = frn->fl_tos & INET_DSCP_MASK,
.flowi4_scope = frn->fl_scope,
};
struct fib_table *tb;
@@ -1648,6 +1656,15 @@ static struct pernet_operations fib_net_ops = {
.exit_batch = fib_net_exit_batch,
};
+static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_NEWROUTE,
+ .doit = inet_rtm_newroute},
+ {.protocol = PF_INET, .msgtype = RTM_DELROUTE,
+ .doit = inet_rtm_delroute},
+ {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
void __init ip_fib_init(void)
{
fib_trie_init();
@@ -1657,8 +1674,5 @@ void __init ip_fib_init(void)
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
- RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ rtnl_register_many(fib_rtnl_msg_handlers);
}
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index 0e23ade74493..b1551c26554b 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -22,15 +22,15 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
ASSERT_RTNL();
info->family = AF_INET;
- net->ipv4.fib_seq++;
+ /* Paired with READ_ONCE() in fib4_seq_read() */
+ WRITE_ONCE(net->ipv4.fib_seq, net->ipv4.fib_seq + 1);
return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib4_seq_read(struct net *net)
+static unsigned int fib4_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv4.fib_seq + fib4_rules_seq_read(net);
+ /* Paired with WRITE_ONCE() in call_fib4_notifiers() */
+ return READ_ONCE(net->ipv4.fib_seq) + fib4_rules_seq_read(net);
}
static int fib4_dump(struct net *net, struct notifier_block *nb,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 5bdd1c016009..8325224ef072 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
__be32 dst;
@@ -73,7 +74,7 @@ int fib4_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, AF_INET, extack);
}
-unsigned int fib4_rules_seq_read(struct net *net)
+unsigned int fib4_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, AF_INET);
}
@@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ /* When DSCP selector is used we need to match on the entire DSCP field
+ * in the flow information structure. When TOS selector is used we need
+ * to mask the upper three DSCP bits prior to matching to maintain
+ * legacy behavior.
+ */
+ if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ return 0;
+ else if (!r->dscp_full && r->dscp &&
+ !fib_dscp_masked_match(r->dscp, fl4))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
+static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
+ struct netlink_ext_ack *extack)
+{
+ if (rule4->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule4->dscp_full = true;
+
+ return 0;
+}
+
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule4->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] &&
+ fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
+ goto errout;
+
/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
@@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule4->dscp_full ||
+ inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
return 0;
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule4->dscp_full || rule4->dscp != dscp)
+ return 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
@@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
- frh->tos = inet_dscp_to_dsfield(rule4->dscp);
+
+ if (rule4->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule4->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule4->dscp);
+ }
if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
@@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
- + nla_total_size(4); /* flow */
+ + nla_total_size(4) /* flow */
+ + nla_total_size(1); /* dscp */
}
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57cd2b96e2..d2cee5c314f5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -50,17 +50,12 @@
#include "fib_lookup.h"
-static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
-#define DEVINDEX_HASHBITS 8
-#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
-
/* for_nexthops and change_nexthops only used when nexthop object
* is not set in a fib_info. The logic within can reference fib_nh.
*/
@@ -260,12 +255,11 @@ EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
- spin_lock_bh(&fib_info_lock);
+ ASSERT_RTNL();
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
- /* Paired with READ_ONCE() in fib_create_info(). */
- WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+ fib_info_cnt--;
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -275,14 +269,13 @@ void fib_release_info(struct fib_info *fi)
change_nexthops(fi) {
if (!nexthop_nh->fib_nh_dev)
continue;
- hlist_del(&nexthop_nh->nh_hash);
+ hlist_del_rcu(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
/* Paired with READ_ONCE() from fib_table_lookup() */
WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
- spin_unlock_bh(&fib_info_lock);
}
static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
@@ -322,17 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
return 0;
}
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
-{
- return hash_32(val, DEVINDEX_HASHBITS);
-}
-
-static struct hlist_head *
-fib_info_devhash_bucket(const struct net_device *dev)
+static struct hlist_head *fib_nh_head(struct net_device *dev)
{
- u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
-
- return &fib_info_devhash[fib_devindex_hashfn(val)];
+ return &dev->fib_nh_head;
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -347,11 +332,10 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
return val;
}
-static unsigned int fib_info_hashfn_result(unsigned int val)
+static unsigned int fib_info_hashfn_result(const struct net *net,
+ unsigned int val)
{
- unsigned int mask = (fib_info_hash_size - 1);
-
- return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits);
}
static inline unsigned int fib_info_hashfn(struct fib_info *fi)
@@ -363,14 +347,14 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
fi->fib_priority);
if (fi->nh) {
- val ^= fib_devindex_hashfn(fi->nh->id);
+ val ^= fi->nh->id;
} else {
for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+ val ^= nh->fib_nh_oif;
} endfor_nexthops(fi)
}
- return fib_info_hashfn_result(val);
+ return fib_info_hashfn_result(fi->fib_net, val);
}
/* no metrics, only nexthop id */
@@ -381,11 +365,11 @@ static struct fib_info *fib_find_info_nh(struct net *net,
struct fib_info *fi;
unsigned int hash;
- hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+ hash = fib_info_hashfn_1(cfg->fc_nh_id,
cfg->fc_protocol, cfg->fc_scope,
(__force u32)cfg->fc_prefsrc,
cfg->fc_priority);
- hash = fib_info_hashfn_result(hash);
+ hash = fib_info_hashfn_result(net, hash);
head = &fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
@@ -437,28 +421,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
}
/* Check, that the gateway is already configured.
- * Used only by redirect accept routine.
+ * Used only by redirect accept routine, under rcu_read_lock();
*/
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- spin_lock(&fib_info_lock);
-
- head = fib_info_devhash_bucket(dev);
+ head = fib_nh_head(dev);
- hlist_for_each_entry(nh, head, nh_hash) {
- if (nh->fib_nh_dev == dev &&
- nh->fib_nh_gw4 == gw &&
+ hlist_for_each_entry_rcu(nh, head, nh_hash) {
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (nh->fib_nh_gw4 == gw &&
!(nh->fib_nh_flags & RTNH_F_DEAD)) {
- spin_unlock(&fib_info_lock);
return 0;
}
}
- spin_unlock(&fib_info_lock);
-
return -1;
}
@@ -543,8 +522,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
info->nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}
static int fib_detect_death(struct fib_info *fi, int order,
@@ -1278,7 +1256,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
unsigned int old_size = fib_info_hash_size;
unsigned int i;
- spin_lock_bh(&fib_info_lock);
+ ASSERT_RTNL();
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
@@ -1315,8 +1293,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
}
- spin_unlock_bh(&fib_info_lock);
-
kvfree(old_info_hash);
kvfree(old_laddrhash);
}
@@ -1392,6 +1368,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
+ ASSERT_RTNL();
if (cfg->fc_type > RTN_MAX)
goto err_inval;
@@ -1434,8 +1411,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
err = -ENOBUFS;
- /* Paired with WRITE_ONCE() in fib_release_info() */
- if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
+ if (fib_info_cnt >= fib_info_hash_size) {
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
@@ -1594,7 +1570,7 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
- spin_lock_bh(&fib_info_lock);
+
fib_info_cnt++;
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
@@ -1612,11 +1588,10 @@ link_it:
if (!nexthop_nh->fib_nh_dev)
continue;
- head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
- hlist_add_head(&nexthop_nh->nh_hash, head);
+ head = fib_nh_head(nexthop_nh->fib_nh_dev);
+ hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
- spin_unlock_bh(&fib_info_lock);
return fi;
err_inval:
@@ -1966,12 +1941,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct hlist_head *head = fib_nh_head(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
- if (nh->fib_nh_dev == dev)
- fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}
@@ -1985,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct hlist_head *head = fib_nh_head(dev);
struct fib_info *prev_fi = NULL;
int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
@@ -1999,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
int dead;
BUG_ON(!fi->fib_nhs);
- if (nh->fib_nh_dev != dev || fi == prev_fi)
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (fi == prev_fi)
continue;
prev_fi = fi;
dead = 0;
@@ -2066,8 +2042,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_dscp &&
- fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
if (fa->tb_id != tb->tb_id)
continue;
@@ -2150,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- head = fib_info_devhash_bucket(dev);
+ head = fib_nh_head(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -2158,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
int alive;
BUG_ON(!fi->fib_nhs);
- if (nh->fib_nh_dev != dev || fi == prev_fi)
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (fi == prev_fi)
continue;
prev_fi = fi;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8f30e3f00b7f..161f5526b86c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -292,15 +292,9 @@ static const int inflate_threshold = 50;
static const int halve_threshold_root = 15;
static const int inflate_threshold_root = 30;
-static void __alias_free_mem(struct rcu_head *head)
-{
- struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
- kmem_cache_free(fn_alias_kmem, fa);
-}
-
static inline void alias_free_mem_rcu(struct fib_alias *fa)
{
- call_rcu(&fa->rcu, __alias_free_mem);
+ kfree_rcu(fa, rcu);
}
#define TNODE_VMALLOC_MAX \
@@ -1580,8 +1574,7 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_dscp &&
- inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
/* Paired with WRITE_ONCE() in fib_release_info() */
if (READ_ONCE(fi->fib_dead))
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 0abbc413e0fe..3e30745e2c09 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -50,7 +50,7 @@ struct fou_net {
static inline struct fou *fou_from_sock(struct sock *sk)
{
- return sk->sk_user_data;
+ return rcu_dereference_sk_user_data(sk);
}
static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
@@ -233,9 +233,15 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
struct sk_buff *skb)
{
const struct net_offload __rcu **offloads;
- u8 proto = fou_from_sock(sk)->protocol;
+ struct fou *fou = fou_from_sock(sk);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
+ u8 proto;
+
+ if (!fou)
+ goto out;
+
+ proto = fou->protocol;
/* We can clear the encap_mark for FOU as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
@@ -263,14 +269,24 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
const struct net_offload __rcu **offloads;
- u8 proto = fou_from_sock(sk)->protocol;
+ struct fou *fou = fou_from_sock(sk);
const struct net_offload *ops;
- int err = -ENOSYS;
+ u8 proto;
+ int err;
+
+ if (!fou) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ proto = fou->protocol;
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete)) {
+ err = -ENOSYS;
goto out;
+ }
err = ops->callbacks.gro_complete(skb, nhoff);
@@ -322,6 +338,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
skb_gro_remcsum_init(&grc);
+ if (!fou)
+ goto out;
+
off = skb_gro_offset(skb);
len = off + sizeof(*guehdr);
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 98b90107b5ab..3d9614609b2d 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -12,7 +12,7 @@
/* Global operation policy for fou */
const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
- [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_PORT] = { .type = NLA_BE16, },
[FOU_ATTR_AF] = { .type = NLA_U8, },
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
[FOU_ATTR_TYPE] = { .type = NLA_U8, },
@@ -21,7 +21,7 @@ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
[FOU_ATTR_LOCAL_V6] = { .len = 16, },
[FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
[FOU_ATTR_PEER_V6] = { .len = 16, },
- [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_PEER_PORT] = { .type = NLA_BE16, },
[FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
};
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ab6d0d98dbc3..963a89ae9c26 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#include <net/inet_dscp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/icmp.h>
@@ -220,61 +221,56 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock(&sk->sk_lock.slock);
}
-int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
-int sysctl_icmp_msgs_burst __read_mostly = 50;
-
-static struct {
- spinlock_t lock;
- u32 credit;
- u32 stamp;
-} icmp_global = {
- .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
-};
-
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
+ * @net: network namespace
*
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
- * Note: called with BH disabled
+ * Works in tandem with icmp_global_consume().
*/
-bool icmp_global_allow(void)
+bool icmp_global_allow(struct net *net)
{
- u32 credit, delta, incr = 0, now = (u32)jiffies;
- bool rc = false;
+ u32 delta, now, oldstamp;
+ int incr, new, old;
- /* Check if token bucket is empty and cannot be refilled
- * without taking the spinlock. The READ_ONCE() are paired
- * with the following WRITE_ONCE() in this same function.
+ /* Note: many cpus could find this condition true.
+ * Then later icmp_global_consume() could consume more credits,
+ * this is an acceptable race.
*/
- if (!READ_ONCE(icmp_global.credit)) {
- delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
- if (delta < HZ / 50)
- return false;
- }
+ if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
+ return true;
- spin_lock(&icmp_global.lock);
- delta = min_t(u32, now - icmp_global.stamp, HZ);
- if (delta >= HZ / 50) {
- incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
- if (incr)
- WRITE_ONCE(icmp_global.stamp, now);
- }
- credit = min_t(u32, icmp_global.credit + incr,
- READ_ONCE(sysctl_icmp_msgs_burst));
- if (credit) {
- /* We want to use a credit of one in average, but need to randomize
- * it for security reasons.
- */
- credit = max_t(int, credit - get_random_u32_below(3), 0);
- rc = true;
+ now = jiffies;
+ oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
+ delta = min_t(u32, now - oldstamp, HZ);
+ if (delta < HZ / 50)
+ return false;
+
+ incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
+ if (!incr)
+ return false;
+
+ if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
+ old = atomic_read(&net->ipv4.icmp_global_credit);
+ do {
+ new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
+ } while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
}
- WRITE_ONCE(icmp_global.credit, credit);
- spin_unlock(&icmp_global.lock);
- return rc;
+ return true;
}
EXPORT_SYMBOL(icmp_global_allow);
+void icmp_global_consume(struct net *net)
+{
+ int credits = get_random_u32_below(3);
+
+ /* Note: this might make icmp_global.credit negative. */
+ if (credits)
+ atomic_sub(credits, &net->ipv4.icmp_global_credit);
+}
+EXPORT_SYMBOL(icmp_global_consume);
+
static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
if (type > NR_ICMP_TYPES)
@@ -291,14 +287,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return false;
}
-static bool icmpv4_global_allow(struct net *net, int type, int code)
+static bool icmpv4_global_allow(struct net *net, int type, int code,
+ bool *apply_ratelimit)
{
if (icmpv4_mask_allow(net, type, code))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -308,15 +306,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
*/
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
- struct flowi4 *fl4, int type, int code)
+ struct flowi4 *fl4, int type, int code,
+ bool apply_ratelimit)
{
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
bool rc = true;
int vif;
- if (icmpv4_mask_allow(net, type, code))
- goto out;
+ if (!apply_ratelimit)
+ return true;
/* No rate limit on loopback */
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
@@ -331,6 +330,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
return rc;
}
@@ -402,6 +403,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
+ bool apply_ratelimit = false;
struct flowi4 fl4;
struct sock *sk;
struct inet_sock *inet;
@@ -413,11 +415,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
- /* global icmp_msgs_per_sec */
- if (!icmpv4_global_allow(net, type, code))
+ /* is global icmp_msgs_per_sec exhausted ? */
+ if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -443,14 +445,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb)));
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
- if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
@@ -476,13 +478,11 @@ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
return route_lookup_dev;
}
-static struct rtable *icmp_route_lookup(struct net *net,
- struct flowi4 *fl4,
+static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
struct sk_buff *skb_in,
- const struct iphdr *iph,
- __be32 saddr, u8 tos, u32 mark,
- int type, int code,
- struct icmp_bxm *param)
+ const struct iphdr *iph, __be32 saddr,
+ dscp_t dscp, u32 mark, int type,
+ int code, struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
struct dst_entry *dst, *dst2;
@@ -496,7 +496,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -517,6 +517,9 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (!IS_ERR(dst)) {
if (rt != rt2)
return rt;
+ if (inet_addr_type_dev_table(net, route_lookup_dev,
+ fl4->daddr) == RTN_LOCAL)
+ return rt;
} else if (PTR_ERR(dst) == -EPERM) {
rt = NULL;
} else {
@@ -545,7 +548,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
- RT_TOS(tos), rt2->dst.dev);
+ dscp, rt2->dst.dev) ? -EINVAL : 0;
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
@@ -596,6 +599,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
int room;
struct icmp_bxm icmp_param;
struct rtable *rt = skb_rtable(skb_in);
+ bool apply_ratelimit = false;
struct ipcm_cookie ipc;
struct flowi4 fl4;
__be32 saddr;
@@ -677,7 +681,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
}
}
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
@@ -685,7 +689,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/
if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
- !icmpv4_global_allow(net, type, code))
+ !icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -738,13 +742,14 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
ipc.opt = &icmp_param.replyopts.opt;
ipc.sockc.mark = mark;
- rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
- type, code, &icmp_param);
+ rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr,
+ inet_dsfield_to_dscp(tos), mark, type, code,
+ &icmp_param);
if (IS_ERR(rt))
goto out_unlock;
/* peer icmp_ratelimit */
- if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */
@@ -1487,6 +1492,8 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
net->ipv4.sysctl_icmp_ratemask = 0x1818;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
+ net->ipv4.sysctl_icmp_msgs_per_sec = 1000;
+ net->ipv4.sysctl_icmp_msgs_burst = 50;
return 0;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9bf09de6a2e7..6a238398acc9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1437,16 +1437,32 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
unsigned int mode, gfp_t gfp)
{
+ struct ip_mc_list __rcu **mc_hash;
struct ip_mc_list *im;
ASSERT_RTNL();
- for_each_pmc_rtnl(in_dev, im) {
- if (im->multiaddr == addr) {
- im->users++;
- ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
- goto out;
+ mc_hash = rtnl_dereference(in_dev->mc_hash);
+ if (mc_hash) {
+ u32 hash = hash_32((__force u32)addr, MC_HASH_SZ_LOG);
+
+ for (im = rtnl_dereference(mc_hash[hash]);
+ im;
+ im = rtnl_dereference(im->next_hash)) {
+ if (im->multiaddr == addr)
+ break;
}
+ } else {
+ for_each_pmc_rtnl(in_dev, im) {
+ if (im->multiaddr == addr)
+ break;
+ }
+ }
+
+ if (im) {
+ im->users++;
+ ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
+ goto out;
}
im = kzalloc(sizeof(*im), gfp);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 64d07b842e73..6872b5aff73e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -236,7 +236,7 @@ static bool inet_bhash2_conflict(const struct sock *sk,
#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \
hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \
- sk_for_each_bound(sk2, &(__tb2)->owners)
+ sk_for_each_bound((__sk), &(__tb2)->owners)
/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
@@ -714,6 +714,7 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
out:
release_sock(sk);
if (newsk && mem_cgroup_sockets_enabled) {
+ gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
int amt = 0;
/* atomically get the memory usage, set and charge the
@@ -731,8 +732,8 @@ out:
}
if (amt)
- mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
- GFP_KERNEL | __GFP_NOFAIL);
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
+ kmem_cache_charge(newsk, gfp);
release_sock(newsk);
}
@@ -774,7 +775,8 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
+ smp_store_release(&icsk->icsk_ack.pending, 0);
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
sk_stop_timer(sk, &icsk->icsk_delack_timer);
@@ -789,7 +791,8 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
/* ongoing timer handlers need to acquire socket lock. */
sock_not_owned_by_me(sk);
- icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
+ smp_store_release(&icsk->icsk_ack.pending, 0);
sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
@@ -1044,21 +1047,31 @@ static bool reqsk_queue_unlink(struct request_sock *req)
found = __sk_nulls_del_node_init_rcu(sk);
spin_unlock(lock);
}
- if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
- reqsk_put(req);
+
return found;
}
-bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
+ struct request_sock *req,
+ bool from_timer)
{
bool unlinked = reqsk_queue_unlink(req);
+ if (!from_timer && timer_delete_sync(&req->rsk_timer))
+ reqsk_put(req);
+
if (unlinked) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
+
return unlinked;
}
+
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+ return __inet_csk_reqsk_queue_drop(sk, req, false);
+}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
@@ -1151,7 +1164,7 @@ static void reqsk_timer_handler(struct timer_list *t)
if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
/* delete timer */
- inet_csk_reqsk_queue_drop(sk_listener, nreq);
+ __inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
goto no_ownership;
}
@@ -1177,7 +1190,8 @@ no_ownership:
}
drop:
- inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
+ __inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
+ reqsk_put(oreq);
}
static bool reqsk_queue_hash_req(struct request_sock *req,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9712cdb8087c..321acc8abf17 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -247,6 +247,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct nlmsghdr *nlh;
struct nlattr *attr;
void *info = NULL;
+ u8 icsk_pending;
int protocol;
cb_data = cb->data;
@@ -307,14 +308,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto out;
}
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
r->idiag_timer = 1;
r->idiag_retrans = icsk->icsk_retransmits;
r->idiag_expires =
jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
@@ -442,7 +444,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
inet_diag_msg_common_fill(r, sk);
r->idiag_retrans = 0;
- r->idiag_state = tw->tw_substate;
+ r->idiag_state = READ_ONCE(tw->tw_substate);
r->idiag_timer = 3;
tmo = tw->tw_timer.expires - jiffies;
r->idiag_expires = jiffies_delta_to_msecs(tmo);
@@ -1209,7 +1211,7 @@ next_chunk:
if (num < s_num)
goto next_normal;
state = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_substate : sk->sk_state;
+ READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
if (!(idiag_states & (1 << state)))
goto next_normal;
if (r->sdiag_family != AF_UNSPEC &&
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 48d0d494185b..9bfcfd016e18 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -310,7 +310,7 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
return inet_lhash2_bucket(h, hash);
}
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif)
{
@@ -348,7 +348,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
@@ -374,7 +374,7 @@ EXPORT_SYMBOL_GPL(inet_lookup_reuseport);
*/
/* called with rcu_read_lock() : No refcount taken on the socket */
-static struct sock *inet_lhash2_lookup(struct net *net,
+static struct sock *inet_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
@@ -401,7 +401,7 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet_lookup_run_sk_lookup(struct net *net,
+struct sock *inet_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
@@ -423,7 +423,7 @@ struct sock *inet_lookup_run_sk_lookup(struct net *net,
return sk;
}
-struct sock *__inet_lookup_listener(struct net *net,
+struct sock *__inet_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
@@ -488,7 +488,7 @@ void sock_edemux(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_edemux);
-struct sock *__inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 5bd759963451..5ab56f4cb529 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -128,11 +128,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
return NULL;
}
-static void inetpeer_free_rcu(struct rcu_head *head)
-{
- kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
-}
-
/* perform garbage collect on all items stacked during a lookup */
static void inet_peer_gc(struct inet_peer_base *base,
struct inet_peer *gc_stack[],
@@ -168,7 +163,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
if (p) {
rb_erase(&p->rb_node, &base->rb_root);
base->total--;
- call_rcu(&p->rcu, inetpeer_free_rcu);
+ kfree_rcu(p, rcu);
}
}
}
@@ -242,7 +237,7 @@ void inet_putpeer(struct inet_peer *p)
WRITE_ONCE(p->dtime, (__u32)jiffies);
if (refcount_dec_and_test(&p->refcnt))
- call_rcu(&p->rcu, inetpeer_free_rcu);
+ kfree_rcu(p, rcu);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a92664a5ef2e..07036a2943c1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -132,12 +132,12 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(struct timer_list *t)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
struct inet_frag_queue *frag = from_timer(frag, t, timer);
const struct iphdr *iph;
struct sk_buff *head = NULL;
struct net *net;
struct ipq *qp;
- int err;
qp = container_of(frag, struct ipq, q);
net = qp->q.fqdir->net;
@@ -175,14 +175,15 @@ static void ip_expire(struct timer_list *t)
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
- if (err)
+ reason = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), head->dev);
+ if (reason)
goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
+ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
goto out;
@@ -195,7 +196,7 @@ out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
+ kfree_skb_reason(head, reason);
ipq_put(qp);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ba205473522e..f1f31ebfc793 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
+#include <net/inet_dscp.h>
/*
Problems & solutions
@@ -661,11 +662,11 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, 0))
goto free_skb;
- tnl_params = (const struct iphdr *)skb->data;
-
- if (!pskb_network_may_pull(skb, pull_len))
+ if (!pskb_may_pull(skb, pull_len))
goto free_skb;
+ tnl_params = (const struct iphdr *)skb->data;
+
/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
skb_pull(skb, pull_len);
skb_reset_mac_header(skb);
@@ -930,7 +931,7 @@ static int ipgre_open(struct net_device *dev)
t->parms.iph.daddr,
t->parms.iph.saddr,
t->parms.o_key,
- RT_TOS(t->parms.iph.tos),
+ t->parms.iph.tos & INET_DSCP_MASK,
t->parms.link);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
@@ -996,7 +997,7 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= GRE_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
/* TCP offload with GRE SEQ is not supported, nor can we support 2
@@ -1010,6 +1011,8 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6fbcbd2358a..f0a4dda246ab 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -322,15 +322,14 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
int err, drop_reason;
struct rtable *rt;
- drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
-
if (ip_can_use_hint(skb, iph, hint)) {
- err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
- dev, hint);
- if (unlikely(err))
+ drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev, hint);
+ if (unlikely(drop_reason))
goto drop_error;
}
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
@@ -362,10 +361,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* how the packet travels inside Linux networking.
*/
if (!skb_valid_dst(skb)) {
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, dev);
- if (unlikely(err))
+ drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
} else {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -425,10 +425,8 @@ drop:
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV) {
- drop_reason = SKB_DROP_REASON_IP_RPFILTER;
+ if (drop_reason == SKB_DROP_REASON_IP_RPFILTER)
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
- }
goto drop;
}
@@ -596,9 +594,8 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct dst_entry *dst;
@@ -646,9 +643,8 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index a9e22a098872..e3321932bec0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -17,7 +17,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
@@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
orefdst = skb->_skb_refdst;
skb_dst_set(skb, NULL);
- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
+ err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph),
+ dev) ? -EINVAL : 0;
rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b90d0f78ac80..0065b1996c94 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -77,6 +77,7 @@
#include <net/inetpeer.h>
#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
+#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
@@ -493,7 +494,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_TOS(tos),
+ tos & INET_DSCP_MASK,
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@@ -972,7 +973,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = dst_rtable(cork->dst);
- bool paged, hold_tskey, extra_uref = false;
+ bool paged, hold_tskey = false, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1048,10 +1049,15 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/* So, what's going on in the loop below?
*
@@ -1325,7 +1331,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->priority = ipc->priority;
cork->transmit_time = ipc->sockc.transmit_time;
cork->tx_flags = 0;
- sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
+ sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags);
+ if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->flags |= IPCORK_TS_OPT_ID;
+ cork->ts_opt_id = ipc->sockc.ts_opt_id;
+ }
return 0;
}
@@ -1586,7 +1596,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
* Generic function to send a packet as reply to another packet.
* Used to send some TCP resets/acks so far.
*/
-void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
+ struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
@@ -1621,7 +1632,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
- RT_TOS(arg->tos),
+ arg->tos & INET_DSCP_MASK,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
@@ -1652,6 +1663,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ if (orig_sk)
+ skb_set_owner_edemux(nskb, (struct sock *)orig_sk);
if (transmit_time)
nskb->tstamp_type = SKB_CLOCK_MONOTONIC;
if (txhash)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5cffad42fe8c..25505f9b724c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -43,6 +43,7 @@
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -217,7 +218,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
ip_tunnel_flags_copy(flags, parms->i_flags);
- hlist_for_each_entry_rcu(t, head, hash_node) {
+ hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
link == READ_ONCE(t->parms.link) &&
@@ -293,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), dev_net(dev),
+ iph->tos & INET_DSCP_MASK, dev_net(dev),
tunnel->parms.link, tunnel->fwmark, 0, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -609,9 +610,9 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
- dev_net(dev), 0, skb->mark, skb_get_hash(skb),
- key->flow_flags);
+ tunnel_id_to_key32(key->tun_id),
+ tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark,
+ skb_get_hash(skb), key->flow_flags);
if (!tunnel_hlen)
tunnel_hlen = ip_encap_hlen(&tun_info->encap);
@@ -772,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.o_key, tos & INET_DSCP_MASK,
dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
@@ -1161,7 +1162,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
* Allowing to move it to another netns is clearly unsafe.
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->netns_local = true;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
itn->type = itn->fb_tunnel_dev->type;
@@ -1326,7 +1327,7 @@ int ip_tunnel_init(struct net_device *dev)
tunnel->dev = dev;
tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
iph->version = 4;
iph->ihl = 5;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 14536da9f5dc..f0b4419cef34 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -443,7 +443,7 @@ static int vti_tunnel_init(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
netif_keep_dst(dev);
return ip_tunnel_init(dev);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 923a2ef68c2f..dc0db5895e0e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -378,7 +378,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL;
dev->flags = IFF_NOARP;
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
netif_keep_dst(dev);
dev->features |= IPIP_FEATURES;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6c750bd13dd8..99d8faa508e5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>
+#include <net/inet_dscp.h>
#include <linux/nospec.h>
@@ -119,6 +120,11 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv4.mr_tables))
+static bool ipmr_can_free_table(struct net *net)
+{
+ return !check_net(net) || !net_initialized(net);
+}
+
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -136,7 +142,7 @@ static struct mr_table *ipmr_mr_table_iter(struct net *net,
return ret;
}
-static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+static struct mr_table *__ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -147,6 +153,16 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
return NULL;
}
+static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
+
+ rcu_read_lock();
+ mrt = __ipmr_get_table(net, id);
+ rcu_read_unlock();
+ return mrt;
+}
+
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
struct mr_table **mrt)
{
@@ -188,7 +204,7 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
arg->table = fib_rule_get_table(rule, arg);
- mrt = ipmr_get_table(rule->fr_net, arg->table);
+ mrt = __ipmr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -287,7 +303,7 @@ static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}
-static unsigned int ipmr_rules_seq_read(struct net *net)
+static unsigned int ipmr_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}
@@ -301,6 +317,11 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static bool ipmr_can_free_table(struct net *net)
+{
+ return !check_net(net);
+}
+
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -314,6 +335,8 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
return net->ipv4.mrt;
}
+#define __ipmr_get_table ipmr_get_table
+
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
struct mr_table **mrt)
{
@@ -345,7 +368,7 @@ static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
return 0;
}
-static unsigned int ipmr_rules_seq_read(struct net *net)
+static unsigned int ipmr_rules_seq_read(const struct net *net)
{
return 0;
}
@@ -402,7 +425,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (id != RT_TABLE_DEFAULT && id >= 1000000000)
return ERR_PTR(-EINVAL);
- mrt = ipmr_get_table(net, id);
+ mrt = __ipmr_get_table(net, id);
if (mrt)
return mrt;
@@ -412,6 +435,10 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
+ struct net *net = read_pnet(&mrt->net);
+
+ WARN_ON_ONCE(!ipmr_can_free_table(net));
+
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
@@ -536,7 +563,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -1373,7 +1400,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
goto out_unlock;
}
- mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+ mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
if (!mrt) {
ret = -ENOENT;
goto out_unlock;
@@ -1868,7 +1895,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->remote, vif->local,
0, 0,
IPPROTO_IPIP,
- RT_TOS(iph->tos), vif->link);
+ iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
goto out_free;
encap = sizeof(struct iphdr);
@@ -1876,7 +1903,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
0, 0,
IPPROTO_IPIP,
- RT_TOS(iph->tos), vif->link);
+ iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
goto out_free;
}
@@ -2080,7 +2107,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
@@ -2261,11 +2288,13 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
struct mr_table *mrt;
int err;
- mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
+ if (!mrt) {
+ rcu_read_unlock();
return -ENOENT;
+ }
- rcu_read_lock();
cache = ipmr_cache_find(mrt, saddr, daddr);
if (!cache && skb->dev) {
int vif = ipmr_find_vif(mrt, skb->dev);
@@ -2406,8 +2435,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}
static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
@@ -2546,11 +2574,11 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
- grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
- tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+ src = nla_get_in_addr_default(tb[RTA_SRC], 0);
+ grp = nla_get_in_addr_default(tb[RTA_DST], 0);
+ tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
- mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
+ mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
if (!mrt) {
err = -ENOENT;
goto errout_free;
@@ -2604,7 +2632,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
struct mr_table *mrt;
- mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
+ mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR)
return skb->len;
@@ -2712,7 +2740,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
break;
}
}
- mrt = ipmr_get_table(net, tblid);
+ mrt = __ipmr_get_table(net, tblid);
if (!mrt) {
ret = -ENOENT;
goto out;
@@ -2920,13 +2948,15 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
- mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
+ if (!mrt) {
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
+ }
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -3035,11 +3065,9 @@ static const struct net_protocol pim_protocol = {
};
#endif
-static unsigned int ipmr_seq_read(struct net *net)
+static unsigned int ipmr_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
+ return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net);
}
static int ipmr_dump(struct net *net, struct notifier_block *nb,
@@ -3139,6 +3167,17 @@ static struct pernet_operations ipmr_net_ops = {
.exit_batch = ipmr_net_exit_batch,
};
+static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = {
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK,
+ .dumpit = ipmr_rtm_dumplink},
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE,
+ .doit = ipmr_rtm_route},
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE,
+ .doit = ipmr_rtm_route},
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE,
+ .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute},
+};
+
int __init ip_mr_init(void)
{
int err;
@@ -3159,15 +3198,8 @@ int __init ip_mr_init(void)
goto add_proto_fail;
}
#endif
- rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
- ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
- rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
- ipmr_rtm_route, NULL, 0);
- rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
- ipmr_rtm_route, NULL, 0);
-
- rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
- NULL, ipmr_rtm_dumplink, 0);
+ rtnl_register_many(ipmr_rtnl_msg_handlers);
+
return 0;
#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 271dc03fc6db..f0af12a2f70b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -310,7 +310,8 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
if (filter->filter_set)
flags |= NLM_F_DUMP_FILTERED;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list,
+ lockdep_rtnl_is_held()) {
if (e < s_e)
goto next_entry;
if (filter->dev &&
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 591a2737808e..08bc3f2c0078 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -14,6 +14,7 @@
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
@@ -43,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1b991b889506..ef8009281da5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -12,7 +12,13 @@ config NF_DEFRAG_IPV4
# old sockopt interface and eval loop
config IP_NF_IPTABLES_LEGACY
- tristate
+ tristate "Legacy IP tables support"
+ default n
+ select NETFILTER_XTABLES
+ help
+ iptables is a legacy packet classifier.
+ This is not needed if you are using iptables over nftables
+ (iptables-nft).
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
@@ -318,7 +324,13 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
- tristate
+ tristate "Legacy ARPTABLES support"
+ depends on NETFILTER_XTABLES
+ default n
+ help
+ arptables is a legacy packet classifier.
+ This is not needed if you are using arptables over nftables
+ (iptables-nft).
config NFT_COMPAT_ARP
tristate
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 14365b20f1c5..1cdd9c28ab2d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -826,7 +826,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
sizeof(info.underflow));
info.num_entries = private->number;
info.size = private->size;
- strcpy(info.name, name);
+ strscpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
@@ -1547,7 +1547,7 @@ int arpt_register_table(struct net *net,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fe89a056eb06..3d101613f27f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -981,7 +981,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
sizeof(info.underflow));
info.num_entries = private->number;
info.size = private->size;
- strcpy(info.name, name);
+ strscpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
@@ -1767,7 +1767,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index ded5bef02f77..a27782d7653e 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <net/inet_dscp.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/ip_fib.h>
@@ -75,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
+ flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 6cc5743c553a..25e1e8eb18dd 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -15,6 +15,7 @@
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
@@ -32,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
@@ -52,8 +53,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
{
struct iphdr *iph;
+ local_bh_disable();
if (this_cpu_read(nf_skb_duplicated))
- return;
+ goto out;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
* the original skb, which should continue on its way as if nothing has
@@ -61,7 +63,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
*/
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
- return;
+ goto out;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Avoid counting cloned packets towards the original connection. */
@@ -90,6 +92,8 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
} else {
kfree_skb(skb);
}
+out:
+ local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_dup_ipv4);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 04504b2b51df..87fd945a0d27 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -239,9 +239,8 @@ static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct sk_buff *nskb;
- struct iphdr *niph;
const struct tcphdr *oth;
+ struct sk_buff *nskb;
struct tcphdr _oth;
oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook);
@@ -266,14 +265,12 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
skb_reserve(nskb, LL_MAX_HEADER);
- niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- ip4_dst_hoplimit(skb_dst(nskb)));
+ nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
+ ip4_dst_hoplimit(skb_dst(nskb)));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
if (ip_route_me_harder(net, sk, nskb, RTN_UNSPEC))
goto free_nskb;
- niph = ip_hdr(nskb);
-
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
@@ -290,6 +287,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
*/
if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct iphdr *niph = ip_hdr(nskb);
struct net_device *br_indev;
br_indev = nf_bridge_get_physindev(oldskb, net);
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index a522c3a3be52..ef5dd88107dd 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -40,13 +40,13 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 9eee535c64dd..625adbc42037 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -10,6 +10,8 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#include <net/inet_dscp.h>
+#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
@@ -22,8 +24,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}
-#define DSCP_BITS 0xfc
-
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
@@ -66,6 +66,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi4_scope = RT_SCOPE_UNIVERSE,
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
+ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
const struct net_device *oif;
const struct net_device *found;
@@ -84,9 +85,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
- if (priv->flags & NFTA_FIB_F_IIF)
- fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif);
-
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv, nft_in(pkt));
@@ -110,7 +108,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
- fl4.flowi4_tos = iph->tos & DSCP_BITS;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 6b9787ee8601..09a3d73b45ba 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -865,15 +865,18 @@ out:
}
static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
- u32 op_flags)
+ u32 op_flags, u32 *resp_op_flags)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
u16 group_type = 0;
+ u16 weight;
int i;
+ *resp_op_flags |= NHA_OP_FLAG_RESP_GRP_RESVD_0;
+
if (nhg->hash_threshold)
group_type = NEXTHOP_GRP_TYPE_MPATH;
else if (nhg->resilient)
@@ -888,9 +891,12 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
+ weight = nhg->nh_entries[i].weight - 1;
+
*p++ = (struct nexthop_grp) {
.id = nhg->nh_entries[i].nh->id,
- .weight = nhg->nh_entries[i].weight - 1,
+ .weight = weight,
+ .weight_high = weight >> 8,
};
}
@@ -934,10 +940,12 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ u32 resp_op_flags = 0;
if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;
- if (nla_put_nh_group(skb, nh, op_flags))
+ if (nla_put_nh_group(skb, nh, op_flags, &resp_op_flags) ||
+ nla_put_u32(skb, NHA_OP_FLAGS, resp_op_flags))
goto nla_put_failure;
goto out;
}
@@ -1050,7 +1058,9 @@ static size_t nh_nlmsg_size(struct nexthop *nh)
sz += nla_total_size(4); /* NHA_ID */
if (nh->is_group)
- sz += nh_nlmsg_size_grp(nh);
+ sz += nh_nlmsg_size_grp(nh) +
+ nla_total_size(4) + /* NHA_OP_FLAGS */
+ 0;
else
sz += nh_nlmsg_size_single(nh);
@@ -1080,8 +1090,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
}
static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
@@ -1201,8 +1210,7 @@ static void nexthop_bucket_notify(struct nh_res_table *res_table,
rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
@@ -1280,11 +1288,14 @@ static int nh_check_attr_group(struct net *net,
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
- if (nhg[i].resvd1 || nhg[i].resvd2) {
- NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+ if (nhg[i].resvd2) {
+ NL_SET_ERR_MSG(extack, "Reserved field in nexthop_grp must be 0");
return -EINVAL;
}
- if (nhg[i].weight > 254) {
+ if (nexthop_grp_weight(&nhg[i]) == 0) {
+ /* 0xffff got passed in, representing weight of 0x10000,
+ * which is too heavy.
+ */
NL_SET_ERR_MSG(extack, "Invalid value for weight");
return -EINVAL;
}
@@ -1880,9 +1891,9 @@ static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
static void nh_res_group_rebalance(struct nh_group *nhg,
struct nh_res_table *res_table)
{
- int prev_upper_bound = 0;
- int total = 0;
- int w = 0;
+ u16 prev_upper_bound = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
INIT_LIST_HEAD(&res_table->uw_nh_entries);
@@ -1892,11 +1903,12 @@ static void nh_res_group_rebalance(struct nh_group *nhg,
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u16 upper_bound;
+ u64 btw;
w += nhge->weight;
- upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
- total);
+ btw = ((u64)res_table->num_nh_buckets) * w;
+ upper_bound = DIV_ROUND_CLOSEST_ULL(btw, total);
nhge->res.wants_buckets = upper_bound - prev_upper_bound;
prev_upper_bound = upper_bound;
@@ -1962,8 +1974,8 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
static void nh_hthr_group_rebalance(struct nh_group *nhg)
{
- int total = 0;
- int w = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
for (i = 0; i < nhg->num_nh; ++i)
@@ -1971,7 +1983,7 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg)
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u32 upper_bound;
w += nhge->weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
@@ -2713,7 +2725,8 @@ static struct nexthop *nexthop_create_group(struct net *net,
goto out_no_nh;
}
nhg->nh_entries[i].nh = nhe;
- nhg->nh_entries[i].weight = entry[i].weight + 1;
+ nhg->nh_entries[i].weight = nexthop_grp_weight(&entry[i]);
+
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
nhg->nh_entries[i].nh_parent = nh;
}
@@ -3234,12 +3247,8 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
- if (op_flags) {
- if (tb[NHA_OP_FLAGS])
- *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
- else
- *op_flags = 0;
- }
+ if (op_flags)
+ *op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0);
return 0;
}
@@ -3420,10 +3429,7 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (tb[NHA_OP_FLAGS])
- filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
- else
- filter->op_flags = 0;
+ filter->op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0);
return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
}
@@ -4029,25 +4035,30 @@ static struct pernet_operations nexthop_net_ops = {
.exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
};
+static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop},
+ {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop},
+ {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop,
+ .dumpit = rtm_dump_nexthop},
+ {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket,
+ .dumpit = rtm_dump_nexthop_bucket},
+ {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP,
+ .doit = rtm_new_nexthop},
+ {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP,
+ .dumpit = rtm_dump_nexthop},
+ {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP,
+ .doit = rtm_new_nexthop},
+ {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP,
+ .dumpit = rtm_dump_nexthop},
+};
+
static int __init nexthop_init(void)
{
register_pernet_subsys(&nexthop_net_ops);
register_netdevice_notifier(&nh_netdev_notifier);
- rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
- rtm_dump_nexthop, 0);
-
- rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
-
- rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
- rtm_dump_nexthop_bucket, 0);
+ rtnl_register_many(nexthop_rtnl_msg_handlers);
return 0;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 474dfd263c8b..0e9e01967ec9 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -370,7 +370,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->ip_summed = CHECKSUM_NONE;
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 13c0f1d455f3..0fbec3509618 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -512,7 +512,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
sk->sk_protocol;
}
- flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
+ flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope,
prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk));
}
@@ -541,7 +541,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
- ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+ ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk),
inet_test_bit(HDRINCL, sk) ?
IPPROTO_RAW : sk->sk_protocol,
@@ -1027,6 +1027,19 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
struct fib_nh_common *nhc;
fib_select_path(net, &res, fl4, NULL);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (fib_info_num_path(res.fi) > 1) {
+ int nhsel;
+
+ for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
+ nhc = fib_info_nhc(res.fi, nhsel);
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+ jiffies + net->ipv4.ip_rt_mtu_expires);
+ }
+ rcu_read_unlock();
+ return;
+ }
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
nhc = FIB_RES_NHC(res);
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + net->ipv4.ip_rt_mtu_expires);
@@ -1263,7 +1276,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & IPTOS_RT_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1665,49 +1678,54 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
EXPORT_SYMBOL(rt_dst_clone);
/* called in rcu_read_lock() section */
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct in_device *in_dev, u32 *itag)
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
- int err;
+ enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
- return -EINVAL;
+ return SKB_DROP_REASON_NOT_SPECIFIED;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- skb->protocol != htons(ETH_P_IP))
- return -EINVAL;
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return SKB_DROP_REASON_INVALID_PROTO;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- return -EINVAL;
+ return SKB_DROP_REASON_IP_LOCALNET;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr) &&
ip_hdr(skb)->protocol != IPPROTO_IGMP)
- return -EINVAL;
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
} else {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, itag);
- if (err < 0)
- return err;
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, itag);
+ if (reason)
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
+static enum skb_drop_reason
+ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev, int our)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
+ enum skb_drop_reason reason;
struct rtable *rth;
u32 itag = 0;
- int err;
- err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
- if (err)
- return err;
+ reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
+ &itag);
+ if (reason)
+ return reason;
if (our)
flags |= RTCF_LOCAL;
@@ -1718,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
false);
if (!rth)
- return -ENOBUFS;
+ return SKB_DROP_REASON_NOMEM;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1734,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
@@ -1764,11 +1782,12 @@ static void ip_handle_martian_source(struct net_device *dev,
}
/* called in rcu_read_lock() section */
-static int __mkroute_input(struct sk_buff *skb,
- const struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+static enum skb_drop_reason
+__mkroute_input(struct sk_buff *skb, const struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
@@ -1782,12 +1801,13 @@ static int __mkroute_input(struct sk_buff *skb,
out_dev = __in_dev_get_rcu(dev);
if (!out_dev) {
net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
- return -EINVAL;
+ return reason;
}
- err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
+ err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
+ reason = -err;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -1815,7 +1835,7 @@ static int __mkroute_input(struct sk_buff *skb,
*/
if (out_dev == in_dev &&
IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
- err = -EINVAL;
+ reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE;
goto cleanup;
}
}
@@ -1838,7 +1858,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth = rt_dst_alloc(out_dev->dev, 0, res->type,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto cleanup;
}
@@ -1852,9 +1872,9 @@ static int __mkroute_input(struct sk_buff *skb,
lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
- err = 0;
- cleanup:
- return err;
+ reason = SKB_NOT_DROPPED_YET;
+cleanup:
+ return reason;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -2112,11 +2132,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
-static int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct flow_keys *hkeys)
+static enum skb_drop_reason
+ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp, struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && fib_info_num_path(res->fi) > 1) {
@@ -2128,50 +2147,57 @@ static int ip_mkroute_input(struct sk_buff *skb,
#endif
/* create a routing cache entry */
- return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp);
}
/* Implements all the saddr-related checks as ip_route_input_slow(),
* assuming daddr is valid and the destination is not a local broadcast one.
* Uses the provided hint instead of performing a route lookup.
*/
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- const struct sk_buff *hint)
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ const struct sk_buff *hint)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
- int err = -EINVAL;
u32 tag = 0;
if (!in_dev)
- return -EINVAL;
+ return reason;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- tos &= IPTOS_RT_MASK;
- err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev,
+ in_dev, &tag);
+ if (reason)
goto martian_source;
skip_validate_source:
skb_dst_copy(skb, hint);
- return 0;
+ return SKB_NOT_DROPPED_YET;
martian_source:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
- return err;
+ return reason;
}
/* get device for dst_alloc with local routes */
@@ -2200,10 +2226,12 @@ static struct net_device *ip_rt_get_dev(struct net *net,
* called with rcu_read_lock()
*/
-static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
struct net *net = dev_net(dev);
@@ -2231,8 +2259,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_tun_key.tun_id = 0;
skb_dst_drop(skb);
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
res->fi = NULL;
res->table = NULL;
@@ -2242,21 +2272,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
/* Accept zero addresses only to limited broadcast;
* I even do not know to fix it or not. Waiting for complains :-)
*/
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(daddr))
+ if (ipv4_is_zeronet(daddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
* and call it once if daddr or/and saddr are loopback addresses
*/
if (ipv4_is_loopback(daddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_destination;
+ }
} else if (ipv4_is_loopback(saddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
}
/*
@@ -2266,7 +2304,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
- fl4.flowi4_tos = tos;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
fl4.daddr = daddr;
@@ -2298,10 +2336,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
}
+ err = -EINVAL;
if (res->type == RTN_LOCAL) {
- err = fib_validate_source(skb, saddr, daddr, tos,
- 0, dev, in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp,
+ 0, dev, in_dev, &itag);
+ if (reason)
goto martian_source;
goto local_input;
}
@@ -2310,21 +2349,28 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res->type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
make_route:
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
-out: return err;
+ reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp,
+ flkeys);
+
+out:
+ return reason;
brd_input:
- if (skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ if (skb->protocol != htons(ETH_P_IP)) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
+ goto out;
+ }
if (!ipv4_is_zeronet(saddr)) {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, &itag);
+ if (reason)
goto martian_source;
}
flags |= RTCF_BROADCAST;
@@ -2342,7 +2388,7 @@ local_input:
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
}
}
@@ -2379,7 +2425,7 @@ local_input:
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
no_route:
@@ -2400,12 +2446,8 @@ martian_destination:
&daddr, &saddr, dev->name);
#endif
-e_inval:
- err = -EINVAL;
- goto out;
-
e_nobufs:
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto out;
martian_source:
@@ -2414,8 +2456,10 @@ martian_source:
}
/* called with rcu_read_lock held */
-static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
* The problem was that too many Ethernet cards have broken/missing
@@ -2429,12 +2473,13 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* route cache entry is created eventually.
*/
if (ipv4_is_multicast(daddr)) {
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
- int err = -EINVAL;
if (!in_dev)
- return err;
+ return reason;
+
our = ip_check_mc_rcu(in_dev, daddr, saddr,
ip_hdr(skb)->protocol);
@@ -2455,27 +2500,27 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- err = ip_route_input_mc(skb, daddr, saddr,
- tos, dev, our);
+ reason = ip_route_input_mc(skb, daddr, saddr, dscp,
+ dev, our);
}
- return err;
+ return reason;
}
- return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
+ return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr,
+ __be32 saddr, dscp_t dscp,
+ struct net_device *dev)
{
+ enum skb_drop_reason reason;
struct fib_result res;
- int err;
- tos &= IPTOS_RT_MASK;
rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return err;
+ return reason;
}
EXPORT_SYMBOL(ip_route_input_noref);
@@ -2618,7 +2663,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos &= IPTOS_RT_MASK;
+ fl4->flowi4_tos &= INET_DSCP_MASK;
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -3233,10 +3278,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
rtm = nlmsg_data(nlh);
- src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
- dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
- iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
- mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ src = nla_get_in_addr_default(tb[RTA_SRC], 0);
+ dst = nla_get_in_addr_default(tb[RTA_DST], 0);
+ iif = nla_get_u32_default(tb[RTA_IIF], 0);
+ mark = nla_get_u32_default(tb[RTA_MARK], 0);
if (tb[RTA_UID])
uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
else
@@ -3261,8 +3306,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
- fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
- fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
+ fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
+ fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0);
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
if (sport)
@@ -3286,8 +3331,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src,
- rtm->rtm_tos & IPTOS_RT_MASK, dev,
- &res);
+ inet_dsfield_to_dscp(rtm->rtm_tos),
+ dev, &res) ? -EINVAL : 0;
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
@@ -3634,6 +3679,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
+static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_GETROUTE,
+ .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip_rt_init(void)
{
void *idents_hash;
@@ -3691,8 +3741,7 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register_many(ip_rt_rtnl_msg_handlers);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4af0c234d8d7..a79b2a52ce01 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -601,22 +601,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_available_ulp,
},
{
- .procname = "icmp_msgs_per_sec",
- .data = &sysctl_icmp_msgs_per_sec,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "icmp_msgs_burst",
- .data = &sysctl_icmp_msgs_burst,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
@@ -702,6 +686,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_msgs_per_sec",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "icmp_msgs_burst",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
.procname = "ping_group_range",
.data = &init_net.ipv4.ping_group_range.range,
.maxlen = sizeof(gid_t)*2,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 831a18dc7aa6..0d704bda6c41 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@
#include <trace/events/tcp.h>
#include <net/rps.h>
+#include "../core/devmem.h"
+
/* Track pending CMSGs. */
enum {
TCP_CMSG_INQ = 1,
@@ -471,18 +473,20 @@ void tcp_init_sock(struct sock *sk)
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
+ xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1);
}
EXPORT_SYMBOL(tcp_init_sock);
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
{
struct sk_buff *skb = tcp_write_queue_tail(sk);
+ u32 tsflags = sockc->tsflags;
if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
+ sock_tx_timestamp(sk, sockc, &shinfo->tx_flags);
if (tsflags & SOF_TIMESTAMPING_TX_ACK)
tcb->txstamp_ack = 1;
if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
@@ -1318,7 +1322,7 @@ wait_for_space:
out:
if (copied) {
- tcp_tx_timestamp(sk, sockc.tsflags);
+ tcp_tx_timestamp(sk, &sockc);
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
@@ -2160,6 +2164,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
skb = tcp_recv_skb(sk, seq, &offset);
}
+ if (!skb_frags_readable(skb))
+ break;
+
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, tss);
zc->msg_flags |= TCP_CMSG_TS;
@@ -2177,6 +2184,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
break;
}
page = skb_frag_page(frags);
+ if (WARN_ON_ONCE(!page))
+ break;
+
prefetchw(page);
pages[pages_to_map++] = page;
length += PAGE_SIZE;
@@ -2235,6 +2245,7 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
struct scm_timestamping_internal *tss)
{
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
+ u32 tsflags = READ_ONCE(sk->sk_tsflags);
bool has_timestamping = false;
if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
@@ -2274,14 +2285,18 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
+ if (tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
@@ -2317,6 +2332,220 @@ static int tcp_inq_hint(struct sock *sk)
return inq;
}
+/* batch __xa_alloc() calls and reduce xa_lock()/xa_unlock() overhead. */
+struct tcp_xa_pool {
+ u8 max; /* max <= MAX_SKB_FRAGS */
+ u8 idx; /* idx <= max */
+ __u32 tokens[MAX_SKB_FRAGS];
+ netmem_ref netmems[MAX_SKB_FRAGS];
+};
+
+static void tcp_xa_pool_commit_locked(struct sock *sk, struct tcp_xa_pool *p)
+{
+ int i;
+
+ /* Commit part that has been copied to user space. */
+ for (i = 0; i < p->idx; i++)
+ __xa_cmpxchg(&sk->sk_user_frags, p->tokens[i], XA_ZERO_ENTRY,
+ (__force void *)p->netmems[i], GFP_KERNEL);
+ /* Rollback what has been pre-allocated and is no longer needed. */
+ for (; i < p->max; i++)
+ __xa_erase(&sk->sk_user_frags, p->tokens[i]);
+
+ p->max = 0;
+ p->idx = 0;
+}
+
+static void tcp_xa_pool_commit(struct sock *sk, struct tcp_xa_pool *p)
+{
+ if (!p->max)
+ return;
+
+ xa_lock_bh(&sk->sk_user_frags);
+
+ tcp_xa_pool_commit_locked(sk, p);
+
+ xa_unlock_bh(&sk->sk_user_frags);
+}
+
+static int tcp_xa_pool_refill(struct sock *sk, struct tcp_xa_pool *p,
+ unsigned int max_frags)
+{
+ int err, k;
+
+ if (p->idx < p->max)
+ return 0;
+
+ xa_lock_bh(&sk->sk_user_frags);
+
+ tcp_xa_pool_commit_locked(sk, p);
+
+ for (k = 0; k < max_frags; k++) {
+ err = __xa_alloc(&sk->sk_user_frags, &p->tokens[k],
+ XA_ZERO_ENTRY, xa_limit_31b, GFP_KERNEL);
+ if (err)
+ break;
+ }
+
+ xa_unlock_bh(&sk->sk_user_frags);
+
+ p->max = k;
+ p->idx = 0;
+ return k ? 0 : err;
+}
+
+/* On error, returns the -errno. On success, returns number of bytes sent to the
+ * user. May not consume all of @remaining_len.
+ */
+static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
+ unsigned int offset, struct msghdr *msg,
+ int remaining_len)
+{
+ struct dmabuf_cmsg dmabuf_cmsg = { 0 };
+ struct tcp_xa_pool tcp_xa_pool;
+ unsigned int start;
+ int i, copy, n;
+ int sent = 0;
+ int err = 0;
+
+ tcp_xa_pool.max = 0;
+ tcp_xa_pool.idx = 0;
+ do {
+ start = skb_headlen(skb);
+
+ if (skb_frags_readable(skb)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* Copy header. */
+ copy = start - offset;
+ if (copy > 0) {
+ copy = min(copy, remaining_len);
+
+ n = copy_to_iter(skb->data + offset, copy,
+ &msg->msg_iter);
+ if (n != copy) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ offset += copy;
+ remaining_len -= copy;
+
+ /* First a dmabuf_cmsg for # bytes copied to user
+ * buffer.
+ */
+ memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg));
+ dmabuf_cmsg.frag_size = copy;
+ err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR,
+ sizeof(dmabuf_cmsg), &dmabuf_cmsg);
+ if (err || msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~MSG_CTRUNC;
+ if (!err)
+ err = -ETOOSMALL;
+ goto out;
+ }
+
+ sent += copy;
+
+ if (remaining_len == 0)
+ goto out;
+ }
+
+ /* after that, send information of dmabuf pages through a
+ * sequence of cmsg
+ */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct net_iov *niov;
+ u64 frag_offset;
+ int end;
+
+ /* !skb_frags_readable() should indicate that ALL the
+ * frags in this skb are dmabuf net_iovs. We're checking
+ * for that flag above, but also check individual frags
+ * here. If the tcp stack is not setting
+ * skb_frags_readable() correctly, we still don't want
+ * to crash here.
+ */
+ if (!skb_frag_net_iov(frag)) {
+ net_err_ratelimited("Found non-dmabuf skb with net_iov");
+ err = -ENODEV;
+ goto out;
+ }
+
+ niov = skb_frag_net_iov(frag);
+ end = start + skb_frag_size(frag);
+ copy = end - offset;
+
+ if (copy > 0) {
+ copy = min(copy, remaining_len);
+
+ frag_offset = net_iov_virtual_addr(niov) +
+ skb_frag_off(frag) + offset -
+ start;
+ dmabuf_cmsg.frag_offset = frag_offset;
+ dmabuf_cmsg.frag_size = copy;
+ err = tcp_xa_pool_refill(sk, &tcp_xa_pool,
+ skb_shinfo(skb)->nr_frags - i);
+ if (err)
+ goto out;
+
+ /* Will perform the exchange later */
+ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
+ dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
+
+ offset += copy;
+ remaining_len -= copy;
+
+ err = put_cmsg(msg, SOL_SOCKET,
+ SO_DEVMEM_DMABUF,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err || msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~MSG_CTRUNC;
+ if (!err)
+ err = -ETOOSMALL;
+ goto out;
+ }
+
+ atomic_long_inc(&niov->pp_ref_count);
+ tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag);
+
+ sent += copy;
+
+ if (remaining_len == 0)
+ goto out;
+ }
+ start = end;
+ }
+
+ tcp_xa_pool_commit(sk, &tcp_xa_pool);
+ if (!remaining_len)
+ goto out;
+
+ /* if remaining_len is not satisfied yet, we need to go to the
+ * next frag in the frag_list to satisfy remaining_len.
+ */
+ skb = skb_shinfo(skb)->frag_list ?: skb->next;
+
+ offset = offset - start;
+ } while (skb);
+
+ if (remaining_len) {
+ err = -EFAULT;
+ goto out;
+ }
+
+out:
+ tcp_xa_pool_commit(sk, &tcp_xa_pool);
+ if (!sent)
+ sent = err;
+
+ return sent;
+}
+
/*
* This routine copies from a sock struct into the user buffer.
*
@@ -2330,6 +2559,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int last_copied_dmabuf = -1; /* uninitialized */
int copied = 0;
u32 peek_seq;
u32 *seq;
@@ -2509,15 +2739,44 @@ found_ok_skb:
}
if (!(flags & MSG_TRUNC)) {
- err = skb_copy_datagram_msg(skb, offset, msg, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
+ if (last_copied_dmabuf != -1 &&
+ last_copied_dmabuf != !skb_frags_readable(skb))
break;
+
+ if (skb_frags_readable(skb)) {
+ err = skb_copy_datagram_msg(skb, offset, msg,
+ used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
+ }
+ } else {
+ if (!(flags & MSG_SOCK_DEVMEM)) {
+ /* dmabuf skbs can only be received
+ * with the MSG_SOCK_DEVMEM flag.
+ */
+ if (!copied)
+ copied = -EFAULT;
+
+ break;
+ }
+
+ err = tcp_recvmsg_dmabuf(sk, skb, offset, msg,
+ used);
+ if (err <= 0) {
+ if (!copied)
+ copied = -EFAULT;
+
+ break;
+ }
+ used = err;
}
}
+ last_copied_dmabuf = !skb_frags_readable(skb);
+
WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
@@ -2833,7 +3092,7 @@ void __tcp_close(struct sock *sk, long timeout)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, sk->sk_allocation,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2908,7 +3167,7 @@ adjudge_to_death:
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_LINGER);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2927,7 +3186,7 @@ adjudge_to_death:
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3025,13 +3284,16 @@ int tcp_disconnect(struct sock *sk, int flags)
inet_csk_listen_stop(sk);
} else if (unlikely(tp->repair)) {
WRITE_ONCE(sk->sk_err, ECONNABORTED);
- } else if (tcp_need_reset(old_state) ||
- (tp->snd_nxt != tp->write_seq &&
- (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
+ } else if (tcp_need_reset(old_state)) {
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_TCP_STATE);
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
+ } else if (tp->snd_nxt != tp->write_seq &&
+ (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
+ tcp_send_active_reset(sk, gfp_any(),
+ SK_RST_REASON_TCP_DISCONNECT_WITH_DATA);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -3074,7 +3336,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
- if (icsk->icsk_ca_ops->release)
+ if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
icsk->icsk_ca_initialized = 0;
@@ -4655,7 +4917,7 @@ int tcp_abort(struct sock *sk, int err)
if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_STATE);
tcp_done_with_error(sk, err);
bh_unlock_sock(sk);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index db6516092daf..bbb8d5f0eae7 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -109,12 +109,13 @@ bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code)
* it's known that the keys in ao_info are matching peer's
* family/address/VRF/etc.
*/
-struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao,
+struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk,
+ struct tcp_ao_info *ao,
int sndid, int rcvid)
{
struct tcp_ao_key *key;
- hlist_for_each_entry_rcu(key, &ao->head, node) {
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) {
if ((sndid >= 0 && key->sndid != sndid) ||
(rcvid >= 0 && key->rcvid != rcvid))
continue;
@@ -205,7 +206,7 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index,
if (!ao)
return NULL;
- hlist_for_each_entry_rcu(key, &ao->head, node) {
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) {
u8 prefixlen = min(prefix, key->prefixlen);
if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen,
@@ -793,7 +794,7 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
if (!ao_info)
return -ENOENT;
- *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1);
+ *key = tcp_ao_established_key(sk, ao_info, aoh->rnext_keyid, -1);
if (!*key)
return -ENOENT;
*traffic_key = snd_other_key(*key);
@@ -979,7 +980,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
*/
key = READ_ONCE(info->rnext_key);
if (key->rcvid != aoh->keyid) {
- key = tcp_ao_established_key(info, -1, aoh->keyid);
+ key = tcp_ao_established_key(sk, info, -1, aoh->keyid);
if (!key)
goto key_not_found;
}
@@ -1003,7 +1004,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
aoh->rnext_keyid,
tcp_ao_hdr_maclen(aoh));
/* If the key is not found we do nothing. */
- key = tcp_ao_established_key(info, aoh->rnext_keyid, -1);
+ key = tcp_ao_established_key(sk, info, aoh->rnext_keyid, -1);
if (key)
/* pairs with tcp_ao_del_cmd */
WRITE_ONCE(info->current_key, key);
@@ -1163,7 +1164,7 @@ void tcp_ao_established(struct sock *sk)
if (!ao)
return;
- hlist_for_each_entry_rcu(key, &ao->head, node)
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
tcp_ao_cache_traffic_keys(sk, ao, key);
}
@@ -1180,7 +1181,7 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb)
WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq);
ao->rcv_sne = 0;
- hlist_for_each_entry_rcu(key, &ao->head, node)
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
tcp_ao_cache_traffic_keys(sk, ao, key);
}
@@ -1256,14 +1257,14 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
key_head = rcu_dereference(hlist_first_rcu(&new_ao->head));
first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node);
- key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1);
+ key = tcp_ao_established_key(req_to_sk(req), new_ao, tcp_rsk(req)->ao_keyid, -1);
if (key)
new_ao->current_key = key;
else
new_ao->current_key = first_key;
/* set rnext_key */
- key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next);
+ key = tcp_ao_established_key(req_to_sk(req), new_ao, -1, tcp_rsk(req)->ao_rcv_next);
if (key)
new_ao->rnext_key = key;
else
@@ -1857,12 +1858,12 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family,
* if there's any.
*/
if (cmd.set_current) {
- new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
+ new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1);
if (!new_current)
return -ENOENT;
}
if (cmd.set_rnext) {
- new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
+ new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext);
if (!new_rnext)
return -ENOENT;
}
@@ -1902,7 +1903,8 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family,
* "It is presumed that an MKT affecting a particular
* connection cannot be destroyed during an active connection"
*/
- hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ hlist_for_each_entry_rcu(key, &ao_info->head, node,
+ lockdep_sock_is_held(sk)) {
if (cmd.sndid != key->sndid ||
cmd.rcvid != key->rcvid)
continue;
@@ -2000,14 +2002,14 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
* if there's any.
*/
if (cmd.set_current) {
- new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
+ new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1);
if (!new_current) {
err = -ENOENT;
goto out;
}
}
if (cmd.set_rnext) {
- new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
+ new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext);
if (!new_rnext) {
err = -ENOENT;
goto out;
@@ -2101,7 +2103,8 @@ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen)
* The layout of the fields in the user and kernel structures is expected to
* be the same (including in the 32bit vs 64bit case).
*/
-static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info,
+static int tcp_ao_copy_mkts_to_user(const struct sock *sk,
+ struct tcp_ao_info *ao_info,
sockptr_t optval, sockptr_t optlen)
{
struct tcp_ao_getsockopt opt_in, opt_out;
@@ -2229,7 +2232,8 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info,
/* May change in RX, while we're dumping, pre-fetch it */
current_key = READ_ONCE(ao_info->current_key);
- hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ hlist_for_each_entry_rcu(key, &ao_info->head, node,
+ lockdep_sock_is_held(sk)) {
if (opt_in.get_all)
goto match;
@@ -2309,7 +2313,7 @@ int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen)
if (!ao_info)
return -ENOENT;
- return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen);
+ return tcp_ao_copy_mkts_to_user(sk, ao_info, optval, optlen);
}
int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen)
@@ -2396,7 +2400,7 @@ int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen)
WRITE_ONCE(ao->snd_sne, cmd.snd_sne);
WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne);
- hlist_for_each_entry_rcu(key, &ao->head, node)
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
tcp_ao_cache_traffic_keys(sk, ao, key);
return 0;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 53b0d62fd2c2..99cef92e6290 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -30,7 +30,7 @@ void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
}
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
- struct sk_msg *msg, u32 apply_bytes, int flags)
+ struct sk_msg *msg, u32 apply_bytes)
{
bool apply = apply_bytes;
struct scatterlist *sge;
@@ -167,7 +167,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
if (unlikely(!psock))
return -EPIPE;
- ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+ ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
return ret;
@@ -221,11 +221,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int flags,
int *addr_len)
{
- struct tcp_sock *tcp = tcp_sk(sk);
int peek = flags & MSG_PEEK;
- u32 seq = tcp->copied_seq;
struct sk_psock *psock;
+ struct tcp_sock *tcp;
int copied = 0;
+ u32 seq;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -238,7 +238,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
-
+ tcp = tcp_sk(sk);
+ seq = tcp->copied_seq;
/* We may have received data on the sk_receive_queue pre-accept and
* then we can not use read_skb in this context because we haven't
* assigned a sk_socket yet so have no link to the ops. The work-around
@@ -440,7 +441,6 @@ more_data:
cork = true;
psock->cork = NULL;
}
- sk_msg_return(sk, msg, tosend);
release_sock(sk);
origsize = msg->sg.size;
@@ -452,8 +452,9 @@ more_data:
sock_put(sk_redir);
lock_sock(sk);
+ sk_mem_uncharge(sk, sent);
if (unlikely(ret < 0)) {
- int free = sk_msg_free_nocharge(sk, msg);
+ int free = sk_msg_free(sk, msg);
if (!cork)
*copied -= free;
@@ -467,7 +468,7 @@ more_data:
break;
case __SK_DROP:
default:
- sk_msg_free_partial(sk, msg, tosend);
+ sk_msg_free(sk, msg);
sk_msg_apply_bytes(psock, tosend);
*copied -= (tosend + delta);
return -EACCES;
@@ -483,11 +484,8 @@ more_data:
}
if (msg &&
msg->sg.data[msg->sg.start].page_link &&
- msg->sg.data[msg->sg.start].length) {
- if (eval == __SK_REDIRECT)
- sk_mem_charge(sk, tosend - sent);
+ msg->sg.data[msg->sg.start].length)
goto more_data;
- }
}
return ret;
}
@@ -577,7 +575,7 @@ out_err:
err = sk_stream_error(sk, msg->msg_flags, err);
release_sock(sk);
sk_psock_put(sk, psock);
- return copied ? copied : err;
+ return copied > 0 ? copied : err;
}
enum {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 0306d257fa64..df758adbb445 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -270,8 +270,9 @@ void tcp_cleanup_congestion_control(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->release)
+ if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
+ icsk->icsk_ca_initialized = 0;
bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
}
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 52b1f2665dfa..81b96331b2bb 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -185,7 +185,7 @@ static inline void htcp_alpha_update(struct htcp *ca)
u32 scale = (HZ << 3) / (10 * minRTT);
/* clamping ratio to interval [0.5,10]<<3 */
- scale = min(max(scale, 1U << 2), 10U << 3);
+ scale = clamp(scale, 1U << 2, 10U << 3);
factor = (factor << 3) / scale;
if (!factor)
factor = 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e37488d3453f..5bdf13ac26ef 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -75,7 +75,7 @@
#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
@@ -2473,8 +2473,22 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
*/
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
- return tp->retrans_stamp &&
- tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+ const struct sock *sk = (const struct sock *)tp;
+
+ if (tp->retrans_stamp &&
+ tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
+ return true; /* got echoed TS before first retransmission */
+
+ /* Check if nothing was retransmitted (retrans_stamp==0), which may
+ * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
+ * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
+ * retrans_stamp even if we had retransmitted the SYN.
+ */
+ if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */
+ sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */
+ return true; /* nothing was retransmitted */
+
+ return false;
}
/* Undo procedures. */
@@ -2508,6 +2522,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
return false;
}
+/* If loss recovery is finished and there are no retransmits out in the
+ * network, then we clear retrans_stamp so that upon the next loss recovery
+ * retransmits_timed_out() and timestamp-undo are using the correct value.
+ */
+static void tcp_retrans_stamp_cleanup(struct sock *sk)
+{
+ if (!tcp_any_retrans_done(sk))
+ tcp_sk(sk)->retrans_stamp = 0;
+}
+
static void DBGUNDO(struct sock *sk, const char *msg)
{
#if FASTRETRANS_DEBUG > 1
@@ -2875,6 +2899,9 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
struct tcp_sock *tp = tcp_sk(sk);
int mib_idx;
+ /* Start the clock with our fast retransmit, for undo and ETIMEDOUT. */
+ tcp_retrans_stamp_cleanup(sk);
+
if (tcp_is_reno(tp))
mib_idx = LINUX_MIB_TCPRENORECOVERY;
else
@@ -4894,8 +4921,8 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
return res;
}
-static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
- enum skb_drop_reason reason)
+noinline_for_tracing static void
+tcp_drop_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
{
sk_drops_add(sk, skb);
sk_skb_reason_drop(sk, skb, reason);
@@ -5391,6 +5418,9 @@ restart:
for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
n = tcp_skb_next(skb, list);
+ if (!skb_frags_readable(skb))
+ goto skip_this;
+
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
skb = tcp_collapse_one(sk, skb, list, root);
@@ -5411,17 +5441,20 @@ restart:
break;
}
- if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) &&
+ if (n && n != tail && skb_frags_readable(n) &&
+ tcp_skb_can_collapse_rx(skb, n) &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
end_of_skbs = false;
break;
}
+skip_this:
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
}
if (end_of_skbs ||
- (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) ||
+ !skb_frags_readable(skb))
return;
__skb_queue_head_init(&tmp);
@@ -5463,7 +5496,8 @@ restart:
if (!skb ||
skb == tail ||
!tcp_skb_can_collapse_rx(nskb, skb) ||
- (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) ||
+ !skb_frags_readable(skb))
goto end;
}
}
@@ -6650,10 +6684,17 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
tcp_try_undo_recovery(sk);
- /* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_update_rto_time(tp);
- tp->retrans_stamp = 0;
inet_csk(sk)->icsk_retransmits = 0;
+ /* In tcp_fastopen_synack_timer() on the first SYNACK RTO we set
+ * retrans_stamp but don't enter CA_Loss, so in case that happened we
+ * need to zero retrans_stamp here to prevent spurious
+ * retransmits_timed_out(). However, if the ACK of our SYNACK caused us
+ * to enter CA_Recovery then we need to leave retrans_stamp as it was
+ * set entering CA_Recovery, for correct retransmits_timed_out() and
+ * undo behavior.
+ */
+ tcp_retrans_stamp_cleanup(sk);
/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
* we no longer need req so release it.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a4e510846905..a38c8b1f44db 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -79,6 +79,7 @@
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <linux/btf_ids.h>
+#include <linux/skbuff_ref.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@@ -120,6 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
struct tcp_sock *tp = tcp_sk(sk);
int ts_recent_stamp;
+ if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2)
+ reuse = 0;
+
if (reuse == 2) {
/* Still does not detect *everything* that goes through
* lo, since we require a loopback src or dst address
@@ -903,7 +907,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
ctl_sk->sk_mark = 0;
ctl_sk->sk_priority = 0;
}
- ip_send_unicast_reply(ctl_sk,
+ ip_send_unicast_reply(ctl_sk, sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
@@ -1017,7 +1021,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
- ip_send_unicast_reply(ctl_sk,
+ ip_send_unicast_reply(ctl_sk, sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
@@ -1049,7 +1053,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
}
if (aoh)
- key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1);
+ key.ao_key = tcp_ao_established_key(sk, ao_info,
+ aoh->rnext_keyid, -1);
}
}
if (key.ao_key) {
@@ -1070,7 +1075,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
}
tcp_v4_send_ack(sk, skb,
- tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcptw->tw_snd_nxt, READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent),
@@ -2509,10 +2514,25 @@ static void tcp_md5sig_info_free_rcu(struct rcu_head *head)
}
#endif
+static void tcp_release_user_frags(struct sock *sk)
+{
+#ifdef CONFIG_PAGE_POOL
+ unsigned long index;
+ void *netmem;
+
+ xa_for_each(&sk->sk_user_frags, index, netmem)
+ WARN_ON_ONCE(!napi_pp_put_page((__force netmem_ref)netmem));
+#endif
+}
+
void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ tcp_release_user_frags(sk);
+
+ xa_destroy(&sk->sk_user_frags);
+
trace_tcp_destroy_sock(sk);
tcp_clear_xmit_timers(sk);
@@ -2881,15 +2901,17 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
+ u8 icsk_pending;
int rx_queue;
int state;
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sk->sk_timer)) {
@@ -2945,7 +2967,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
- i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
+ i, src, srcp, dest, destp, READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b01eb6d94413..95669935494e 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -617,9 +617,13 @@ static struct genl_family tcp_metrics_nl_family;
static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
- [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_ADDR_IPV6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+
[TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
+ [TCP_METRICS_ATTR_SADDR_IPV6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
@@ -811,8 +815,6 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
if (a) {
struct in6_addr in6;
- if (nla_len(a) != sizeof(struct in6_addr))
- return -EINVAL;
in6 = nla_get_in6_addr(a);
inetpeer_set_addr_v6(addr, &in6);
if (hash)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a19a9dbd3409..7121d8573928 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -52,16 +52,17 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
return TCP_TW_SUCCESS;
}
-static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
+static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq,
+ u32 rcv_nxt)
{
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao;
ao = rcu_dereference(tcptw->ao_info);
- if (unlikely(ao && seq < tcptw->tw_rcv_nxt))
+ if (unlikely(ao && seq < rcv_nxt))
WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1);
#endif
- tcptw->tw_rcv_nxt = seq;
+ WRITE_ONCE(tcptw->tw_rcv_nxt, seq);
}
/*
@@ -98,8 +99,9 @@ enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th, u32 *tw_isn)
{
- struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+ u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt);
+ struct tcp_options_received tmp_opt;
bool paws_reject = false;
int ts_recent_stamp;
@@ -117,26 +119,26 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
}
}
- if (tw->tw_substate == TCP_FIN_WAIT2) {
+ if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
/* Out of window, send ACK */
if (paws_reject ||
!tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcptw->tw_rcv_nxt,
- tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
+ rcv_nxt,
+ rcv_nxt + tcptw->tw_rcv_wnd))
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
if (th->rst)
goto kill;
- if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
+ if (th->syn && !before(TCP_SKB_CB(skb)->seq, rcv_nxt))
return TCP_TW_RST;
/* Dup ACK? */
if (!th->ack ||
- !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
+ !after(TCP_SKB_CB(skb)->end_seq, rcv_nxt) ||
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
@@ -146,12 +148,13 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* reset.
*/
if (!th->fin ||
- TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
+ TCP_SKB_CB(skb)->end_seq != rcv_nxt + 1)
return TCP_TW_RST;
/* FIN arrived, enter true time-wait state. */
- tw->tw_substate = TCP_TIME_WAIT;
- twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq);
+ WRITE_ONCE(tw->tw_substate, TCP_TIME_WAIT);
+ twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq,
+ rcv_nxt);
if (tmp_opt.saw_tstamp) {
WRITE_ONCE(tcptw->tw_ts_recent_stamp,
@@ -182,7 +185,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
*/
if (!paws_reject &&
- (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
+ (TCP_SKB_CB(skb)->seq == rcv_nxt &&
(TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
/* In window segment, it may be only reset or bare ack. */
@@ -229,7 +232,7 @@ kill:
*/
if (th->syn && !th->rst && !th->ack && !paws_reject &&
- (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
+ (after(TCP_SKB_CB(skb)->seq, rcv_nxt) ||
(tmp_opt.saw_tstamp &&
(s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
@@ -323,6 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_last_oow_ack_time = 0;
tcptw->tw_tx_delay = tp->tcp_tx_delay;
tw->tw_txhash = sk->sk_txhash;
+ tw->tw_tx_queue_mapping = sk->sk_tx_queue_mapping;
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
+ tw->tw_rx_queue_mapping = sk->sk_rx_queue_mapping;
+#endif
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -625,6 +632,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+ xa_init_flags(&newsk->sk_user_frags, XA_FLAGS_ALLOC1);
+
return newsk;
}
EXPORT_SYMBOL(tcp_create_openreq_child);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index e4ad3311e148..2308665b51c5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -101,8 +101,14 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
- if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
- return __tcp4_gso_segment_list(skb, features);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) {
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size)
+ return __tcp4_gso_segment_list(skb, features);
+
+ skb->ip_summed = CHECKSUM_NONE;
+ }
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 16c48df8df4c..0e5b9a654254 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -883,8 +883,10 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
unsigned int size;
if (mptcp_syn_options(sk, skb, &size, &opts->mptcp)) {
- opts->options |= OPTION_MPTCP;
- remaining -= size;
+ if (remaining >= size) {
+ opts->options |= OPTION_MPTCP;
+ remaining -= size;
+ }
}
}
@@ -2342,9 +2344,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (len <= skb->len)
break;
- if (unlikely(TCP_SKB_CB(skb)->eor) ||
- tcp_has_tx_tstamp(skb) ||
- !skb_pure_zcopy_same(skb, next))
+ if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next))
return false;
len -= skb->len;
@@ -2956,10 +2956,8 @@ void tcp_send_loss_probe(struct sock *sk)
}
skb = skb_rb_last(&sk->tcp_rtx_queue);
if (unlikely(!skb)) {
- WARN_ONCE(tp->packets_out,
- "invalid inflight: %u state %u cwnd %u mss %d\n",
- tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
- inet_csk(sk)->icsk_pending = 0;
+ tcp_warn_once(sk, tp->packets_out, "invalid inflight: ");
+ smp_store_release(&inet_csk(sk)->icsk_pending, 0);
return;
}
@@ -2992,7 +2990,7 @@ probe_sent:
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
/* Reset s.t. tcp_rearm_rto will restart timer from now */
- inet_csk(sk)->icsk_pending = 0;
+ smp_store_release(&inet_csk(sk)->icsk_pending, 0);
rearm_timer:
tcp_rearm_rto(sk);
}
@@ -3264,6 +3262,8 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
return false;
if (skb_cloned(skb))
return false;
+ if (!skb_frags_readable(skb))
+ return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -3649,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
+ trace_tcp_send_reset(sk, NULL, reason);
}
/* Send a crossed SYN-ACK during socket establishment.
@@ -3728,7 +3728,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
switch (synack_type) {
case TCP_SYNACK_NORMAL:
- skb_set_owner_w(skb, req_to_sk(req));
+ skb_set_owner_edemux(skb, req_to_sk(req));
break;
case TCP_SYNACK_COOKIE:
/* Under synflood, we do not attach skb to a socket,
@@ -4131,7 +4131,10 @@ int tcp_connect(struct sock *sk)
if (unlikely(!buff))
return -ENOBUFS;
- tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
+ /* SYN eats a sequence byte, write_seq updated by
+ * tcp_connect_queue_skb().
+ */
+ tcp_init_nondata_skb(buff, tp->write_seq, TCPHDR_SYN);
tcp_mstamp_refresh(tp);
tp->retrans_stamp = tcp_time_stamp_ts(tp);
tcp_connect_queue_skb(sk, buff);
@@ -4221,7 +4224,8 @@ void tcp_send_delayed_ack(struct sock *sk)
if (!time_before(timeout, icsk->icsk_ack.timeout))
timeout = icsk->icsk_ack.timeout;
}
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ smp_store_release(&icsk->icsk_ack.pending,
+ icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER);
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4d40615dc8fc..b412ed88ccd9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
do_reset = true;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -282,6 +282,7 @@ static int tcp_write_timeout(struct sock *sk)
expired = retransmits_timed_out(sk, retry_until,
READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
+ mptcp_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
@@ -360,6 +361,14 @@ static void tcp_delack_timer(struct timer_list *t)
from_timer(icsk, t, icsk_delack_timer);
struct sock *sk = &icsk->icsk_inet.sk;
+ /* Avoid taking socket spinlock if there is no ACK to send.
+ * The compressed_ack check is racy, but a separate hrtimer
+ * will take care of it eventually.
+ */
+ if (!(smp_load_acquire(&icsk->icsk_ack.pending) & ICSK_ACK_TIMER) &&
+ !READ_ONCE(tcp_sk(sk)->compressed_ack))
+ goto out;
+
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_delack_timer_handler(sk);
@@ -370,6 +379,7 @@ static void tcp_delack_timer(struct timer_list *t)
sock_hold(sk);
}
bh_unlock_sock(sk);
+out:
sock_put(sk);
}
@@ -700,11 +710,11 @@ void tcp_write_timer_handler(struct sock *sk)
tcp_send_loss_probe(sk);
break;
case ICSK_TIME_RETRANS:
- icsk->icsk_pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
tcp_retransmit_timer(sk);
break;
case ICSK_TIME_PROBE0:
- icsk->icsk_pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
tcp_probe_timer(sk);
break;
}
@@ -716,6 +726,10 @@ static void tcp_write_timer(struct timer_list *t)
from_timer(icsk, t, icsk_retransmit_timer);
struct sock *sk = &icsk->icsk_inet.sk;
+ /* Avoid locking the socket when there is no pending event. */
+ if (!smp_load_acquire(&icsk->icsk_pending))
+ goto out;
+
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_write_timer_handler(sk);
@@ -725,6 +739,7 @@ static void tcp_write_timer(struct timer_list *t)
sock_hold(sk);
}
bh_unlock_sock(sk);
+out:
sock_put(sk);
}
@@ -779,7 +794,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE);
goto death;
}
@@ -807,7 +822,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT);
tcp_write_err(sk);
goto out;
}
@@ -850,6 +865,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
* LINUX_MIB_TCPACKCOMPRESSED accurate.
*/
tp->compressed_ack--;
+ tcp_mstamp_refresh(tp);
tcp_send_ack(sk);
}
} else {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 49c622e743e8..e8953e88efef 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,6 +100,7 @@
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
+#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/route.h>
#include <net/checksum.h>
@@ -365,7 +366,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
int dif, int sdif)
@@ -409,7 +410,6 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-INDIRECT_CALLABLE_SCOPE
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
@@ -418,9 +418,10 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
return __inet_ehashfn(laddr, lport, faddr, fport,
udp_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL(udp_ehashfn);
/* called with rcu_read_lock() */
-static struct sock *udp4_lib_lookup2(struct net *net,
+static struct sock *udp4_lib_lookup2(const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
int dif, int sdif,
@@ -477,21 +478,180 @@ rescore:
return result;
}
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp4_lib_lookup4(const struct net *net,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif,
+ struct udp_table *udptable)
+{
+ return NULL;
+}
+
+static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
+ u16 newhash4)
+{
+}
+
+static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp4_lib_lookup4(const struct net *net,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif,
+ struct udp_table *udptable)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ const struct hlist_nulls_node *node;
+ struct udp_hslot *hslot4;
+ unsigned int hash4, slot;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash4 & udptable->mask;
+ hslot4 = &udptable->hash4[slot];
+ INET_ADDR_COOKIE(acookie, saddr, daddr);
+
+begin:
+ /* SLAB_TYPESAFE_BY_RCU not used, so we don't need to touch sk_refcnt */
+ udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
+ sk = (struct sock *)up;
+ if (inet_match(net, sk, acookie, ports, dif, sdif))
+ return sk;
+ }
+
+ /* if the nulls value we got at the end of this lookup is not the
+ * expected one, we must restart lookup. We probably met an item that
+ * was moved to another chain due to rehash.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */
+static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
+ u16 newhash4)
+{
+ struct udp_hslot *hslot4, *nhslot4;
+
+ hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
+ nhslot4 = udp_hashslot4(udptable, newhash4);
+ udp_sk(sk)->udp_lrpa_hash = newhash4;
+
+ if (hslot4 != nhslot4) {
+ spin_lock_bh(&hslot4->lock);
+ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
+ hslot4->count--;
+ spin_unlock_bh(&hslot4->lock);
+
+ spin_lock_bh(&nhslot4->lock);
+ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
+ &nhslot4->nulls_head);
+ nhslot4->count++;
+ spin_unlock_bh(&nhslot4->lock);
+ }
+}
+
+static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
+{
+ struct udp_hslot *hslot2, *hslot4;
+
+ if (udp_hashed4(sk)) {
+ hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+ hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
+
+ spin_lock(&hslot4->lock);
+ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
+ hslot4->count--;
+ spin_unlock(&hslot4->lock);
+
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
+ }
+}
+
+void udp_lib_hash4(struct sock *sk, u16 hash)
+{
+ struct udp_hslot *hslot, *hslot2, *hslot4;
+ struct net *net = sock_net(sk);
+ struct udp_table *udptable;
+
+ /* Connected udp socket can re-connect to another remote address,
+ * so rehash4 is needed.
+ */
+ udptable = net->ipv4.udp_table;
+ if (udp_hashed4(sk)) {
+ udp_rehash4(udptable, sk, hash);
+ return;
+ }
+
+ hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
+ hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+ hslot4 = udp_hashslot4(udptable, hash);
+ udp_sk(sk)->udp_lrpa_hash = hash;
+
+ spin_lock_bh(&hslot->lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
+
+ spin_lock(&hslot4->lock);
+ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
+ &hslot4->nulls_head);
+ hslot4->count++;
+ spin_unlock(&hslot4->lock);
+
+ spin_lock(&hslot2->lock);
+ udp_hash4_inc(hslot2);
+ spin_unlock(&hslot2->lock);
+
+ spin_unlock_bh(&hslot->lock);
+}
+EXPORT_SYMBOL(udp_lib_hash4);
+
+/* call with sock lock */
+void udp4_hash4(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ unsigned int hash;
+
+ if (sk_unhashed(sk) || sk->sk_rcv_saddr == htonl(INADDR_ANY))
+ return;
+
+ hash = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+
+ udp_lib_hash4(sk, hash);
+}
+EXPORT_SYMBOL(udp4_hash4);
+#endif /* CONFIG_BASE_SMALL */
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
+ unsigned int hash2;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
+
+ if (udp_has_hash4(hslot2)) {
+ result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum,
+ dif, sdif, udptable);
+ if (result) /* udp4_lib_lookup4 return sk or NULL */
+ return result;
+ }
/* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
@@ -518,8 +678,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
/* Lookup wildcard sockets */
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
@@ -561,7 +720,7 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
-struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
struct sock *sk;
@@ -950,8 +1109,10 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
cork->gso_size);
+
+ /* Don't checksum the payload, skb will get segmented */
+ goto csum_partial;
}
- goto csum_partial;
}
if (is_udplite) /* UDP-Lite */
@@ -1513,7 +1674,6 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
- bool becomes_readable;
int size, rcvbuf;
/* Immediately drop when the receive queue is full.
@@ -1554,19 +1714,12 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
sock_skb_set_dropcount(sk, skb);
- becomes_readable = skb_queue_empty(list);
__skb_queue_tail(list, skb);
spin_unlock(&list->lock);
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (becomes_readable ||
- sk->sk_data_ready != sock_def_readable ||
- READ_ONCE(sk->sk_peek_off) >= 0)
- INDIRECT_CALL_1(sk->sk_data_ready,
- sock_def_readable, sk);
- else
- sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
- }
+ if (!sock_flag(sk, SOCK_DEAD))
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
+
busylock_release(busy);
return 0;
@@ -1932,6 +2085,18 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
EXPORT_SYMBOL(udp_pre_connect);
+static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip4_datagram_connect(sk, uaddr, addr_len);
+ if (!res)
+ udp4_hash4(sk);
+ release_sock(sk);
+ return res;
+}
+
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -1991,6 +2156,8 @@ void udp_lib_unhash(struct sock *sk)
hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
+
+ udp_unhash4(udptable, sk);
}
spin_unlock_bh(&hslot->lock);
}
@@ -2000,7 +2167,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
/*
* inet_rcv_saddr was changed, we must rehash secondary hash
*/
-void udp_lib_rehash(struct sock *sk, u16 newhash)
+void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
{
if (sk_hashed(sk)) {
struct udp_table *udptable = udp_get_table_prot(sk);
@@ -2032,6 +2199,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
spin_unlock(&nhslot2->lock);
}
+ if (udp_hashed4(sk)) {
+ udp_rehash4(udptable, sk, newhash4);
+
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
+
+ spin_lock(&nhslot2->lock);
+ udp_hash4_inc(nhslot2);
+ spin_unlock(&nhslot2->lock);
+ }
+ }
spin_unlock_bh(&hslot->lock);
}
}
@@ -2043,7 +2223,11 @@ void udp_v4_rehash(struct sock *sk)
u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
inet_sk(sk)->inet_rcv_saddr,
inet_sk(sk)->inet_num);
- udp_lib_rehash(sk, new_hash);
+ u16 new_hash4 = udp_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+
+ udp_lib_rehash(sk, new_hash, new_hash4);
}
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -2265,7 +2449,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udptable->hash2[hash2];
+ hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@@ -2536,14 +2720,13 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct udp_table *udptable = net->ipv4.udp_table;
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@@ -2618,7 +2801,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
iph->saddr,
- iph->tos & IPTOS_RT_MASK,
+ ip4h_dscp(iph),
skb->dev, in_dev, &itag);
}
return 0;
@@ -2937,7 +3120,7 @@ struct proto udp_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udp_pre_connect,
- .connect = ip4_datagram_connect,
+ .connect = udp_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udp_init_sock,
@@ -3184,7 +3367,7 @@ again:
batch_sks = 0;
for (; state->bucket <= udptable->mask; state->bucket++) {
- struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+ struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot;
if (hlist_empty(&hslot2->head))
continue;
@@ -3425,10 +3608,12 @@ __setup("uhash_entries=", set_uhash_entries);
void __init udp_table_init(struct udp_table *table, const char *name)
{
- unsigned int i;
+ unsigned int i, slot_size;
+ slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
+ udp_hash4_slot_size();
table->hash = alloc_large_system_hash(name,
- 2 * sizeof(struct udp_hslot),
+ slot_size,
uhash_entries,
21, /* one slot per 2 MB */
0,
@@ -3437,17 +3622,18 @@ void __init udp_table_init(struct udp_table *table, const char *name)
UDP_HTABLE_SIZE_MIN,
UDP_HTABLE_SIZE_MAX);
- table->hash2 = table->hash + (table->mask + 1);
+ table->hash2 = (void *)(table->hash + (table->mask + 1));
for (i = 0; i <= table->mask; i++) {
INIT_HLIST_HEAD(&table->hash[i].head);
table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock);
}
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_HEAD(&table->hash2[i].head);
- table->hash2[i].count = 0;
- spin_lock_init(&table->hash2[i].lock);
+ INIT_HLIST_HEAD(&table->hash2[i].hslot.head);
+ table->hash2[i].hslot.count = 0;
+ spin_lock_init(&table->hash2[i].hslot.lock);
}
+ udp_table_hash4_init(table);
}
u32 udp_flow_hashrnd(void)
@@ -3473,18 +3659,21 @@ static void __net_init udp_sysctl_init(struct net *net)
static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries)
{
struct udp_table *udptable;
+ unsigned int slot_size;
int i;
udptable = kmalloc(sizeof(*udptable), GFP_KERNEL);
if (!udptable)
goto out;
- udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot),
+ slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
+ udp_hash4_slot_size();
+ udptable->hash = vmalloc_huge(hash_entries * slot_size,
GFP_KERNEL_ACCOUNT);
if (!udptable->hash)
goto free_table;
- udptable->hash2 = udptable->hash + hash_entries;
+ udptable->hash2 = (void *)(udptable->hash + hash_entries);
udptable->mask = hash_entries - 1;
udptable->log = ilog2(hash_entries);
@@ -3493,10 +3682,11 @@ static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_ent
udptable->hash[i].count = 0;
spin_lock_init(&udptable->hash[i].lock);
- INIT_HLIST_HEAD(&udptable->hash2[i].head);
- udptable->hash2[i].count = 0;
- spin_lock_init(&udptable->hash2[i].lock);
+ INIT_HLIST_HEAD(&udptable->hash2[i].hslot.head);
+ udptable->hash2[i].hslot.count = 0;
+ spin_lock_init(&udptable->hash2[i].hslot.lock);
}
+ udp_table_hash4_init(udptable);
return udptable;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d842303587af..a5be6e4ed326 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -296,8 +296,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
return NULL;
}
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) {
+ /* Detect modified geometry and pass those to skb_segment. */
+ if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
+ /* Setup csum, as fraglist skips this in udp4_gro_receive. */
+ gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
+ gso_skb->csum_offset = offsetof(struct udphdr, check);
+ gso_skb->ip_summed = CHECKSUM_PARTIAL;
+
+ uh = udp_hdr(gso_skb);
+ if (is_ipv6)
+ uh->check = ~udp_v6_check(gso_skb->len,
+ &ipv6_hdr(gso_skb)->saddr,
+ &ipv6_hdr(gso_skb)->daddr, 0);
+ else
+ uh->check = ~udp_v4_check(gso_skb->len,
+ ip_hdr(gso_skb)->saddr,
+ ip_hdr(gso_skb)->daddr, 0);
+ }
skb_pull(gso_skb, sizeof(*uh));
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index e4e0fa869fa4..619a53eb672d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -6,6 +6,7 @@
#include <net/dst_metadata.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
+#include <net/inet_dscp.h>
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
@@ -232,7 +233,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
fl4.saddr = key->u.ipv4.src;
fl4.fl4_dport = dport;
fl4.fl4_sport = sport;
- fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_tos = tos & INET_DSCP_MASK;
fl4.flowi4_flags = key->flow_flags;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index a620618cc568..b5b06323cfd9 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -33,7 +33,7 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
const struct iphdr *iph = ip_hdr(skb);
if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
+ ip4h_dscp(iph), skb->dev))
goto drop;
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0294fef577fa..7fb6205619e7 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -14,50 +14,47 @@
#include <linux/inetdevice.h>
#include <net/dst.h>
#include <net/xfrm.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/l3mdev.h>
-static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
- int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark)
+static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4,
+ const struct xfrm_dst_lookup_params *params)
{
struct rtable *rt;
memset(fl4, 0, sizeof(*fl4));
- fl4->daddr = daddr->a4;
- fl4->flowi4_tos = tos;
- fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
- fl4->flowi4_mark = mark;
- if (saddr)
- fl4->saddr = saddr->a4;
-
- rt = __ip_route_output_key(net, fl4);
+ fl4->daddr = params->daddr->a4;
+ fl4->flowi4_tos = inet_dscp_to_dsfield(params->dscp);
+ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net,
+ params->oif);
+ fl4->flowi4_mark = params->mark;
+ if (params->saddr)
+ fl4->saddr = params->saddr->a4;
+ fl4->flowi4_proto = params->ipproto;
+ fl4->uli = params->uli;
+
+ rt = __ip_route_output_key(params->net, fl4);
if (!IS_ERR(rt))
return &rt->dst;
return ERR_CAST(rt);
}
-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark)
+static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
+ return __xfrm4_dst_lookup(&fl4, params);
}
-static int xfrm4_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr,
- u32 mark)
+static int xfrm4_get_saddr(xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params)
{
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
+ dst = __xfrm4_dst_lookup(&fl4, params);
if (IS_ERR(dst))
return -EHOSTUNREACH;
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index b146ce88c5d0..4ee624d8e66f 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -76,7 +76,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
const struct iphdr *iph = ip_hdr(skb);
if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
+ ip4h_dscp(iph), skb->dev))
goto drop;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 08d4b7132d4c..1c9c686d9522 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -323,6 +323,7 @@ config IPV6_RPL_LWTUNNEL
bool "IPv6: RPL Source Routing Header support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
help
Support for RFC6554 RPL Source Routing Header using the lightweight
tunnels mechanism.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f70d8757af1a..0e765466d7f7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -85,15 +85,13 @@
#include <linux/netconf.h>
#include <linux/random.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/ioam6.h>
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
#define IPV6_MAX_STRLEN \
sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
@@ -239,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -302,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -1016,7 +1016,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
@@ -2570,6 +2570,24 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return idev;
}
+static void delete_tempaddrs(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ift, *tmp;
+
+ write_lock_bh(&idev->lock);
+ list_for_each_entry_safe(ift, tmp, &idev->tempaddr_list, tmp_list) {
+ if (ift->ifpub != ifp)
+ continue;
+
+ in6_ifa_hold(ift);
+ write_unlock_bh(&idev->lock);
+ ipv6_del_addr(ift);
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
static void manage_tempaddrs(struct inet6_dev *idev,
struct inet6_ifaddr *ifp,
__u32 valid_lft, __u32 prefered_lft,
@@ -2762,6 +2780,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
+ bool ignore_autoconf = false;
pinfo = (struct prefix_info *) opt;
@@ -2864,7 +2883,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ ignore_autoconf = READ_ONCE(in6_dev->cnf.ra_honor_pio_pflag) && pinfo->preferpd;
+ if (pinfo->autoconf && in6_dev->cnf.autoconf && !ignore_autoconf) {
struct in6_addr addr;
bool tokenized = false, dev_addr_generated = false;
@@ -3122,11 +3142,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
- if (!(ifp->flags & IFA_F_TEMPORARY) &&
- (ifa_flags & IFA_F_MANAGETEMPADDR))
- manage_tempaddrs(idev, ifp, 0, 0, false,
- jiffies);
ipv6_del_addr(ifp);
+
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(idev, ifp);
+
addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
@@ -4791,7 +4812,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!pfx)
return -EINVAL;
- ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+ ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
/* We ignore other flags so far. */
ifa_flags &= IFA_F_MANAGETEMPADDR;
@@ -4800,7 +4821,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm->ifa_prefixlen, extack);
}
-static int modify_prefix_route(struct inet6_ifaddr *ifp,
+static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
@@ -4824,7 +4845,9 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
- } else {
+ return 0;
+ }
+ if (f6i != net->ipv6.fib6_null_entry) {
table = f6i->fib6_table;
spin_lock_bh(&table->tb6_lock);
@@ -4837,9 +4860,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
}
spin_unlock_bh(&table->tb6_lock);
-
- fib6_info_release(f6i);
}
+ fib6_info_release(f6i);
return 0;
}
@@ -4918,7 +4940,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags, false);
+ rc = modify_prefix_route(net, ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4928,7 +4950,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
- rc = modify_prefix_route(ifp, expires, flags, true);
+ rc = modify_prefix_route(net, ifp, expires, flags, true);
if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
@@ -4950,14 +4972,12 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
- if (was_managetempaddr &&
- !(ifp->flags & IFA_F_MANAGETEMPADDR)) {
- cfg->valid_lft = 0;
- cfg->preferred_lft = 0;
- }
- manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
- cfg->preferred_lft, !was_managetempaddr,
- jiffies);
+ if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(ifp->idev, ifp);
+ else
+ manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
+ cfg->preferred_lft, !was_managetempaddr,
+ jiffies);
}
addrconf_verify_rtnl(net);
@@ -5016,10 +5036,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
}
- if (tb[IFA_FLAGS])
- cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
- else
- cfg.ifa_flags = ifm->ifa_flags;
+ cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
/* We ignore other flags so far. */
cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
@@ -5617,8 +5634,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
@@ -6173,8 +6189,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
@@ -6241,8 +6256,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -6926,6 +6940,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "ra_honor_pio_pflag",
+ .data = &ipv6_devconf.ra_honor_pio_pflag,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
{
.procname = "accept_ra_rtr_pref",
@@ -7398,6 +7421,27 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
.set_link_af = inet6_set_link_af,
};
+static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK,
+ .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR,
+ .doit = inet6_rtm_newaddr},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR,
+ .doit = inet6_rtm_deladdr},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR,
+ .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETMULTICAST,
+ .dumpit = inet6_dump_ifmcaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETANYCAST,
+ .dumpit = inet6_dump_ifacaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETNETCONF,
+ .doit = inet6_netconf_get_devconf, .dumpit = inet6_netconf_dump_devconf,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
/*
* Init / cleanup code
*/
@@ -7439,44 +7483,14 @@ int __init addrconf_init(void)
addrconf_verify(&init_net);
- rtnl_af_register(&inet6_ops);
+ err = rtnl_af_register(&inet6_ops);
+ if (err)
+ goto erraf;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
+ err = rtnl_register_many(addrconf_rtnl_msg_handlers);
+ if (err)
goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
- inet6_rtm_newaddr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
- inet6_rtm_deladdr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
- inet6_rtm_getaddr, inet6_dump_ifaddr,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
- NULL, inet6_dump_ifmcaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
- NULL, inet6_dump_ifacaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
- inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
err = ipv6_addr_label_rtnl_register();
if (err < 0)
goto errout;
@@ -7485,6 +7499,7 @@ int __init addrconf_init(void)
errout:
rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
+erraf:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
destroy_workqueue(addrconf_wq);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index acd70b5992a7..ab054f329e12 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -634,23 +634,17 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
+static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL,
+ .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
int __init ipv6_addr_label_rtnl_register(void)
{
- int ret;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
- ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- return ret;
+ return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 90d2c7e3f5e9..f60ec8b0f8ea 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -252,31 +252,29 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
err = sk->sk_prot->hash(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (!kern) {
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
@@ -708,6 +706,7 @@ const struct proto_ops inet6_stream_ops = {
.splice_eof = inet_splice_eof,
.sendmsg_locked = tcp_sendmsg_locked,
.splice_read = tcp_splice_read,
+ .set_peek_off = sk_set_peek_off,
.read_sock = tcp_read_sock,
.read_skb = tcp_read_skb,
.peek_len = tcp_peek_len,
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0627c4c18d1a..562cace50ca9 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -49,9 +49,10 @@ static DEFINE_SPINLOCK(acaddr_hash_lock);
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
-static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+static u32 inet6_acaddr_hash(const struct net *net,
+ const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index eb8ee1e9373a..dbcea9fee626 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -29,7 +29,7 @@
#include <net/calipso.h>
#include <linux/atomic.h>
#include <linux/bug.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/crc-ccitt.h>
/* Maximium size of the calipso option including
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 3920e8aa1031..b2400c226a32 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -132,7 +132,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(sg_page(sg), skb->pp_recycle);
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET6_ESPINTCP
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 919ebfabbe4e..7b41fb4f00b5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index f87ae33e1d01..949b72610df7 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -22,7 +22,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib6_seq_read(struct net *net)
+static unsigned int fib6_seq_read(const struct net *net)
{
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9e254de7462f..c85c1627cb16 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -27,6 +27,7 @@ struct fib6_rule {
struct rt6key src;
struct rt6key dst;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
@@ -55,7 +56,7 @@ int fib6_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, AF_INET6, extack);
}
-unsigned int fib6_rules_seq_read(struct net *net)
+unsigned int fib6_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, AF_INET6);
}
@@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 1;
}
+static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ if (rule6->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule6->dscp_full = true;
+
+ return 0;
+}
+
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
+ goto errout;
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule6->dscp_full ||
+ inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
return 0;
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule6->dscp_full || rule6->dscp != dscp)
+ return 0;
+ }
+
if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
@@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+
+ if (rule6->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule6->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+ }
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -450,7 +486,8 @@ nla_put_failure:
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
- + nla_total_size(16); /* src */
+ + nla_total_size(16) /* src */
+ + nla_total_size(1); /* dscp */
}
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7b31674644ef..071b0bc1179d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
return false;
}
-static bool icmpv6_global_allow(struct net *net, int type)
+static bool icmpv6_global_allow(struct net *net, int type,
+ bool *apply_ratelimit)
{
if (icmpv6_mask_allow(net, type))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
* Check the ICMP output rate limit
*/
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi6 *fl6)
+ struct flowi6 *fl6, bool apply_ratelimit)
{
struct net *net = sock_net(sk);
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(net, type))
+ if (!apply_ratelimit)
return true;
/*
@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
dst_release(dst);
return res;
}
@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
+ bool apply_ratelimit = false;
struct dst_entry *dst;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
return;
}
- /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
+ /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, type, &apply_ratelimit))
goto out_bh_enable;
mip6_addr_swap(skb, parm);
@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
- if (!icmpv6_xrlim_allow(sk, type, &fl6))
+ if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
goto out;
tmp_hdr.icmp6_type = type;
@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb);
+ bool apply_ratelimit = false;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out;
/* Check the ratelimit */
- if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
- !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ if ((!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
goto out_dst_release;
idev = __in6_dev_get(skb->dev);
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index ad5f6f6ba333..85b92917849b 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -108,6 +108,7 @@ int ila_lwt_init(void);
void ila_lwt_fini(void);
int ila_xlat_init_net(struct net *net);
+void ila_xlat_pre_exit_net(struct net *net);
void ila_xlat_exit_net(struct net *net);
int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 69caed07315f..976c78efbae1 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -71,6 +71,11 @@ ila_xlat_init_fail:
return err;
}
+static __net_exit void ila_pre_exit_net(struct net *net)
+{
+ ila_xlat_pre_exit_net(net);
+}
+
static __net_exit void ila_exit_net(struct net *net)
{
ila_xlat_exit_net(net);
@@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(struct net *net)
static struct pernet_operations ila_net_ops = {
.init = ila_init_net,
+ .pre_exit = ila_pre_exit_net,
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 67e8c9440977..7646e401c630 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -105,16 +105,11 @@ static int parse_nl_config(struct genl_info *info,
xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
- if (info->attrs[ILA_ATTR_CSUM_MODE])
- xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
- else
- xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+ xp->ip.csum_mode = nla_get_u8_default(info->attrs[ILA_ATTR_CSUM_MODE],
+ ILA_CSUM_NO_ACTION);
- if (info->attrs[ILA_ATTR_IDENT_TYPE])
- xp->ip.ident_type = nla_get_u8(
- info->attrs[ILA_ATTR_IDENT_TYPE]);
- else
- xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
+ xp->ip.ident_type = nla_get_u8_default(info->attrs[ILA_ATTR_IDENT_TYPE],
+ ILA_ATYPE_USE_FORMAT);
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -619,6 +614,15 @@ int ila_xlat_init_net(struct net *net)
return 0;
}
+void ila_xlat_pre_exit_net(struct net *net)
+{
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+
+ if (ilan->xlat.hooks_registered)
+ nf_unregister_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+}
+
void ila_xlat_exit_net(struct net *net)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -626,10 +630,6 @@ void ila_xlat_exit_net(struct net *net)
rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL);
free_bucket_spinlocks(ilan->xlat.locks);
-
- if (ilan->xlat.hooks_registered)
- nf_unregister_net_hooks(net, ila_nf_hook_ops,
- ARRAY_SIZE(ila_nf_hook_ops));
}
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 6db71bb1cd30..9ec05e354baa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(inet6_ehashfn);
*
* The sockhash lock must be held as a reader here.
*/
-struct sock *__inet6_lookup_established(struct net *net,
+struct sock *__inet6_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
@@ -89,7 +89,7 @@ found:
}
EXPORT_SYMBOL(__inet6_lookup_established);
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
const int dif, const int sdif)
@@ -126,7 +126,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
__be16 sport,
@@ -147,7 +147,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
-static struct sock *inet6_lhash2_lookup(struct net *net,
+static struct sock *inet6_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -174,7 +174,7 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -199,7 +199,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
-struct sock *inet6_lookup_listener(struct net *net,
+struct sock *inet6_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -243,7 +243,8 @@ done:
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
-struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+struct sock *inet6_lookup(const struct net *net,
+ struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 08c929513065..a84d332f952f 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -135,15 +135,11 @@ static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
ns->id = id;
- if (!info->attrs[IOAM6_ATTR_NS_DATA])
- data32 = IOAM6_U32_UNAVAILABLE;
- else
- data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
-
- if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
- data64 = IOAM6_U64_UNAVAILABLE;
- else
- data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+ data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA],
+ IOAM6_U32_UNAVAILABLE);
+
+ data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE],
+ IOAM6_U64_UNAVAILABLE);
ns->data = cpu_to_be32(data32);
ns->data_wide = cpu_to_be64(data64);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index bf7120ecea1e..9d8422e350f8 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -42,8 +42,10 @@ struct ioam6_lwt {
struct ioam6_lwt_freq freq;
atomic_t pkt_cnt;
u8 mode;
+ bool has_tunsrc;
+ struct in6_addr tunsrc;
struct in6_addr tundst;
- struct ioam6_lwt_encap tuninfo;
+ struct ioam6_lwt_encap tuninfo;
};
static const struct netlink_range_validation freq_range = {
@@ -72,8 +74,10 @@ static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
+ [IOAM6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
- [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(
+ sizeof(struct ioam6_trace_hdr)),
};
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
@@ -85,7 +89,7 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21)
+ trace->type.bit21 | trace->type.bit23)
return false;
trace->nodelen = 0;
@@ -138,10 +142,13 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
}
}
- if (!tb[IOAM6_IPTUNNEL_MODE])
- mode = IOAM6_IPTUNNEL_MODE_INLINE;
- else
- mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+ mode = nla_get_u8_default(tb[IOAM6_IPTUNNEL_MODE],
+ IOAM6_IPTUNNEL_MODE_INLINE);
+
+ if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) {
+ NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode");
+ return -EINVAL;
+ }
if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
@@ -167,19 +174,40 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
ilwt = ioam6_lwt_state(lwt);
err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
- if (err) {
- kfree(lwt);
- return err;
- }
+ if (err)
+ goto free_lwt;
atomic_set(&ilwt->pkt_cnt, 0);
ilwt->freq.k = freq_k;
ilwt->freq.n = freq_n;
ilwt->mode = mode;
- if (tb[IOAM6_IPTUNNEL_DST])
+
+ if (!tb[IOAM6_IPTUNNEL_SRC]) {
+ ilwt->has_tunsrc = false;
+ } else {
+ ilwt->has_tunsrc = true;
+ ilwt->tunsrc = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_SRC]);
+
+ if (ipv6_addr_any(&ilwt->tunsrc)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_SRC],
+ "invalid tunnel source address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
+ if (tb[IOAM6_IPTUNNEL_DST]) {
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
+ if (ipv6_addr_any(&ilwt->tundst)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_DST],
+ "invalid tunnel dest address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
tuninfo = ioam6_lwt_info(lwt);
tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
tuninfo->pad[0] = IPV6_TLV_PADN;
@@ -201,6 +229,11 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
*ts = lwt;
return 0;
+free_cache:
+ dst_cache_destroy(&ilwt->cache);
+free_lwt:
+ kfree(lwt);
+ return err;
}
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
@@ -256,6 +289,8 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
+ bool has_tunsrc,
+ struct in6_addr *tunsrc,
struct in6_addr *tundst)
{
struct dst_entry *dst = skb_dst(skb);
@@ -285,8 +320,12 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdr->nexthdr = NEXTHDR_HOP;
hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
hdr->daddr = *tundst;
- ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
- IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
+
+ if (has_tunsrc)
+ memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
+ else
+ ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
skb_postpush_rcsum(skb, hdr, len);
@@ -328,7 +367,9 @@ do_inline:
case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
/* Encapsulation (ip6ip6) */
- err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+ err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
+ ilwt->has_tunsrc, &ilwt->tunsrc,
+ &ilwt->tundst);
if (unlikely(err))
goto drop;
@@ -414,6 +455,13 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
goto ret;
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc) {
+ err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_SRC,
+ &ilwt->tunsrc);
+ if (err)
+ goto ret;
+ }
+
err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
if (err)
goto ret;
@@ -435,8 +483,12 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
- if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
+ if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc)
+ nlsize += nla_total_size(sizeof(ilwt->tunsrc));
+
nlsize += nla_total_size(sizeof(ilwt->tundst));
+ }
return nlsize;
}
@@ -451,17 +503,21 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
return (ilwt_a->freq.k != ilwt_b->freq.k ||
ilwt_a->freq.n != ilwt_b->freq.n ||
ilwt_a->mode != ilwt_b->mode ||
+ ilwt_a->has_tunsrc != ilwt_b->has_tunsrc ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
+ (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
+ ilwt_a->has_tunsrc &&
+ !ipv6_addr_equal(&ilwt_a->tunsrc, &ilwt_b->tunsrc)) ||
trace_a->namespace_id != trace_b->namespace_id);
}
static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
.build_state = ioam6_build_state,
.destroy_state = ioam6_destroy_state,
- .output = ioam6_output,
+ .output = ioam6_output,
.fill_encap = ioam6_fill_encap_info,
- .get_encap_size = ioam6_encap_nlsize,
+ .get_encap_size = ioam6_encap_nlsize,
.cmp_encap = ioam6_encap_cmp,
.owner = THIS_MODULE,
};
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index eb111d20615c..c134ba202c4c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -198,16 +198,9 @@ static void node_free_immediate(struct net *net, struct fib6_node *fn)
net->ipv6.rt6_stats->fib_nodes--;
}
-static void node_free_rcu(struct rcu_head *head)
-{
- struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
-
- kmem_cache_free(fib6_node_kmem, fn);
-}
-
static void node_free(struct net *net, struct fib6_node *fn)
{
- call_rcu(&fn->rcu, node_free_rcu);
+ kfree_rcu(fn, rcu);
net->ipv6.rt6_stats->fib_nodes--;
}
@@ -345,17 +338,17 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
-unsigned int fib6_tables_seq_read(struct net *net)
+unsigned int fib6_tables_seq_read(const struct net *net)
{
unsigned int h, fib_seq = 0;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- struct hlist_head *head = &net->ipv6.fib_table_hash[h];
- struct fib6_table *tb;
+ const struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ const struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib_seq += tb->fib_seq;
+ fib_seq += READ_ONCE(tb->fib_seq);
}
rcu_read_unlock();
@@ -400,7 +393,7 @@ int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -416,7 +409,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
.nsiblings = nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -427,7 +420,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
.nsiblings = rt->fib6_nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
}
@@ -1190,8 +1183,8 @@ next_iter:
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->fib6_siblings,
- &sibling->fib6_siblings);
+ list_add_tail_rcu(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->fib6_next,
@@ -1252,7 +1245,7 @@ add:
fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
return err;
}
@@ -1970,7 +1963,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
@@ -2500,6 +2493,12 @@ static struct pernet_operations fib6_net_ops = {
.exit = fib6_net_exit,
};
+static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .dumpit = inet6_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
int __init fib6_init(void)
{
int ret = -ENOMEM;
@@ -2513,9 +2512,7 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
- RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ ret = rtnl_register_many(fib6_rtnl_msg_handlers);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3942bd2ade78..235808cfec70 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1471,7 +1471,7 @@ static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
- dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
/* TCP offload with GRE SEQ is not supported, nor can we support 2
@@ -1485,6 +1485,8 @@ static void ip6gre_tnl_init_features(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ip6gre_tunnel_init_common(struct net_device *dev)
@@ -1619,8 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
-
+ ign->fb_tunnel_dev->netns_local = true;
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 133610a49da6..70c0e16c0ae6 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,9 +111,8 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
@@ -327,9 +326,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..f7b4608bb316 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -127,7 +127,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
- if (unlikely(IS_ERR_OR_NULL(neigh))) {
+ if (IS_ERR_OR_NULL(neigh)) {
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
@@ -1401,8 +1401,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
- sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
-
+ sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
+ if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->base.flags |= IPCORK_TS_OPT_ID;
+ cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
+ }
cork->base.length = 0;
cork->base.transmit_time = ipc6->sockc.transmit_time;
@@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = dst_rt6_info(cork->dst);
- bool paged, hold_tskey, extra_uref = false;
+ bool paged, hold_tskey = false, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
@@ -1543,10 +1546,15 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/*
* Let's try using as much space as possible.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 87dfb565a9f8..48fd53b98972 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -53,6 +53,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -608,7 +609,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
- 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ 0, 0, 0, IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt))
goto out;
@@ -619,7 +621,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rt->rt_flags & RTCF_LOCAL) {
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
@@ -627,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
skb_dst_set(skb2, &rt->dst);
} else {
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
- skb2->dev) ||
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
+ ip4h_dscp(eiph), skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -1849,7 +1852,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL6;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
@@ -2258,7 +2261,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip6n->fb_tnl_dev->netns_local = true;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index dd342e6ecf3f..578ff1336afe 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -108,6 +108,11 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv6.mr6_tables))
+static bool ip6mr_can_free_table(struct net *net)
+{
+ return !check_net(net) || !net_initialized(net);
+}
+
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -125,7 +130,7 @@ static struct mr_table *ip6mr_mr_table_iter(struct net *net,
return ret;
}
-static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -136,6 +141,16 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return NULL;
}
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
+
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, id);
+ rcu_read_unlock();
+ return mrt;
+}
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -177,7 +192,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
arg->table = fib_rule_get_table(rule, arg);
- mrt = ip6mr_get_table(rule->fr_net, arg->table);
+ mrt = __ip6mr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -276,7 +291,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}
@@ -291,6 +306,11 @@ EXPORT_SYMBOL(ip6mr_rule_default);
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+static bool ip6mr_can_free_table(struct net *net)
+{
+ return !check_net(net);
+}
+
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -304,6 +324,8 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return net->ipv6.mrt6;
}
+#define __ip6mr_get_table ip6mr_get_table
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -335,7 +357,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return 0;
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return 0;
}
@@ -382,7 +404,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, id);
+ mrt = __ip6mr_get_table(net, id);
if (mrt)
return mrt;
@@ -392,6 +414,10 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr_table *mrt)
{
+ struct net *net = read_pnet(&mrt->net);
+
+ WARN_ON_ONCE(!ip6mr_can_free_table(net));
+
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
@@ -411,13 +437,15 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
+ }
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -640,7 +668,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -1260,11 +1288,9 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static unsigned int ip6mr_seq_read(struct net *net)
+static unsigned int ip6mr_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+ return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
@@ -1369,6 +1395,12 @@ static struct pernet_operations ip6mr_net_ops = {
.exit_batch = ip6mr_net_exit_batch,
};
+static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
+ .msgtype = RTM_GETROUTE,
+ .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
+};
+
int __init ip6_mr_init(void)
{
int err;
@@ -1391,9 +1423,8 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
- ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
- if (err == 0)
+ err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
+ if (!err)
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
@@ -1408,9 +1439,9 @@ reg_pernet_fail:
return err;
}
-void ip6_mr_cleanup(void)
+void __init ip6_mr_cleanup(void)
{
- rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
+ rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
@@ -2275,11 +2306,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
struct mfc6_cache *cache;
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return -ENOENT;
+ }
- rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2431,8 +2464,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
@@ -2558,9 +2590,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
src = nla_get_in6_addr(tb[RTA_SRC]);
if (tb[RTA_DST])
grp = nla_get_in6_addr(tb[RTA_DST]);
- tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+ tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
- mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
+ mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
if (!mrt) {
NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
return -ENOENT;
@@ -2607,7 +2639,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
struct mr_table *mrt;
- mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+ mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
return skb->len;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index cd342d5015c6..1e225e6489ea 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -985,7 +985,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return ip_setsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
@@ -1475,7 +1475,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return ip_getsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7ba01d8cfbae..b244dbf61d5f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -586,7 +586,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
struct ip6_sf_socklist *psl;
- int i, count, copycount;
+ unsigned int count;
+ int i, copycount;
group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
@@ -610,7 +611,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
psl = sock_dereference(pmc->sflist, sk);
count = psl ? psl->sl_count : 0;
- copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+ copycount = min(count, gsf->gf_numsrc);
gsf->gf_numsrc = count;
for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b8eec1b6cc2c..aba94a348673 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -200,9 +200,9 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_6CO ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
- opt->nd_opt_type == ND_OPT_PREF64 ||
- ndisc_ops_is_useropt(dev, opt->nd_opt_type);
+ opt->nd_opt_type == ND_OPT_PREF64;
}
static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
@@ -1944,7 +1944,7 @@ static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
static char warncomm[TASK_COMM_LEN];
static int warned;
if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
+ strscpy(warncomm, current->comm);
pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
warncomm, func,
dev_name, ctl->procname,
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f3c8e2d918e1..e087a8e97ba7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -8,7 +8,14 @@ menu "IPv6: Netfilter Configuration"
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
- tristate
+ tristate "Legacy IP6 tables support"
+ depends on INET && IPV6
+ select NETFILTER_XTABLES
+ default n
+ help
+ ip6tables is a legacy packet classifier.
+ This is not needed if you are using iptables over nftables
+ (iptables-nft).
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 131f7bb2110d..7d5602950ae7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1773,7 +1773,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index a0a2de30be3e..0c39c77fe8a8 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -47,11 +47,12 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif)
{
+ local_bh_disable();
if (this_cpu_read(nf_skb_duplicated))
- return;
+ goto out;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
- return;
+ goto out;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_reset_ct(skb);
@@ -69,6 +70,8 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
} else {
kfree_skb(skb);
}
+out:
+ local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_dup_ipv6);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index dedee264b8f6..9ae2b2725bf9 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -223,33 +223,23 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
const struct tcphdr *oth, unsigned int otcplen)
{
struct tcphdr *tcph;
- int needs_ack;
skb_reset_transport_header(nskb);
- tcph = skb_put(nskb, sizeof(struct tcphdr));
+ tcph = skb_put_zero(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
tcph->source = oth->dest;
tcph->dest = oth->source;
if (oth->ack) {
- needs_ack = 0;
tcph->seq = oth->ack_seq;
- tcph->ack_seq = 0;
} else {
- needs_ack = 1;
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
otcplen - (oth->doff<<2));
- tcph->seq = 0;
+ tcph->ack = 1;
}
- /* Reset flags */
- ((u_int8_t *)tcph)[13] = 0;
tcph->rst = 1;
- tcph->ack = needs_ack;
- tcph->window = 0;
- tcph->urg_ptr = 0;
- tcph->check = 0;
/* Adjust TCP checksum */
tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
@@ -278,13 +268,12 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct sk_buff *nskb;
- struct tcphdr _otcph;
- const struct tcphdr *otcph;
- unsigned int otcplen, hh_len;
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
- struct ipv6hdr *ip6h;
struct dst_entry *dst = NULL;
+ const struct tcphdr *otcph;
+ struct sk_buff *nskb;
+ struct tcphdr _otcph;
+ unsigned int otcplen;
struct flowi6 fl6;
if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
@@ -323,9 +312,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
if (IS_ERR(dst))
return;
- hh_len = (dst->dev->hard_header_len + 15)&~15;
- nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
- + sizeof(struct tcphdr) + dst->trailer_len,
+ nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr) + dst->trailer_len,
GFP_ATOMIC);
if (!nskb) {
@@ -338,9 +326,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
nskb->mark = fl6.flowi6_mark;
- skb_reserve(nskb, hh_len + dst->header_len);
- ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
- ip6_dst_hoplimit(dst));
+ skb_reserve(nskb, LL_MAX_HEADER);
+ nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst));
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
nf_ct_attach(nskb, oldskb);
@@ -355,6 +342,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
*/
if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct ipv6hdr *ip6h = ipv6_hdr(nskb);
struct net_device *br_indev;
br_indev = nf_bridge_get_physindev(oldskb, net);
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index c82f3fdd4a65..492a811828a7 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -38,13 +38,13 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in6_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36dc14b34388..c9f1634b3838 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -41,8 +41,6 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
- } else if (priv->flags & NFTA_FIB_F_IIF) {
- fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
}
if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
@@ -75,6 +73,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
+
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -165,6 +165,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
+ .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
struct rt6_info *rt;
int lookup_flags;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 608fa9d05b55..8476a3944a88 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -629,7 +629,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->ip_summed = CHECKSUM_NONE;
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 219701caba1e..67ff16c04718 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
struct net_device *rt_dev = rt->dst.dev;
bool handled = false;
- if (rt_idev->dev == dev) {
+ if (rt_idev && rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
handled = true;
@@ -374,6 +374,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
+ struct fib6_info *from;
if (idev && idev->dev != blackhole_netdev) {
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
@@ -383,6 +384,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
in6_dev_put(idev);
}
}
+ from = unrcu_pointer(xchg(&rt->from, NULL));
+ fib6_info_release(from);
}
static bool __rt6_check_expired(const struct rt6_info *rt)
@@ -413,8 +416,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
+ struct fib6_info *sibling;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -440,8 +443,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
- list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
- fib6_siblings) {
+ list_for_each_entry_rcu(sibling, &match->fib6_siblings,
+ fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
@@ -1455,7 +1458,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
- struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
@@ -1467,8 +1469,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL));
- fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
hlist_del_rcu(&rt6_ex->hlist);
@@ -2780,10 +2780,10 @@ static void ip6_negative_advice(struct sock *sk,
if (rt->rt6i_flags & RTF_CACHE) {
rcu_read_lock();
if (rt6_check_expired(rt)) {
- /* counteract the dst_release() in sk_dst_reset() */
- dst_hold(dst);
+ /* rt/dst can not be destroyed yet,
+ * because of rcu_read_lock()
+ */
sk_dst_reset(sk);
-
rt6_remove_exception_rt(rt);
}
rcu_read_unlock();
@@ -5195,14 +5195,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
* nexthop. Since sibling routes are always added at the end of
* the list, find the first sibling of the last route appended
*/
+ rcu_read_lock();
+
if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
- rt = list_first_entry(&rt_last->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
+ rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
if (rt)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
+
+ rcu_read_unlock();
}
static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
@@ -5547,17 +5551,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i)
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
} else {
- struct fib6_info *sibling, *next_sibling;
struct fib6_nh *nh = f6i->fib6_nh;
+ struct fib6_info *sibling;
nexthop_len = 0;
if (f6i->fib6_nsiblings) {
rt6_nh_nlmsg_size(nh, &nexthop_len);
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
}
+
+ rcu_read_unlock();
}
nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
@@ -5721,7 +5729,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
} else if (rt->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *sibling;
struct nlattr *mp;
mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
@@ -5733,14 +5741,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
0) < 0)
goto nla_put_failure;
- list_for_each_entry_safe(sibling, next_sibling,
- &rt->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &rt->fib6_siblings,
+ fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6, 0) < 0)
+ AF_INET6, 0) < 0) {
+ rcu_read_unlock();
+
goto nla_put_failure;
+ }
}
+ rcu_read_unlock();
+
nla_nest_end(skb, mp);
} else if (rt->nh) {
if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
@@ -6177,7 +6192,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -6190,11 +6205,10 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
goto errout;
}
rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
- info->nlh, gfp_any());
+ info->nlh, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_rt_update(struct net *net, struct fib6_info *rt,
@@ -6220,8 +6234,7 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
@@ -6682,6 +6695,15 @@ static void bpf_iter_unregister(void)
#endif
#endif
+static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
+ .doit = inet6_rtm_newroute},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
+ .doit = inet6_rtm_delroute},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip6_route_init(void)
{
int ret;
@@ -6724,19 +6746,7 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
- inet6_rtm_newroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
- inet6_rtm_delroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
- inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
if (ret < 0)
goto out_register_late_subsys;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 2c83b7586422..db3c19a42e1c 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -263,10 +263,8 @@ static int rpl_input(struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
err = rpl_do_srh(skb, rlwt);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto drop;
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
@@ -286,9 +284,13 @@ static int rpl_input(struct sk_buff *skb)
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
- return err;
+ goto drop;
return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
}
static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c74705ead984..ac1dbd492c22 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -954,10 +954,10 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
+ enum skb_drop_reason reason;
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
- int err;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
@@ -967,9 +967,9 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
skb_dst_drop(skb);
- err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err) {
- kfree_skb(skb);
+ reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (reason) {
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
@@ -1174,8 +1174,8 @@ drop:
static int input_action_end_dt4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
+ enum skb_drop_reason reason;
struct iphdr *iph;
- int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
@@ -1193,8 +1193,8 @@ static int input_action_end_dt4(struct sk_buff *skb,
iph = ip_hdr(skb);
- err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
- if (unlikely(err))
+ reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (unlikely(reason))
goto drop;
return dst_input(skb);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 83b195f09561..39bd8951bfca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -51,6 +51,7 @@
#include <net/dsfield.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
/*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -935,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
- RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
- 0, dst, tiph->saddr, 0, 0,
+ tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE,
+ IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL));
rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
@@ -1111,7 +1112,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph->daddr, iph->saddr,
0, 0,
IPPROTO_IPV6,
- RT_TOS(iph->tos),
+ iph->tos & INET_DSCP_MASK,
tunnel->parms.link);
if (!IS_ERR(rt)) {
@@ -1435,7 +1436,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= SIT_FEATURES;
dev->hw_features |= SIT_FEATURES;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
@@ -1855,7 +1856,7 @@ static int __net_init sit_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ sitn->fb_tunnel_dev->netns_local = true;
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 200fea92f12f..2debdf085a3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -967,6 +967,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
if (sk) {
+ /* unconstify the socket only to attach it to buff with care. */
+ skb_set_owner_edemux(buff, (struct sock *)sk);
+
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
@@ -1169,8 +1172,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
goto out;
if (aoh)
- key.ao_key = tcp_ao_established_key(ao_info,
- aoh->rnext_keyid, -1);
+ key.ao_key = tcp_ao_established_key(sk, ao_info,
+ aoh->rnext_keyid, -1);
}
}
if (key.ao_key) {
@@ -1193,7 +1196,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
#endif
}
- tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
+ READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
@@ -1617,7 +1621,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1655,8 +1659,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (reason)
goto reset;
}
- if (opt_skb)
- __kfree_skb(opt_skb);
return 0;
}
} else
@@ -2176,6 +2178,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ u8 icsk_pending;
int rx_queue;
int state;
@@ -2184,12 +2187,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sp->sk_timer)) {
@@ -2258,7 +2262,7 @@ static void get_timewait6_sock(struct seq_file *seq,
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- tw->tw_substate, 0, 0,
+ READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 23971903e66d..a45bf17cb2a1 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -159,8 +159,14 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
- if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
- return __tcp6_gso_segment_list(skb, features);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) {
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size)
+ return __tcp6_gso_segment_list(skb, features);
+
+ skb->ip_summed = CHECKSUM_NONE;
+ }
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6602a2e9cdb5..d766fd798ecf 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -110,11 +110,22 @@ void udp_v6_rehash(struct sock *sk)
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
+ u16 new_hash4;
- udp_lib_rehash(sk, new_hash);
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ new_hash4 = udp_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+ } else {
+ new_hash4 = udp6_ehashfn(sock_net(sk),
+ &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+ }
+
+ udp_lib_rehash(sk, new_hash, new_hash4);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
int dif, int sdif)
@@ -160,7 +171,7 @@ static int compute_score(struct sock *sk, struct net *net,
}
/* called with rcu_read_lock() */
-static struct sock *udp6_lib_lookup2(struct net *net,
+static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum,
int dif, int sdif, struct udp_hslot *hslot2,
@@ -216,21 +227,95 @@ rescore:
return result;
}
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ const struct hlist_nulls_node *node;
+ struct udp_hslot *hslot4;
+ unsigned int hash4, slot;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash4 & udptable->mask;
+ hslot4 = &udptable->hash4[slot];
+
+begin:
+ udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
+ sk = (struct sock *)up;
+ if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
+ return sk;
+ }
+
+ /* if the nulls value we got at the end of this lookup is not the
+ * expected one, we must restart lookup. We probably met an item that
+ * was moved to another chain due to rehash.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ unsigned int hash;
+
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ udp4_hash4(sk);
+ return;
+ }
+
+ if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return;
+
+ hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+
+ udp_lib_hash4(sk, hash);
+}
+#endif /* CONFIG_BASE_SMALL */
+
/* rcu_read_lock() must be held */
-struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *udptable,
struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
+ unsigned int hash2;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
+
+ if (udp_has_hash4(hslot2)) {
+ result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
+ dif, sdif, udptable);
+ if (result) /* udp6_lib_lookup4 return sk or NULL */
+ return result;
+ }
/* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
@@ -257,8 +342,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
/* Lookup wildcard sockets */
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
@@ -300,7 +384,7 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
-struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
struct sock *sk;
@@ -859,7 +943,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udptable->hash2[hash2];
+ hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@@ -1065,14 +1149,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
{
struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@@ -1169,6 +1252,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
+static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip6_datagram_connect(sk, uaddr, addr_len);
+ if (!res)
+ udp6_hash4(sk);
+ release_sock(sk);
+ return res;
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1266,8 +1361,10 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
cork->gso_size);
+
+ /* Don't checksum the payload, skb will get segmented */
+ goto csum_partial;
}
- goto csum_partial;
}
if (is_udplite)
@@ -1762,7 +1859,7 @@ struct proto udpv6_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
- .connect = ip6_datagram_connect,
+ .connect = udpv6_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udpv6_init_sock,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index b1d81c4270ab..1f19b6f14484 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -23,23 +23,24 @@
#include <net/ip6_route.h>
#include <net/l3mdev.h>
-static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark)
+static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params)
{
struct flowi6 fl6;
struct dst_entry *dst;
int err;
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
- fl6.flowi6_mark = mark;
- memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
- if (saddr)
- memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net,
+ params->oif);
+ fl6.flowi6_mark = params->mark;
+ memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr));
+ if (params->saddr)
+ memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr));
- dst = ip6_route_output(net, NULL, &fl6);
+ fl6.flowi4_proto = params->ipproto;
+ fl6.uli = params->uli;
+
+ dst = ip6_route_output(params->net, NULL, &fl6);
err = dst->error;
if (dst->error) {
@@ -50,15 +51,14 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
return dst;
}
-static int xfrm6_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr,
- u32 mark)
+static int xfrm6_get_saddr(xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params)
{
struct dst_entry *dst;
struct net_device *dev;
struct inet6_dev *idev;
- dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
+ dst = xfrm6_dst_lookup(params);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -68,7 +68,8 @@ static int xfrm6_get_saddr(struct net *net, int oif,
return -EHOSTUNREACH;
}
dev = idev->dev;
- ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
+ ipv6_dev_get_saddr(dev_net(dev), dev, &params->daddr->in6, 0,
+ &saddr->in6);
dst_release(dst);
return 0;
}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c00323fa9eb6..7929df08d4e0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1236,7 +1236,9 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
return -EOPNOTSUPP;
/* receive/dequeue next skb:
- * the function understands MSG_PEEK and, thus, does not dequeue skb */
+ * the function understands MSG_PEEK and, thus, does not dequeue skb
+ * only refcount is increased.
+ */
skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -1252,9 +1254,8 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
cskb = skb;
if (skb_copy_datagram_msg(cskb, offset, msg, copied)) {
- if (!(flags & MSG_PEEK))
- skb_queue_head(&sk->sk_receive_queue, skb);
- return -EFAULT;
+ err = -EFAULT;
+ goto err_out;
}
/* SOCK_SEQPACKET: set MSG_TRUNC if recv buf size is too small */
@@ -1271,11 +1272,8 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS,
sizeof(IUCV_SKB_CB(skb)->class),
(void *)&IUCV_SKB_CB(skb)->class);
- if (err) {
- if (!(flags & MSG_PEEK))
- skb_queue_head(&sk->sk_receive_queue, skb);
- return err;
- }
+ if (err)
+ goto err_out;
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
@@ -1331,8 +1329,18 @@ done:
/* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */
if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC))
copied = rlen;
+ if (flags & MSG_PEEK)
+ skb_unref(skb);
return copied;
+
+err_out:
+ if (!(flags & MSG_PEEK))
+ skb_queue_head(&sk->sk_receive_queue, skb);
+ else
+ skb_unref(skb);
+
+ return err;
}
static inline __poll_t iucv_accept_poll(struct sock *parent)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4118c796290..24aec295a51c 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1584,14 +1584,6 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static void free_mux(struct rcu_head *rcu)
-{
- struct kcm_mux *mux = container_of(rcu,
- struct kcm_mux, rcu);
-
- kmem_cache_free(kcm_muxp, mux);
-}
-
static void release_mux(struct kcm_mux *mux)
{
struct kcm_net *knet = mux->knet;
@@ -1619,7 +1611,7 @@ static void release_mux(struct kcm_mux *mux)
knet->count--;
mutex_unlock(&knet->mutex);
- call_rcu(&mux->rcu, free_mux);
+ kfree_rcu(mux, rcu);
}
static void kcm_done(struct kcm_sock *kcm)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f79fb99271ed..c56bb4f451e6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (hdr->sadb_msg_seq) {
- x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+ x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
xfrm_state_put(x);
x = NULL;
@@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (!x)
- x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
+ x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX,
+ proto, xdaddr, xsaddr, 1, family);
if (x == NULL)
return -ENOENT;
@@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
return 0;
- x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+ x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
if (x == NULL)
return 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 2e86f520f799..369a2f2e459c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -117,12 +117,12 @@ struct l2tp_net {
struct hlist_head l2tp_v3_session_htable[16];
};
-static inline u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
+static u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
{
return ((u32)tunnel_id) << 16 | session_id;
}
-static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
+static unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
{
return ((unsigned long)sk) + session_id;
}
@@ -135,63 +135,81 @@ static bool l2tp_sk_is_v6(struct sock *sk)
}
#endif
-static inline struct l2tp_net *l2tp_pernet(const struct net *net)
+static struct l2tp_net *l2tp_pernet(const struct net *net)
{
return net_generic(net, l2tp_net_id);
}
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
+ struct sock *sk = tunnel->sock;
+
trace_free_tunnel(tunnel);
- sock_put(tunnel->sock);
- /* the tunnel is freed in the socket destructor */
+
+ if (sk) {
+ /* Disable udp encapsulation */
+ switch (tunnel->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ /* No longer an encapsulation socket. See net/ipv4/udp.c */
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ udp_sk(sk)->encap_rcv = NULL;
+ udp_sk(sk)->encap_destroy = NULL;
+ break;
+ case L2TP_ENCAPTYPE_IP:
+ break;
+ }
+
+ tunnel->sock = NULL;
+ sock_put(sk);
+ }
+
+ kfree_rcu(tunnel, rcu);
}
static void l2tp_session_free(struct l2tp_session *session)
{
trace_free_session(session);
if (session->tunnel)
- l2tp_tunnel_dec_refcount(session->tunnel);
- kfree(session);
+ l2tp_tunnel_put(session->tunnel);
+ kfree_rcu(session, rcu);
}
-struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk)
+struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk)
{
- struct l2tp_tunnel *tunnel = sk->sk_user_data;
+ const struct net *net = sock_net(sk);
+ unsigned long tunnel_id, tmp;
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_net *pn;
- if (tunnel)
- if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
- return NULL;
+ rcu_read_lock_bh();
+ pn = l2tp_pernet(net);
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel &&
+ tunnel->sock == sk &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
- return tunnel;
+ return NULL;
}
EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel);
-void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
-{
- refcount_inc(&tunnel->ref_count);
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_inc_refcount);
-
-void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_put(struct l2tp_tunnel *tunnel)
{
if (refcount_dec_and_test(&tunnel->ref_count))
l2tp_tunnel_free(tunnel);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_dec_refcount);
-
-void l2tp_session_inc_refcount(struct l2tp_session *session)
-{
- refcount_inc(&session->ref_count);
-}
-EXPORT_SYMBOL_GPL(l2tp_session_inc_refcount);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_put);
-void l2tp_session_dec_refcount(struct l2tp_session *session)
+void l2tp_session_put(struct l2tp_session *session)
{
if (refcount_dec_and_test(&session->ref_count))
l2tp_session_free(session);
}
-EXPORT_SYMBOL_GPL(l2tp_session_dec_refcount);
+EXPORT_SYMBOL_GPL(l2tp_session_put);
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
@@ -211,26 +229,27 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
-struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
+struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key)
{
struct l2tp_net *pn = l2tp_pernet(net);
- unsigned long tunnel_id, tmp;
- struct l2tp_tunnel *tunnel;
- int count = 0;
+ struct l2tp_tunnel *tunnel = NULL;
rcu_read_lock_bh();
- idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
- if (tunnel && ++count > nth &&
- refcount_inc_not_zero(&tunnel->ref_count)) {
+again:
+ tunnel = idr_get_next_ul(&pn->l2tp_tunnel_idr, key);
+ if (tunnel) {
+ if (refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
}
+ (*key)++;
+ goto again;
}
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get_next);
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id)
{
@@ -254,7 +273,15 @@ struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk,
hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
hlist, key) {
- if (session->tunnel->sock == sk &&
+ /* session->tunnel may be NULL if another thread is in
+ * l2tp_session_register and has added an item to
+ * l2tp_v3_session_htable but hasn't yet added the
+ * session to its tunnel's session_list.
+ */
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (session->session_id == session_id &&
+ tunnel && tunnel->sock == sk &&
refcount_inc_not_zero(&session->ref_count)) {
rcu_read_unlock_bh();
return session;
@@ -295,24 +322,109 @@ struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, in
}
EXPORT_SYMBOL_GPL(l2tp_session_get);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
+static struct l2tp_session *l2tp_v2_session_get_next(const struct net *net,
+ u16 tid,
+ unsigned long *key)
{
- struct l2tp_session *session;
- int count = 0;
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_session *session = NULL;
+
+ /* Start searching within the range of the tid */
+ if (*key == 0)
+ *key = l2tp_v2_session_key(tid, 0);
rcu_read_lock_bh();
- list_for_each_entry_rcu(session, &tunnel->session_list, list) {
- if (++count > nth) {
- l2tp_session_inc_refcount(session);
+again:
+ session = idr_get_next_ul(&pn->l2tp_v2_session_idr, key);
+ if (session) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ /* ignore sessions with id 0 as they are internal for pppol2tp */
+ if (session->session_id == 0) {
+ (*key)++;
+ goto again;
+ }
+
+ if (tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
rcu_read_unlock_bh();
return session;
}
+
+ (*key)++;
+ if (tunnel->tunnel_id == tid)
+ goto again;
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+
+static struct l2tp_session *l2tp_v3_session_get_next(const struct net *net,
+ u32 tid, struct sock *sk,
+ unsigned long *key)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_session *session = NULL;
+
+ rcu_read_lock_bh();
+again:
+ session = idr_get_next_ul(&pn->l2tp_v3_session_idr, key);
+ if (session && !hash_hashed(&session->hlist)) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (tunnel && tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
+
+ (*key)++;
+ goto again;
+ }
+
+ /* If we get here and session is non-NULL, the IDR entry may be one
+ * where the session_id collides with one in another tunnel. Check
+ * session_htable for a match. There can only be one session of a given
+ * ID per tunnel so we can return as soon as a match is found.
+ */
+ if (session && hash_hashed(&session->hlist)) {
+ unsigned long hkey = l2tp_v3_session_hashkey(sk, session->session_id);
+ u32 sid = session->session_id;
+
+ hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
+ hlist, hkey) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (session->session_id == sid &&
+ tunnel && tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
+ }
+
+ /* If no match found, the colliding session ID isn't in our
+ * tunnel so try the next session ID.
+ */
+ (*key)++;
+ goto again;
}
+
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
+
+struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, unsigned long *key)
+{
+ if (pver == L2TP_HDR_VER_2)
+ return l2tp_v2_session_get_next(net, tunnel_id, key);
+ else
+ return l2tp_v3_session_get_next(net, tunnel_id, sk, key);
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get_next);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
@@ -330,7 +442,7 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
if (tunnel) {
list_for_each_entry_rcu(session, &tunnel->session_list, list) {
if (!strcmp(session->ifname, ifname)) {
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
rcu_read_unlock_bh();
return session;
@@ -347,7 +459,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist,
struct l2tp_session *session)
{
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
WARN_ON_ONCE(session->coll_list);
session->coll_list = clist;
spin_lock(&clist->lock);
@@ -387,12 +499,12 @@ static int l2tp_session_collision_add(struct l2tp_net *pn,
/* If existing session isn't already in the session hlist, add it. */
if (!hash_hashed(&session2->hlist))
- hash_add(pn->l2tp_v3_session_htable, &session2->hlist,
- session2->hlist_key);
+ hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist,
+ session2->hlist_key);
/* Add new session to the hlist and collision list */
- hash_add(pn->l2tp_v3_session_htable, &session1->hlist,
- session1->hlist_key);
+ hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist,
+ session1->hlist_key);
refcount_inc(&clist->ref_count);
l2tp_session_coll_list_add(clist, session1);
@@ -408,7 +520,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn,
lockdep_assert_held(&pn->l2tp_session_idr_lock);
- hash_del(&session->hlist);
+ hash_del_rcu(&session->hlist);
if (clist) {
/* Remove session from its collision list. If there
@@ -433,7 +545,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn,
spin_unlock(&clist->lock);
if (refcount_dec_and_test(&clist->ref_count))
kfree(clist);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
}
}
@@ -442,6 +554,7 @@ int l2tp_session_register(struct l2tp_session *session,
{
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
struct l2tp_session *other_session = NULL;
+ void *old = NULL;
u32 session_key;
int err;
@@ -481,16 +594,23 @@ int l2tp_session_register(struct l2tp_session *session,
goto out;
}
- l2tp_tunnel_inc_refcount(tunnel);
- list_add(&session->list, &tunnel->session_list);
+ refcount_inc(&tunnel->ref_count);
+ WRITE_ONCE(session->tunnel, tunnel);
+ list_add_rcu(&session->list, &tunnel->session_list);
+ /* this makes session available to lockless getters */
if (tunnel->version == L2TP_HDR_VER_3) {
if (!other_session)
- idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
+ old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
} else {
- idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
+ old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
}
+ /* old should be NULL, unless something removed or modified
+ * the IDR entry after our idr_alloc_32 above (which shouldn't
+ * happen).
+ */
+ WARN_ON_ONCE(old);
out:
spin_unlock_bh(&pn->l2tp_session_idr_lock);
spin_unlock_bh(&tunnel->list_lock);
@@ -797,7 +917,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
if (!session->lns_mode && !session->send_seq) {
trace_session_seqnum_lns_enable(session);
session->send_seq = 1;
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version,
+ tunnel->encap);
}
} else {
/* No sequence numbers.
@@ -818,7 +939,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
if (!session->lns_mode && session->send_seq) {
trace_session_seqnum_lns_disable(session);
session->send_seq = 0;
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version,
+ tunnel->encap);
} else if (session->send_seq) {
pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
session->name);
@@ -955,7 +1077,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!session || !session->recv_skb) {
if (session)
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
/* Not found? Pass to userspace to deal with */
goto pass;
@@ -969,12 +1091,12 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (version == L2TP_HDR_VER_3 &&
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto invalid;
}
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -1218,44 +1340,6 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
* Tinnel and session create/destroy.
*****************************************************************************/
-/* Tunnel socket destruct hook.
- * The tunnel context is deleted only when all session sockets have been
- * closed.
- */
-static void l2tp_tunnel_destruct(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
-
- if (!tunnel)
- goto end;
-
- /* Disable udp encapsulation */
- switch (tunnel->encap) {
- case L2TP_ENCAPTYPE_UDP:
- /* No longer an encapsulation socket. See net/ipv4/udp.c */
- WRITE_ONCE(udp_sk(sk)->encap_type, 0);
- udp_sk(sk)->encap_rcv = NULL;
- udp_sk(sk)->encap_destroy = NULL;
- break;
- case L2TP_ENCAPTYPE_IP:
- break;
- }
-
- /* Remove hooks into tunnel socket */
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_destruct = tunnel->old_sk_destruct;
- sk->sk_user_data = NULL;
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Call the original destructor */
- if (sk->sk_destruct)
- (*sk->sk_destruct)(sk);
-
- kfree_rcu(tunnel, rcu);
-end:
- return;
-}
-
/* Remove an l2tp session from l2tp_core's lists. */
static void l2tp_session_unhash(struct l2tp_session *session)
{
@@ -1288,8 +1372,6 @@ static void l2tp_session_unhash(struct l2tp_session *session)
spin_unlock_bh(&pn->l2tp_session_idr_lock);
spin_unlock_bh(&tunnel->list_lock);
-
- synchronize_rcu();
}
}
@@ -1301,28 +1383,21 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
spin_lock_bh(&tunnel->list_lock);
tunnel->acpt_newsess = false;
- for (;;) {
- session = list_first_entry_or_null(&tunnel->session_list,
- struct l2tp_session, list);
- if (!session)
- break;
- l2tp_session_inc_refcount(session);
- list_del_init(&session->list);
- spin_unlock_bh(&tunnel->list_lock);
+ list_for_each_entry(session, &tunnel->session_list, list)
l2tp_session_delete(session);
- spin_lock_bh(&tunnel->list_lock);
- l2tp_session_dec_refcount(session);
- }
spin_unlock_bh(&tunnel->list_lock);
}
/* Tunnel socket destroy hook for UDP encapsulation */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel;
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
@@ -1356,10 +1431,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
/* drop initial ref */
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
/* drop workqueue ref */
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
}
/* Create a socket for the tunnel, if one isn't set up by
@@ -1505,7 +1580,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
tunnel->tunnel_id = tunnel_id;
tunnel->peer_tunnel_id = peer_tunnel_id;
- tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
spin_lock_init(&tunnel->list_lock);
tunnel->acpt_newsess = true;
@@ -1531,6 +1605,8 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
enum l2tp_encap_type encap)
{
+ struct l2tp_tunnel *tunnel;
+
if (!net_eq(sock_net(sk), net))
return -EINVAL;
@@ -1544,9 +1620,15 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
(encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
return -EPROTONOSUPPORT;
- if (sk->sk_user_data)
+ if (encap == L2TP_ENCAPTYPE_UDP && sk->sk_user_data)
return -EBUSY;
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
+ l2tp_tunnel_put(tunnel);
+ return -EBUSY;
+ }
+
return 0;
}
@@ -1584,12 +1666,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
goto err_inval_sock;
- rcu_assign_sk_user_data(sk, tunnel);
write_unlock_bh(&sk->sk_callback_lock);
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
- .sk_user_data = tunnel,
.encap_type = UDP_ENCAP_L2TPINUDP,
.encap_rcv = l2tp_udp_encap_recv,
.encap_err_rcv = l2tp_udp_encap_err_recv,
@@ -1599,8 +1679,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
setup_udp_tunnel_sock(net, sock, &udp_cfg);
}
- tunnel->old_sk_destruct = sk->sk_destruct;
- sk->sk_destruct = &l2tp_tunnel_destruct;
sk->sk_allocation = GFP_ATOMIC;
release_sock(sk);
@@ -1639,7 +1717,7 @@ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
if (!test_and_set_bit(0, &tunnel->dead)) {
trace_delete_tunnel(tunnel);
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
queue_work(l2tp_wq, &tunnel->del_work);
}
}
@@ -1647,23 +1725,37 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
void l2tp_session_delete(struct l2tp_session *session)
{
- if (test_and_set_bit(0, &session->dead))
- return;
+ if (!test_and_set_bit(0, &session->dead)) {
+ trace_delete_session(session);
+ refcount_inc(&session->ref_count);
+ queue_work(l2tp_wq, &session->del_work);
+ }
+}
+EXPORT_SYMBOL_GPL(l2tp_session_delete);
+
+/* Workqueue session deletion function */
+static void l2tp_session_del_work(struct work_struct *work)
+{
+ struct l2tp_session *session = container_of(work, struct l2tp_session,
+ del_work);
- trace_delete_session(session);
l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
if (session->session_close)
(*session->session_close)(session);
- l2tp_session_dec_refcount(session);
+ /* drop initial ref */
+ l2tp_session_put(session);
+
+ /* drop workqueue ref */
+ l2tp_session_put(session);
}
-EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_type parameters are set.
*/
-void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+void l2tp_session_set_header_len(struct l2tp_session *session, int version,
+ enum l2tp_encap_type encap)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1672,7 +1764,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
} else {
session->hdr_len = 4 + session->cookie_len;
session->hdr_len += l2tp_get_l2specific_len(session);
- if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
+ if (encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
}
@@ -1686,7 +1778,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL);
if (session) {
session->magic = L2TP_SESSION_MAGIC;
- session->tunnel = tunnel;
session->session_id = session_id;
session->peer_session_id = peer_session_id;
@@ -1710,6 +1801,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
INIT_HLIST_NODE(&session->hlist);
INIT_LIST_HEAD(&session->clist);
INIT_LIST_HEAD(&session->list);
+ INIT_WORK(&session->del_work, l2tp_session_del_work);
if (cfg) {
session->pwtype = cfg->pw_type;
@@ -1724,7 +1816,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
}
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);
refcount_set(&session->ref_count, 1);
@@ -1753,7 +1845,7 @@ static __net_init int l2tp_init_net(struct net *net)
return 0;
}
-static __net_exit void l2tp_exit_net(struct net *net)
+static __net_exit void l2tp_pre_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
@@ -1766,9 +1858,40 @@ static __net_exit void l2tp_exit_net(struct net *net)
}
rcu_read_unlock_bh();
- if (l2tp_wq)
- flush_workqueue(l2tp_wq);
- rcu_barrier();
+ if (l2tp_wq) {
+ /* Run all TUNNEL_DELETE work items just queued. */
+ __flush_workqueue(l2tp_wq);
+
+ /* Each TUNNEL_DELETE work item will queue a SESSION_DELETE
+ * work item for each session in the tunnel. Flush the
+ * workqueue again to process these.
+ */
+ __flush_workqueue(l2tp_wq);
+ }
+}
+
+static int l2tp_idr_item_unexpected(int id, void *p, void *data)
+{
+ const char *idr_name = data;
+
+ pr_err("l2tp: %s IDR not empty at net %d exit\n", idr_name, id);
+ WARN_ON_ONCE(1);
+ return 1;
+}
+
+static __net_exit void l2tp_exit_net(struct net *net)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ /* Our per-net IDRs should be empty. Check that is so, to
+ * help catch cleanup races or refcnt leaks.
+ */
+ idr_for_each(&pn->l2tp_v2_session_idr, l2tp_idr_item_unexpected,
+ "v2_session");
+ idr_for_each(&pn->l2tp_v3_session_idr, l2tp_idr_item_unexpected,
+ "v3_session");
+ idr_for_each(&pn->l2tp_tunnel_idr, l2tp_idr_item_unexpected,
+ "tunnel");
idr_destroy(&pn->l2tp_v2_session_idr);
idr_destroy(&pn->l2tp_v3_session_idr);
@@ -1778,6 +1901,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
static struct pernet_operations l2tp_net_ops = {
.init = l2tp_init_net,
.exit = l2tp_exit_net,
+ .pre_exit = l2tp_pre_exit_net,
.id = &l2tp_net_id,
.size = sizeof(struct l2tp_net),
};
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8ac81bc1bc6f..ffd8ced3a51f 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -16,7 +16,6 @@
#endif
/* Random numbers used for internal consistency checks of tunnel and session structures */
-#define L2TP_TUNNEL_MAGIC 0x42114DDA
#define L2TP_SESSION_MAGIC 0x0C04EB7D
struct sk_buff;
@@ -67,6 +66,7 @@ struct l2tp_session_coll_list {
struct l2tp_session {
int magic; /* should be L2TP_SESSION_MAGIC */
long dead;
+ struct rcu_head rcu;
struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */
u32 session_id;
@@ -103,6 +103,7 @@ struct l2tp_session {
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
+ struct work_struct del_work;
/* Session receive handler for data packets.
* Each pseudowire implementation should implement this callback in order to
@@ -155,8 +156,6 @@ struct l2tp_tunnel_cfg {
*/
#define L2TP_TUNNEL_NAME_MAX 20
struct l2tp_tunnel {
- int magic; /* Should be L2TP_TUNNEL_MAGIC */
-
unsigned long dead;
struct rcu_head rcu;
@@ -176,7 +175,6 @@ struct l2tp_tunnel {
struct net *l2tp_net; /* the net we belong to */
refcount_t ref_count;
- void (*old_sk_destruct)(struct sock *sk);
struct sock *sock; /* parent socket */
int fd; /* parent fd, if tunnel socket was created
* by userspace
@@ -211,23 +209,22 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
}
/* Tunnel and session refcounts */
-void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel);
-void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel);
-void l2tp_session_inc_refcount(struct l2tp_session *session);
-void l2tp_session_dec_refcount(struct l2tp_session *session);
+void l2tp_tunnel_put(struct l2tp_tunnel *tunnel);
+void l2tp_session_put(struct l2tp_session *session);
/* Tunnel and session lookup.
* These functions take a reference on the instances they return, so
* the caller must ensure that the reference is dropped appropriately.
*/
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth);
+struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key);
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id);
struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id);
struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
u32 tunnel_id, u32 session_id);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
+struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, unsigned long *key);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
@@ -260,7 +257,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
/* Transmit path helpers for sending packets over the tunnel socket. */
-void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+void l2tp_session_set_header_len(struct l2tp_session *session, int version,
+ enum l2tp_encap_type encap);
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb);
/* Pseudowire management.
@@ -273,10 +271,7 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
/* IOCTL helper for IP encap modules. */
int l2tp_ioctl(struct sock *sk, int cmd, int *karg);
-/* Extract the tunnel structure from a socket's sk_user_data pointer,
- * validating the tunnel magic feather.
- */
-struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk);
+struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk);
static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
{
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 8755ae521154..2d0c8275a3a8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -34,8 +34,8 @@ static struct dentry *rootdir;
struct l2tp_dfs_seq_data {
struct net *net;
netns_tracker ns_tracker;
- int tunnel_idx; /* current tunnel */
- int session_idx; /* index of session within current tunnel */
+ unsigned long tkey; /* lookup key of current tunnel */
+ unsigned long skey; /* lookup key of current session */
struct l2tp_tunnel *tunnel;
struct l2tp_session *session; /* NULL means get next tunnel */
};
@@ -44,23 +44,25 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->tunnel)
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
- pd->tunnel = l2tp_tunnel_get_nth(pd->net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ pd->tunnel = l2tp_tunnel_get_next(pd->net, &pd->tkey);
+ pd->tkey++;
}
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->session)
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
- pd->session_idx++;
+ pd->session = l2tp_session_get_next(pd->net, pd->tunnel->sock,
+ pd->tunnel->version,
+ pd->tunnel->tunnel_id, &pd->skey);
+ pd->skey++;
if (!pd->session) {
- pd->session_idx = 0;
+ pd->skey = 0;
l2tp_dfs_next_tunnel(pd);
}
}
@@ -109,11 +111,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
* or l2tp_dfs_next_tunnel().
*/
if (pd->session) {
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
pd->session = NULL;
}
if (pd->tunnel) {
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
pd->tunnel = NULL;
}
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8ba00ad433c2..d692b902e120 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -72,31 +72,19 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
unsigned int len = skb->len;
int ret = l2tp_xmit_skb(session, skb);
- if (likely(ret == NET_XMIT_SUCCESS)) {
- DEV_STATS_ADD(dev, tx_bytes, len);
- DEV_STATS_INC(dev, tx_packets);
- } else {
+ if (likely(ret == NET_XMIT_SUCCESS))
+ dev_sw_netstats_tx_add(dev, 1, len);
+ else
DEV_STATS_INC(dev, tx_dropped);
- }
- return NETDEV_TX_OK;
-}
-static void l2tp_eth_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *stats)
-{
- stats->tx_bytes = DEV_STATS_READ(dev, tx_bytes);
- stats->tx_packets = DEV_STATS_READ(dev, tx_packets);
- stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
- stats->rx_bytes = DEV_STATS_READ(dev, rx_bytes);
- stats->rx_packets = DEV_STATS_READ(dev, rx_packets);
- stats->rx_errors = DEV_STATS_READ(dev, rx_errors);
+ return NETDEV_TX_OK;
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_init = l2tp_eth_dev_init,
.ndo_uninit = l2tp_eth_dev_uninit,
.ndo_start_xmit = l2tp_eth_dev_xmit,
- .ndo_get_stats64 = l2tp_eth_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
};
@@ -109,9 +97,10 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &l2tpeth_type);
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->netdev_ops = &l2tp_eth_netdev_ops;
dev->needs_free_netdev = true;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
}
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
@@ -138,12 +127,11 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
if (!dev)
goto error_rcu;
- if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
- DEV_STATS_INC(dev, rx_packets);
- DEV_STATS_ADD(dev, rx_bytes, data_len);
- } else {
+ if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS)
+ dev_sw_netstats_rx_add(dev, data_len);
+ else
DEV_STATS_INC(dev, rx_errors);
- }
+
rcu_read_unlock();
return;
@@ -283,7 +271,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
spriv = l2tp_session_priv(session);
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
rtnl_lock();
@@ -301,7 +289,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
if (rc < 0) {
rtnl_unlock();
l2tp_session_delete(session);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
free_netdev(dev);
return rc;
@@ -312,17 +300,17 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
rtnl_unlock();
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
__module_get(THIS_MODULE);
return 0;
err_sess_dev:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
free_netdev(dev);
err_sess:
- kfree(session);
+ l2tp_session_put(session);
err:
return rc;
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index e48aa177d74c..4bc24fddfd52 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -22,9 +22,19 @@
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "l2tp_core.h"
+/* per-net private data for this module */
+static unsigned int l2tp_ip_net_id;
+struct l2tp_ip_net {
+ rwlock_t l2tp_ip_lock;
+ struct hlist_head l2tp_ip_table;
+ struct hlist_head l2tp_ip_bind_table;
+};
+
struct l2tp_ip_sock {
/* inet_sock has to be the first member of l2tp_ip_sock */
struct inet_sock inet;
@@ -33,21 +43,23 @@ struct l2tp_ip_sock {
u32 peer_conn_id;
};
-static DEFINE_RWLOCK(l2tp_ip_lock);
-static struct hlist_head l2tp_ip_table;
-static struct hlist_head l2tp_ip_bind_table;
-
-static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
+static struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
{
return (struct l2tp_ip_sock *)sk;
}
+static struct l2tp_ip_net *l2tp_ip_pernet(const struct net *net)
+{
+ return net_generic(net, l2tp_ip_net_id);
+}
+
static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
__be32 raddr, int dif, u32 tunnel_id)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(net);
struct sock *sk;
- sk_for_each_bound(sk, &l2tp_ip_bind_table) {
+ sk_for_each_bound(sk, &pn->l2tp_ip_bind_table) {
const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
int bound_dev_if;
@@ -113,6 +125,7 @@ found:
static int l2tp_ip_recv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct l2tp_ip_net *pn;
struct sock *sk;
u32 session_id;
u32 tunnel_id;
@@ -121,6 +134,8 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
struct iphdr *iph;
+ pn = l2tp_ip_pernet(net);
+
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -152,7 +167,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard_sess;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -167,15 +182,15 @@ pass_up:
tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = (struct iphdr *)skb_network_header(skb);
- read_lock_bh(&l2tp_ip_lock);
+ read_lock_bh(&pn->l2tp_ip_lock);
sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
tunnel_id);
if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
+ read_unlock_bh(&pn->l2tp_ip_lock);
goto discard;
}
sock_hold(sk);
- read_unlock_bh(&l2tp_ip_lock);
+ read_unlock_bh(&pn->l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -185,7 +200,7 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto discard;
discard_put:
@@ -198,21 +213,25 @@ discard:
static int l2tp_ip_hash(struct sock *sk)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
if (sk_unhashed(sk)) {
- write_lock_bh(&l2tp_ip_lock);
- sk_add_node(sk, &l2tp_ip_table);
- write_unlock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
+ sk_add_node(sk, &pn->l2tp_ip_table);
+ write_unlock_bh(&pn->l2tp_ip_lock);
}
return 0;
}
static void l2tp_ip_unhash(struct sock *sk)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
if (sk_unhashed(sk))
return;
- write_lock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
}
static int l2tp_ip_open(struct sock *sk)
@@ -226,23 +245,26 @@ static int l2tp_ip_open(struct sock *sk)
static void l2tp_ip_close(struct sock *sk, long timeout)
{
- write_lock_bh(&l2tp_ip_lock);
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
+ write_lock_bh(&pn->l2tp_ip_lock);
hlist_del_init(&sk->sk_bind_node);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
sk_common_release(sk);
}
static void l2tp_ip_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
- struct sk_buff *skb;
+ struct l2tp_tunnel *tunnel;
- while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
- kfree_skb(skb);
+ __skb_queue_purge(&sk->sk_write_queue);
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -250,6 +272,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr;
struct net *net = sock_net(sk);
+ struct l2tp_ip_net *pn;
int ret;
int chk_addr_ret;
@@ -280,10 +303,11 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
- write_lock_bh(&l2tp_ip_lock);
+ pn = l2tp_ip_pernet(net);
+ write_lock_bh(&pn->l2tp_ip_lock);
if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
ret = -EADDRINUSE;
goto out;
}
@@ -291,9 +315,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
- sk_add_bind_node(sk, &l2tp_ip_bind_table);
+ sk_add_bind_node(sk, &pn->l2tp_ip_bind_table);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
ret = 0;
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -307,6 +331,7 @@ out:
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
int rc;
if (addr_len < sizeof(*lsa))
@@ -329,10 +354,10 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
- write_lock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
hlist_del_init(&sk->sk_bind_node);
- sk_add_bind_node(sk, &l2tp_ip_bind_table);
- write_unlock_bh(&l2tp_ip_lock);
+ sk_add_bind_node(sk, &pn->l2tp_ip_bind_table);
+ write_unlock_bh(&pn->l2tp_ip_lock);
out_sk:
release_sock(sk);
@@ -637,25 +662,58 @@ static struct net_protocol l2tp_ip_protocol __read_mostly = {
.handler = l2tp_ip_recv,
};
+static __net_init int l2tp_ip_init_net(struct net *net)
+{
+ struct l2tp_ip_net *pn = net_generic(net, l2tp_ip_net_id);
+
+ rwlock_init(&pn->l2tp_ip_lock);
+ INIT_HLIST_HEAD(&pn->l2tp_ip_table);
+ INIT_HLIST_HEAD(&pn->l2tp_ip_bind_table);
+ return 0;
+}
+
+static __net_exit void l2tp_ip_exit_net(struct net *net)
+{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(net);
+
+ write_lock_bh(&pn->l2tp_ip_lock);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_table) != 0);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_bind_table) != 0);
+ write_unlock_bh(&pn->l2tp_ip_lock);
+}
+
+static struct pernet_operations l2tp_ip_net_ops = {
+ .init = l2tp_ip_init_net,
+ .exit = l2tp_ip_exit_net,
+ .id = &l2tp_ip_net_id,
+ .size = sizeof(struct l2tp_ip_net),
+};
+
static int __init l2tp_ip_init(void)
{
int err;
pr_info("L2TP IP encapsulation support (L2TPv3)\n");
+ err = register_pernet_device(&l2tp_ip_net_ops);
+ if (err)
+ goto out;
+
err = proto_register(&l2tp_ip_prot, 1);
if (err != 0)
- goto out;
+ goto out1;
err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
if (err)
- goto out1;
+ goto out2;
inet_register_protosw(&l2tp_ip_protosw);
return 0;
-out1:
+out2:
proto_unregister(&l2tp_ip_prot);
+out1:
+ unregister_pernet_device(&l2tp_ip_net_ops);
out:
return err;
}
@@ -665,6 +723,7 @@ static void __exit l2tp_ip_exit(void)
inet_unregister_protosw(&l2tp_ip_protosw);
inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
proto_unregister(&l2tp_ip_prot);
+ unregister_pernet_device(&l2tp_ip_net_ops);
}
module_init(l2tp_ip_init);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d217ff1f229e..f4c1da070826 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -22,6 +22,8 @@
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
@@ -29,6 +31,14 @@
#include "l2tp_core.h"
+/* per-net private data for this module */
+static unsigned int l2tp_ip6_net_id;
+struct l2tp_ip6_net {
+ rwlock_t l2tp_ip6_lock;
+ struct hlist_head l2tp_ip6_table;
+ struct hlist_head l2tp_ip6_bind_table;
+};
+
struct l2tp_ip6_sock {
/* inet_sock has to be the first member of l2tp_ip6_sock */
struct inet_sock inet;
@@ -39,23 +49,25 @@ struct l2tp_ip6_sock {
struct ipv6_pinfo inet6;
};
-static DEFINE_RWLOCK(l2tp_ip6_lock);
-static struct hlist_head l2tp_ip6_table;
-static struct hlist_head l2tp_ip6_bind_table;
-
-static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
+static struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
{
return (struct l2tp_ip6_sock *)sk;
}
+static struct l2tp_ip6_net *l2tp_ip6_pernet(const struct net *net)
+{
+ return net_generic(net, l2tp_ip6_net_id);
+}
+
static struct sock *__l2tp_ip6_bind_lookup(const struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *raddr,
int dif, u32 tunnel_id)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(net);
struct sock *sk;
- sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
+ sk_for_each_bound(sk, &pn->l2tp_ip6_bind_table) {
const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
@@ -123,6 +135,7 @@ found:
static int l2tp_ip6_recv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct l2tp_ip6_net *pn;
struct sock *sk;
u32 session_id;
u32 tunnel_id;
@@ -131,6 +144,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
struct ipv6hdr *iph;
+ pn = l2tp_ip6_pernet(net);
+
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -162,7 +177,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard_sess;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -177,15 +192,15 @@ pass_up:
tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = ipv6_hdr(skb);
- read_lock_bh(&l2tp_ip6_lock);
+ read_lock_bh(&pn->l2tp_ip6_lock);
sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
inet6_iif(skb), tunnel_id);
if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
+ read_unlock_bh(&pn->l2tp_ip6_lock);
goto discard;
}
sock_hold(sk);
- read_unlock_bh(&l2tp_ip6_lock);
+ read_unlock_bh(&pn->l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -195,7 +210,7 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto discard;
discard_put:
@@ -208,21 +223,25 @@ discard:
static int l2tp_ip6_hash(struct sock *sk)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
if (sk_unhashed(sk)) {
- write_lock_bh(&l2tp_ip6_lock);
- sk_add_node(sk, &l2tp_ip6_table);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
+ sk_add_node(sk, &pn->l2tp_ip6_table);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
}
return 0;
}
static void l2tp_ip6_unhash(struct sock *sk)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
if (sk_unhashed(sk))
return;
- write_lock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
}
static int l2tp_ip6_open(struct sock *sk)
@@ -236,24 +255,29 @@ static int l2tp_ip6_open(struct sock *sk)
static void l2tp_ip6_close(struct sock *sk, long timeout)
{
- write_lock_bh(&l2tp_ip6_lock);
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
+ write_lock_bh(&pn->l2tp_ip6_lock);
hlist_del_init(&sk->sk_bind_node);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
sk_common_release(sk);
}
static void l2tp_ip6_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel;
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -262,11 +286,14 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *)uaddr;
struct net *net = sock_net(sk);
+ struct l2tp_ip6_net *pn;
__be32 v4addr = 0;
int bound_dev_if;
int addr_type;
int err;
+ pn = l2tp_ip6_pernet(net);
+
if (addr->l2tp_family != AF_INET6)
return -EINVAL;
if (addr_len < sizeof(*addr))
@@ -324,10 +351,10 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
rcu_read_unlock();
- write_lock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
addr->l2tp_conn_id)) {
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
err = -EADDRINUSE;
goto out_unlock;
}
@@ -340,9 +367,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
- sk_add_bind_node(sk, &l2tp_ip6_bind_table);
+ sk_add_bind_node(sk, &pn->l2tp_ip6_bind_table);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
sock_reset_flag(sk, SOCK_ZAPPED);
release_sock(sk);
@@ -364,6 +391,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
struct in6_addr *daddr;
int addr_type;
int rc;
+ struct l2tp_ip6_net *pn;
if (addr_len < sizeof(*lsa))
return -EINVAL;
@@ -395,10 +423,11 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
- write_lock_bh(&l2tp_ip6_lock);
+ pn = l2tp_ip6_pernet(sock_net(sk));
+ write_lock_bh(&pn->l2tp_ip6_lock);
hlist_del_init(&sk->sk_bind_node);
- sk_add_bind_node(sk, &l2tp_ip6_bind_table);
- write_unlock_bh(&l2tp_ip6_lock);
+ sk_add_bind_node(sk, &pn->l2tp_ip6_bind_table);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
out_sk:
release_sock(sk);
@@ -765,25 +794,58 @@ static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {
.handler = l2tp_ip6_recv,
};
+static __net_init int l2tp_ip6_init_net(struct net *net)
+{
+ struct l2tp_ip6_net *pn = net_generic(net, l2tp_ip6_net_id);
+
+ rwlock_init(&pn->l2tp_ip6_lock);
+ INIT_HLIST_HEAD(&pn->l2tp_ip6_table);
+ INIT_HLIST_HEAD(&pn->l2tp_ip6_bind_table);
+ return 0;
+}
+
+static __net_exit void l2tp_ip6_exit_net(struct net *net)
+{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(net);
+
+ write_lock_bh(&pn->l2tp_ip6_lock);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip6_table) != 0);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip6_bind_table) != 0);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
+}
+
+static struct pernet_operations l2tp_ip6_net_ops = {
+ .init = l2tp_ip6_init_net,
+ .exit = l2tp_ip6_exit_net,
+ .id = &l2tp_ip6_net_id,
+ .size = sizeof(struct l2tp_ip6_net),
+};
+
static int __init l2tp_ip6_init(void)
{
int err;
pr_info("L2TP IP encapsulation support for IPv6 (L2TPv3)\n");
+ err = register_pernet_device(&l2tp_ip6_net_ops);
+ if (err)
+ goto out;
+
err = proto_register(&l2tp_ip6_prot, 1);
if (err != 0)
- goto out;
+ goto out1;
err = inet6_add_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
if (err)
- goto out1;
+ goto out2;
inet6_register_protosw(&l2tp_ip6_protosw);
return 0;
-out1:
+out2:
proto_unregister(&l2tp_ip6_prot);
+out1:
+ unregister_pernet_device(&l2tp_ip6_net_ops);
out:
return err;
}
@@ -793,6 +855,7 @@ static void __exit l2tp_ip6_exit(void)
inet6_unregister_protosw(&l2tp_ip6_protosw);
inet6_del_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
proto_unregister(&l2tp_ip6_prot);
+ unregister_pernet_device(&l2tp_ip6_net_ops);
}
module_init(l2tp_ip6_init);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d105030520f9..59457c0c14aa 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -63,7 +63,7 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
if (tunnel) {
session = l2tp_session_get(net, tunnel->sock, tunnel->version,
tunnel_id, session_id);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
}
}
@@ -116,7 +116,7 @@ static int l2tp_tunnel_notify(struct genl_family *family,
NLM_F_ACK, tunnel, cmd);
if (ret >= 0) {
- ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ ret = genlmsg_multicast_allns(family, msg, 0, 0);
/* We don't care if no one is listening */
if (ret == -ESRCH)
ret = 0;
@@ -144,7 +144,7 @@ static int l2tp_session_notify(struct genl_family *family,
NLM_F_ACK, session, cmd);
if (ret >= 0) {
- ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+ ret = genlmsg_multicast_allns(family, msg, 0, 0);
/* We don't care if no one is listening */
if (ret == -ESRCH)
ret = 0;
@@ -242,7 +242,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
if (ret < 0)
goto out;
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
ret = l2tp_tunnel_register(tunnel, net, &cfg);
if (ret < 0) {
kfree(tunnel);
@@ -250,7 +250,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
}
ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
L2TP_CMD_TUNNEL_CREATE);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -280,7 +280,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_delete(tunnel);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -308,7 +308,7 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -479,42 +479,48 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto err_nlmsg_tunnel;
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
return genlmsg_unicast(net, msg, info->snd_portid);
err_nlmsg_tunnel:
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
err_nlmsg:
nlmsg_free(msg);
err:
return ret;
}
+struct l2tp_nl_cb_data {
+ unsigned long tkey;
+ unsigned long skey;
+};
+
static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int ti = cb->args[0];
+ struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0];
+ unsigned long key = cbd->tkey;
struct l2tp_tunnel *tunnel;
struct net *net = sock_net(skb->sk);
for (;;) {
- tunnel = l2tp_tunnel_get_nth(net, ti);
+ tunnel = l2tp_tunnel_get_next(net, &key);
if (!tunnel)
goto out;
if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
tunnel, L2TP_CMD_TUNNEL_GET) < 0) {
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
goto out;
}
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
- ti++;
+ key++;
}
out:
- cb->args[0] = ti;
+ cbd->tkey = key;
return skb->len;
}
@@ -641,12 +647,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
}
}
out_tunnel:
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
}
@@ -671,7 +677,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
l2tp_nl_cmd_ops[pw_type]->session_delete(session);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
out:
return ret;
@@ -692,8 +698,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
if (info->attrs[L2TP_ATTR_SEND_SEQ]) {
+ struct l2tp_tunnel *tunnel = session->tunnel;
+
session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
- l2tp_session_set_header_len(session, session->tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);
}
if (info->attrs[L2TP_ATTR_LNS_MODE])
@@ -705,7 +713,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
ret = l2tp_session_notify(&l2tp_nl_family, info,
session, L2TP_CMD_SESSION_MODIFY);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
out:
return ret;
@@ -816,57 +824,59 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return ret;
err_ref_msg:
nlmsg_free(msg);
err_ref:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
err:
return ret;
}
static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0];
struct net *net = sock_net(skb->sk);
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
- int ti = cb->args[0];
- int si = cb->args[1];
+ unsigned long tkey = cbd->tkey;
+ unsigned long skey = cbd->skey;
for (;;) {
if (!tunnel) {
- tunnel = l2tp_tunnel_get_nth(net, ti);
+ tunnel = l2tp_tunnel_get_next(net, &tkey);
if (!tunnel)
goto out;
}
- session = l2tp_session_get_nth(tunnel, si);
+ session = l2tp_session_get_next(net, tunnel->sock, tunnel->version,
+ tunnel->tunnel_id, &skey);
if (!session) {
- ti++;
- l2tp_tunnel_dec_refcount(tunnel);
+ tkey++;
+ l2tp_tunnel_put(tunnel);
tunnel = NULL;
- si = 0;
+ skey = 0;
continue;
}
if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
session, L2TP_CMD_SESSION_GET) < 0) {
- l2tp_session_dec_refcount(session);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_session_put(session);
+ l2tp_tunnel_put(tunnel);
break;
}
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
- si++;
+ skey++;
}
out:
- cb->args[0] = ti;
- cb->args[1] = si;
+ cbd->tkey = tkey;
+ cbd->skey = skey;
return skb->len;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 3596290047b2..53baf2dd5d5d 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -119,7 +119,6 @@ struct pppol2tp_session {
struct mutex sk_lock; /* Protects .sk */
struct sock __rcu *sk; /* Pointer to the session PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
- struct rcu_head rcu; /* For asynchronous release */
};
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
@@ -150,27 +149,23 @@ static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
/* Helpers to obtain tunnel/session contexts from sockets.
*/
-static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+static struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
{
struct l2tp_session *session;
if (!sk)
return NULL;
- sock_hold(sk);
- session = (struct l2tp_session *)(sk->sk_user_data);
- if (!session) {
- sock_put(sk);
- goto out;
- }
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) {
- session = NULL;
- sock_put(sk);
- goto out;
+ rcu_read_lock();
+ session = rcu_dereference_sk_user_data(sk);
+ if (session && refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock();
+ WARN_ON_ONCE(session->magic != L2TP_SESSION_MAGIC);
+ return session;
}
+ rcu_read_unlock();
-out:
- return session;
+ return NULL;
}
/*****************************************************************************
@@ -318,12 +313,12 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
l2tp_xmit_skb(session, skb);
local_bh_enable();
- sock_put(sk);
+ l2tp_session_put(session);
return total_len;
error_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
error:
return error;
}
@@ -377,12 +372,12 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
l2tp_xmit_skb(session, skb);
local_bh_enable();
- sock_put(sk);
+ l2tp_session_put(session);
return 1;
abort_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
abort:
/* Free the original skb */
kfree_skb(skb);
@@ -393,29 +388,32 @@ abort:
* Session (and tunnel control) socket create/destroy.
*****************************************************************************/
-static void pppol2tp_put_sk(struct rcu_head *head)
-{
- struct pppol2tp_session *ps;
-
- ps = container_of(head, typeof(*ps), rcu);
- sock_put(ps->__sk);
-}
-
/* Really kill the session socket. (Called from sock_put() if
* refcnt == 0.)
*/
static void pppol2tp_session_destruct(struct sock *sk)
{
- struct l2tp_session *session = sk->sk_user_data;
-
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
+}
- if (session) {
- sk->sk_user_data = NULL;
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return;
- l2tp_session_dec_refcount(session);
+static void pppol2tp_session_close(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps;
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ if (ps->__sk) {
+ /* detach socket */
+ rcu_assign_sk_user_data(ps->__sk, NULL);
+ sock_put(ps->__sk);
+
+ /* drop ref taken when we referenced socket via sk_user_data */
+ l2tp_session_put(session);
}
}
@@ -444,30 +442,13 @@ static int pppol2tp_release(struct socket *sock)
session = pppol2tp_sock_to_session(sk);
if (session) {
- struct pppol2tp_session *ps;
-
l2tp_session_delete(session);
-
- ps = l2tp_session_priv(session);
- mutex_lock(&ps->sk_lock);
- ps->__sk = rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock));
- RCU_INIT_POINTER(ps->sk, NULL);
- mutex_unlock(&ps->sk_lock);
- call_rcu(&ps->rcu, pppol2tp_put_sk);
-
- /* Rely on the sock_put() call at the end of the function for
- * dropping the reference held by pppol2tp_sock_to_session().
- * The last reference will be dropped by pppol2tp_put_sk().
- */
+ /* drop ref taken by pppol2tp_sock_to_session */
+ l2tp_session_put(session);
}
release_sock(sk);
- /* This will delete the session context via
- * pppol2tp_session_destruct() if the socket's refcnt drops to
- * zero.
- */
sock_put(sk);
return 0;
@@ -506,6 +487,7 @@ static int pppol2tp_create(struct net *net, struct socket *sock, int kern)
goto out;
sock_init_data(sock, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sock->state = SS_UNCONNECTED;
sock->ops = &pppol2tp_ops;
@@ -542,6 +524,7 @@ static void pppol2tp_session_init(struct l2tp_session *session)
struct pppol2tp_session *ps;
session->recv_skb = pppol2tp_recv;
+ session->session_close = pppol2tp_session_close;
if (IS_ENABLED(CONFIG_L2TP_DEBUGFS))
session->show = pppol2tp_show;
@@ -685,7 +668,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
if (error < 0)
return ERR_PTR(error);
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
error = l2tp_tunnel_register(tunnel, net, &tcfg);
if (error < 0) {
kfree(tunnel);
@@ -701,7 +684,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
/* Error if socket is not prepped */
if (!tunnel->sock) {
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
return ERR_PTR(-ENOENT);
}
}
@@ -787,18 +770,20 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
goto end;
}
+ drop_refcnt = true;
+
pppol2tp_session_init(session);
ps = l2tp_session_priv(session);
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
mutex_lock(&ps->sk_lock);
error = l2tp_session_register(session, tunnel);
if (error < 0) {
mutex_unlock(&ps->sk_lock);
- kfree(session);
+ l2tp_session_put(session);
goto end;
}
- drop_refcnt = true;
+
new_session = true;
}
@@ -830,12 +815,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
out_no_ppp:
/* This is how we get the session context from the socket. */
- sk->sk_user_data = session;
+ sock_hold(sk);
+ rcu_assign_sk_user_data(sk, session);
rcu_assign_pointer(ps->sk, sk);
mutex_unlock(&ps->sk_lock);
/* Keep the reference we've grabbed on the session: sk doesn't expect
- * the session to disappear. pppol2tp_session_destruct() is responsible
+ * the session to disappear. pppol2tp_session_close() is responsible
* for dropping it.
*/
drop_refcnt = false;
@@ -850,8 +836,8 @@ end:
l2tp_tunnel_delete(tunnel);
}
if (drop_refcnt)
- l2tp_session_dec_refcount(session);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_session_put(session);
+ l2tp_tunnel_put(tunnel);
release_sock(sk);
return error;
@@ -891,7 +877,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
return 0;
err_sess:
- kfree(session);
+ l2tp_session_put(session);
err:
return error;
}
@@ -1002,7 +988,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
error = len;
- sock_put(sk);
+ l2tp_session_put(session);
end:
return error;
}
@@ -1052,12 +1038,12 @@ static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats,
return -EBADR;
if (session->pwtype != L2TP_PWTYPE_PPP) {
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return -EBADR;
}
pppol2tp_copy_stats(stats, &session->stats);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
}
@@ -1205,7 +1191,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
- l2tp_session_set_header_len(session, session->tunnel->version);
+ l2tp_session_set_header_len(session, session->tunnel->version,
+ session->tunnel->encap);
break;
case PPPOL2TP_SO_LNSMODE:
@@ -1274,7 +1261,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
err = pppol2tp_session_setsockopt(sk, session, optname, val);
}
- sock_put(sk);
+ l2tp_session_put(session);
end:
return err;
}
@@ -1395,7 +1382,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
err = 0;
end_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
end:
return err;
}
@@ -1406,14 +1393,12 @@ end:
* L2TPv2, we dump only L2TPv2 tunnels and sessions here.
*****************************************************************************/
-static unsigned int pppol2tp_net_id;
-
#ifdef CONFIG_PROC_FS
struct pppol2tp_seq_data {
struct seq_net_private p;
- int tunnel_idx; /* current tunnel */
- int session_idx; /* index of session within current tunnel */
+ unsigned long tkey; /* lookup key of current tunnel */
+ unsigned long skey; /* lookup key of current session */
struct l2tp_tunnel *tunnel;
struct l2tp_session *session; /* NULL means get next tunnel */
};
@@ -1422,17 +1407,17 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->tunnel)
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
for (;;) {
- pd->tunnel = l2tp_tunnel_get_nth(net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ pd->tunnel = l2tp_tunnel_get_next(net, &pd->tkey);
+ pd->tkey++;
/* Only accept L2TPv2 tunnels */
if (!pd->tunnel || pd->tunnel->version == 2)
return;
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
}
}
@@ -1440,13 +1425,15 @@ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->session)
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
- pd->session_idx++;
+ pd->session = l2tp_session_get_next(net, pd->tunnel->sock,
+ pd->tunnel->version,
+ pd->tunnel->tunnel_id, &pd->skey);
+ pd->skey++;
if (!pd->session) {
- pd->session_idx = 0;
+ pd->skey = 0;
pppol2tp_next_tunnel(net, pd);
}
}
@@ -1498,11 +1485,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
* or pppol2tp_next_tunnel().
*/
if (pd->session) {
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
pd->session = NULL;
}
if (pd->tunnel) {
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
pd->tunnel = NULL;
}
}
@@ -1513,7 +1500,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
seq_printf(m, "\nTUNNEL '%s', %c %d\n",
tunnel->name,
- (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
+ tunnel->sock ? 'Y' : 'N',
refcount_read(&tunnel->ref_count) - 1);
seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n",
0,
@@ -1641,7 +1628,6 @@ static __net_exit void pppol2tp_exit_net(struct net *net)
static struct pernet_operations pppol2tp_net_ops = {
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
- .id = &pppol2tp_net_id,
};
/*****************************************************************************
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 4eb52add7103..0259cde394ba 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1098,7 +1098,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (unlikely(level != SOL_LLC || optlen != sizeof(int)))
goto out;
- rc = copy_from_sockptr(&opt, optval, sizeof(opt));
+ rc = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (rc)
goto out;
rc = -EINVAL;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 13438cc0a6b1..cf0f7780fb10 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -96,7 +96,7 @@ config MAC80211_DEBUGFS
config MAC80211_MESSAGE_TRACING
bool "Trace all mac80211 debug messages"
- depends on MAC80211
+ depends on MAC80211 && TRACING
help
Select this option to have mac80211 register the
mac80211_msg trace subsystem with tracepoints to
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 9bffac7a4974..f3fbe5a4395e 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -170,28 +170,63 @@ static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
rcu_read_unlock();
}
-static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb,
- const struct ieee80211_addba_ext_ie *req,
- u16 buf_size)
+void ieee80211_add_addbaext(struct sk_buff *skb,
+ const u8 req_addba_ext_data,
+ u16 buf_size)
{
- struct ieee80211_addba_ext_ie *resp;
+ struct ieee80211_addba_ext_ie *addba_ext;
u8 *pos;
pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie));
*pos++ = WLAN_EID_ADDBA_EXT;
*pos++ = sizeof(struct ieee80211_addba_ext_ie);
- resp = (struct ieee80211_addba_ext_ie *)pos;
- resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG;
+ addba_ext = (struct ieee80211_addba_ext_ie *)pos;
- resp->data |= u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT,
- IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+ addba_ext->data = IEEE80211_ADDBA_EXT_NO_FRAG;
+ if (req_addba_ext_data)
+ addba_ext->data &= req_addba_ext_data;
+
+ addba_ext->data |=
+ u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT,
+ IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+}
+
+u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta,
+ const void *elem_data, ssize_t elem_len,
+ u16 *buf_size)
+{
+ struct ieee802_11_elems *elems;
+ u8 buf_size_1k, data = 0;
+
+ if (!sta->sta.deflink.he_cap.has_he)
+ return 0;
+
+ if (elem_len <= 0)
+ return 0;
+
+ elems = ieee802_11_parse_elems(elem_data, elem_len, true, NULL);
+
+ if (elems && !elems->parse_error && elems->addba_ext_ie) {
+ data = elems->addba_ext_ie->data;
+
+ if (!sta->sta.deflink.eht_cap.has_eht || !buf_size)
+ goto free;
+
+ buf_size_1k = u8_get_bits(elems->addba_ext_ie->data,
+ IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+ *buf_size |= (u16)buf_size_1k <<
+ IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT;
+ }
+free:
+ kfree(elems);
+
+ return data;
}
static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
u8 dialog_token, u16 status, u16 policy,
u16 buf_size, u16 timeout,
- const struct ieee80211_addba_ext_ie *addbaext)
+ const u8 req_addba_ext_data)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
@@ -207,20 +242,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
mgmt->u.action.category = WLAN_CATEGORY_BACK;
@@ -236,8 +258,8 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
- if (sta->sta.deflink.he_cap.has_he && addbaext)
- ieee80211_add_addbaext(sdata, skb, addbaext, buf_size);
+ if (sta->sta.deflink.he_cap.has_he)
+ ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size);
ieee80211_tx_skb(sdata, skb);
}
@@ -246,7 +268,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
u16 buf_size, bool tx, bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext)
+ const u8 addba_ext_data)
{
struct ieee80211_local *local = sta->sdata->local;
struct tid_ampdu_rx *tid_agg_rx;
@@ -432,7 +454,7 @@ end:
if (tx)
ieee80211_send_addba_resp(sta, sta->sta.addr, tid,
dialog_token, status, 1, buf_size,
- timeout, addbaext);
+ timeout, addba_ext_data);
}
void ieee80211_process_addba_request(struct ieee80211_local *local,
@@ -441,9 +463,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
size_t len)
{
u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
- struct ieee802_11_elems *elems = NULL;
- u8 dialog_token;
- int ies_len;
+ u8 dialog_token, addba_ext_data;
/* extract session parameters from addba request frame */
dialog_token = mgmt->u.action.u.addba_req.dialog_token;
@@ -456,28 +476,17 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
- ies_len = len - offsetof(struct ieee80211_mgmt,
- u.action.u.addba_req.variable);
- if (ies_len) {
- elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
- ies_len, true, NULL);
- if (!elems || elems->parse_error)
- goto free;
- }
-
- if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) {
- u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data,
- IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
-
- buf_size |= buf_size_1k << IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT;
- }
+ addba_ext_data =
+ ieee80211_retrieve_addba_ext_data(sta,
+ mgmt->u.action.u.addba_req.variable,
+ len -
+ offsetof(typeof(*mgmt),
+ u.action.u.addba_req.variable),
+ &buf_size);
__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
start_seq_num, ba_policy, tid,
- buf_size, true, false,
- elems ? elems->addba_ext_ie : NULL);
-free:
- kfree(elems);
+ buf_size, true, false, addba_ext_data);
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 677bbbac9f16..61f2cac37728 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -58,36 +58,24 @@
* complete.
*/
-static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
- const u8 *da, u16 tid,
+static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid,
u8 dialog_token, u16 start_seq_num,
u16 agg_size, u16 timeout)
{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u16 capab;
- skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
-
+ skb = dev_alloc_skb(sizeof(*mgmt) +
+ 2 + sizeof(struct ieee80211_addba_ext_ie) +
+ local->hw.extra_tx_headroom);
if (!skb)
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
@@ -106,6 +94,9 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.start_seq_num =
cpu_to_le16(start_seq_num << 4);
+ if (sta->sta.deflink.he_cap.has_he)
+ ieee80211_add_addbaext(skb, 0, agg_size);
+
ieee80211_tx_skb_tid(sdata, skb, tid, -1);
}
@@ -473,8 +464,11 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
sta->ampdu_mlme.addba_req_num[tid]++;
spin_unlock_bh(&sta->lock);
- if (sta->sta.deflink.he_cap.has_he) {
+ if (sta->sta.deflink.eht_cap.has_eht) {
buf_size = local->hw.max_tx_aggregation_subframes;
+ } else if (sta->sta.deflink.he_cap.has_he) {
+ buf_size = min_t(u16, local->hw.max_tx_aggregation_subframes,
+ IEEE80211_MAX_AMPDU_BUF_HE);
} else {
/*
* We really should use what the driver told us it will
@@ -486,9 +480,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
}
/* send AddBA request */
- ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
- tid_tx->dialog_token, tid_tx->ssn,
- buf_size, tid_tx->timeout);
+ ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token,
+ tid_tx->ssn, buf_size, tid_tx->timeout);
WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state));
}
@@ -810,7 +803,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) {
ieee80211_send_addba_with_timeout(sta, tid_tx);
- /* RESPONSE_RECEIVED state whould trigger the flow again */
+ /* RESPONSE_RECEIVED state would trigger the flow again */
return;
}
@@ -983,6 +976,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK);
buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK);
+
+ ieee80211_retrieve_addba_ext_data(sta,
+ mgmt->u.action.u.addba_resp.variable,
+ len - offsetof(typeof(*mgmt),
+ u.action.u.addba_resp.variable),
+ &buf_size);
+
buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
txq = sta->sta.txq[tid];
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index fdf8b658fede..c61df637232a 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -55,10 +55,21 @@
#define HE_DURATION_S(shift, streams, gi, bps) \
(HE_DURATION(streams, gi, bps) >> shift)
+/* gi in HE/EHT is identical. It matches enum nl80211_eht_gi as well */
+#define EHT_GI_08 HE_GI_08
+#define EHT_GI_16 HE_GI_16
+#define EHT_GI_32 HE_GI_32
+
+#define EHT_DURATION(streams, gi, bps) \
+ HE_DURATION(streams, gi, bps)
+#define EHT_DURATION_S(shift, streams, gi, bps) \
+ HE_DURATION_S(shift, streams, gi, bps)
+
#define BW_20 0
#define BW_40 1
#define BW_80 2
#define BW_160 3
+#define BW_320 4
/*
* Define group sort order: HT40 -> SGI -> #streams
@@ -68,17 +79,26 @@
#define IEEE80211_VHT_STREAM_GROUPS 8 /* BW(=4) * SGI(=2) */
#define IEEE80211_HE_MAX_STREAMS 8
+#define IEEE80211_HE_STREAM_GROUPS 12 /* BW(=4) * GI(=3) */
+
+#define IEEE80211_EHT_MAX_STREAMS 8
+#define IEEE80211_EHT_STREAM_GROUPS 15 /* BW(=5) * GI(=3) */
#define IEEE80211_HT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_HT_STREAM_GROUPS)
#define IEEE80211_VHT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_VHT_STREAM_GROUPS)
+#define IEEE80211_HE_GROUPS_NB (IEEE80211_HE_MAX_STREAMS * \
+ IEEE80211_HE_STREAM_GROUPS)
+#define IEEE80211_EHT_GROUPS_NB (IEEE80211_EHT_MAX_STREAMS * \
+ IEEE80211_EHT_STREAM_GROUPS)
#define IEEE80211_HT_GROUP_0 0
#define IEEE80211_VHT_GROUP_0 (IEEE80211_HT_GROUP_0 + IEEE80211_HT_GROUPS_NB)
#define IEEE80211_HE_GROUP_0 (IEEE80211_VHT_GROUP_0 + IEEE80211_VHT_GROUPS_NB)
+#define IEEE80211_EHT_GROUP_0 (IEEE80211_HE_GROUP_0 + IEEE80211_HE_GROUPS_NB)
-#define MCS_GROUP_RATES 12
+#define MCS_GROUP_RATES 14
#define HT_GROUP_IDX(_streams, _sgi, _ht40) \
IEEE80211_HT_GROUP_0 + \
@@ -203,6 +223,69 @@
#define HE_GROUP(_streams, _gi, _bw) \
__HE_GROUP(_streams, _gi, _bw, \
HE_GROUP_SHIFT(_streams, _gi, _bw))
+
+#define EHT_BW2VBPS(_bw, r5, r4, r3, r2, r1) \
+ ((_bw) == BW_320 ? r5 : BW2VBPS(_bw, r4, r3, r2, r1))
+
+#define EHT_GROUP_IDX(_streams, _gi, _bw) \
+ (IEEE80211_EHT_GROUP_0 + \
+ IEEE80211_EHT_MAX_STREAMS * 3 * (_bw) + \
+ IEEE80211_EHT_MAX_STREAMS * (_gi) + \
+ (_streams) - 1)
+
+#define __EHT_GROUP(_streams, _gi, _bw, _s) \
+ [EHT_GROUP_IDX(_streams, _gi, _bw)] = { \
+ .shift = _s, \
+ .duration = { \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 1960, 980, 490, 234, 117)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 3920, 1960, 980, 468, 234)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 5880, 2937, 1470, 702, 351)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 7840, 3920, 1960, 936, 468)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 11760, 5880, 2940, 1404, 702)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 15680, 7840, 3920, 1872, 936)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 17640, 8820, 4410, 2106, 1053)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 19600, 9800, 4900, 2340, 1170)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 23520, 11760, 5880, 2808, 1404)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 26133, 13066, 6533, 3120, 1560)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 29400, 14700, 7350, 3510, 1755)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 32666, 16333, 8166, 3900, 1950)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 35280, 17640, 8820, 4212, 2106)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 39200, 19600, 9800, 4680, 2340)) \
+ } \
+}
+
+#define EHT_GROUP_SHIFT(_streams, _gi, _bw) \
+ GROUP_SHIFT(EHT_DURATION(_streams, _gi, \
+ EHT_BW2VBPS(_bw, 1960, 980, 490, 234, 117)))
+
+#define EHT_GROUP(_streams, _gi, _bw) \
+ __EHT_GROUP(_streams, _gi, _bw, \
+ EHT_GROUP_SHIFT(_streams, _gi, _bw))
+
+#define EHT_GROUP_RANGE(_gi, _bw) \
+ EHT_GROUP(1, _gi, _bw), \
+ EHT_GROUP(2, _gi, _bw), \
+ EHT_GROUP(3, _gi, _bw), \
+ EHT_GROUP(4, _gi, _bw), \
+ EHT_GROUP(5, _gi, _bw), \
+ EHT_GROUP(6, _gi, _bw), \
+ EHT_GROUP(7, _gi, _bw), \
+ EHT_GROUP(8, _gi, _bw)
+
struct mcs_group {
u8 shift;
u16 duration[MCS_GROUP_RATES];
@@ -376,6 +459,26 @@ static const struct mcs_group airtime_mcs_groups[] = {
HE_GROUP(6, HE_GI_32, BW_160),
HE_GROUP(7, HE_GI_32, BW_160),
HE_GROUP(8, HE_GI_32, BW_160),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_20),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_20),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_20),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_40),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_40),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_40),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_80),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_80),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_80),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_160),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_160),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_160),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_320),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_320),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_320),
};
static u32
@@ -422,6 +525,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
case RATE_INFO_BW_160:
bw = BW_160;
break;
+ case RATE_INFO_BW_320:
+ bw = BW_320;
+ break;
default:
WARN_ON_ONCE(1);
return 0;
@@ -443,14 +549,27 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
idx = status->rate_idx;
group = HE_GROUP_IDX(streams, status->he_gi, bw);
break;
+ case RX_ENC_EHT:
+ streams = status->nss;
+ idx = status->rate_idx;
+ group = EHT_GROUP_IDX(streams, status->eht.gi, bw);
+ break;
default:
WARN_ON_ONCE(1);
return 0;
}
- if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) ||
- (status->encoding == RX_ENC_HE && streams > 8)))
- return 0;
+ switch (status->encoding) {
+ case RX_ENC_EHT:
+ case RX_ENC_HE:
+ if (WARN_ON_ONCE(streams > 8))
+ return 0;
+ break;
+ default:
+ if (WARN_ON_ONCE(streams > 4))
+ return 0;
+ break;
+ }
if (idx >= MCS_GROUP_RATES)
return 0;
@@ -517,7 +636,9 @@ static bool ieee80211_fill_rate_info(struct ieee80211_hw *hw,
stat->nss = ri->nss;
stat->rate_idx = ri->mcs;
- if (ri->flags & RATE_INFO_FLAGS_HE_MCS)
+ if (ri->flags & RATE_INFO_FLAGS_EHT_MCS)
+ stat->encoding = RX_ENC_EHT;
+ else if (ri->flags & RATE_INFO_FLAGS_HE_MCS)
stat->encoding = RX_ENC_HE;
else if (ri->flags & RATE_INFO_FLAGS_VHT_MCS)
stat->encoding = RX_ENC_VHT;
@@ -529,7 +650,14 @@ static bool ieee80211_fill_rate_info(struct ieee80211_hw *hw,
if (ri->flags & RATE_INFO_FLAGS_SHORT_GI)
stat->enc_flags |= RX_ENC_FLAG_SHORT_GI;
- stat->he_gi = ri->he_gi;
+ switch (stat->encoding) {
+ case RX_ENC_EHT:
+ stat->eht.gi = ri->eht_gi;
+ break;
+ default:
+ stat->he_gi = ri->he_gi;
+ break;
+ }
if (stat->encoding != RX_ENC_LEGACY)
return true;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b02b84ce2130..d3fc158ccaf6 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -105,8 +105,11 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
}
/* also validate MU-MIMO change */
- monitor_sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
+ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ monitor_sdata = sdata;
+ else
+ monitor_sdata = wiphy_dereference(local->hw.wiphy,
+ local->monitor_sdata);
if (!monitor_sdata &&
(params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
@@ -114,7 +117,9 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
/* apply all changes now - no failures allowed */
- if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ if (monitor_sdata &&
+ (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)))
ieee80211_set_mu_mimo_follow(monitor_sdata, params);
if (params->flags) {
@@ -138,7 +143,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_mbssid_config params,
+ struct cfg80211_mbssid_config *params,
struct ieee80211_bss_conf *link_conf)
{
struct ieee80211_sub_if_data *tx_sdata;
@@ -149,10 +154,10 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
link_conf->ema_ap = false;
link_conf->bssid_indicator = 0;
- if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
+ if (sdata->vif.type != NL80211_IFTYPE_AP || !params->tx_wdev)
return -EINVAL;
- tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev);
+ tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params->tx_wdev);
if (!tx_sdata)
return -EINVAL;
@@ -161,9 +166,9 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
} else {
sdata->vif.mbssid_tx_vif = &tx_sdata->vif;
link_conf->nontransmitted = true;
- link_conf->bssid_index = params.index;
+ link_conf->bssid_index = params->index;
}
- if (params.ema)
+ if (params->ema)
link_conf->ema_ap = true;
return 0;
@@ -194,6 +199,24 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
}
}
+ /* Let the driver know that an interface is going to be added.
+ * Indicate so only for interface types that will be added to the
+ * driver.
+ */
+ switch (type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ break;
+ case NL80211_IFTYPE_MONITOR:
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) ||
+ !(params->flags & MONITOR_FLAG_ACTIVE))
+ break;
+ fallthrough;
+ default:
+ drv_prep_add_interface(local,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ break;
+ }
+
return wdev;
}
@@ -879,6 +902,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
@@ -888,22 +912,25 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
lockdep_assert_wiphy(local->hw.wiphy);
- if (cfg80211_chandef_identical(&local->monitor_chanreq.oper,
- &chanreq.oper))
- return 0;
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
+ if (cfg80211_chandef_identical(&local->monitor_chanreq.oper,
+ &chanreq.oper))
+ return 0;
- sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
- if (!sdata)
- goto done;
+ sdata = wiphy_dereference(wiphy, local->monitor_sdata);
+ if (!sdata)
+ goto done;
+ }
- if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper,
+ if (rcu_access_pointer(sdata->deflink.conf->chanctx_conf) &&
+ cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper,
&chanreq.oper))
return 0;
ieee80211_link_release_channel(&sdata->deflink);
ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
- IEEE80211_CHANCTX_EXCLUSIVE);
+ IEEE80211_CHANCTX_SHARED);
if (ret)
return ret;
done:
@@ -1061,13 +1088,13 @@ ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst,
{
int i, offset = 0;
+ dst->cnt = src->cnt;
for (i = 0; i < src->cnt; i++) {
memcpy(pos + offset, src->elem[i].data, src->elem[i].len);
dst->elem[i].len = src->elem[i].len;
dst->elem[i].data = pos + offset;
offset += dst->elem[i].len;
}
- dst->cnt = src->cnt;
return offset;
}
@@ -1294,9 +1321,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (old)
return -EALREADY;
- if (params->smps_mode != NL80211_SMPS_OFF)
- return -EOPNOTSUPP;
-
link->smps_mode = IEEE80211_SMPS_OFF;
link->needed_rx_chains = sdata->local->rx_chains;
@@ -1390,7 +1414,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (sdata->vif.type == NL80211_IFTYPE_AP &&
params->mbssid_config.tx_wdev) {
err = ieee80211_set_ap_mbssid_options(sdata,
- params->mbssid_config,
+ &params->mbssid_config,
link_conf);
if (err)
return err;
@@ -1662,12 +1686,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_BEACON_ENABLED);
- if (sdata->wdev.cac_started) {
+ if (sdata->wdev.links[link_id].cac_started) {
chandef = link_conf->chanreq.oper;
- wiphy_delayed_work_cancel(wiphy, &sdata->dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, link_id);
}
drv_stop_ap(sdata->local, sdata, link_conf);
@@ -1705,7 +1729,7 @@ static int sta_apply_auth_flags(struct ieee80211_local *local,
* before drv_sta_state() is called.
*/
if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- rate_control_rate_init(sta);
+ rate_control_rate_init_all_links(sta);
ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
if (ret)
@@ -1911,6 +1935,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
params->eht_capa_len,
link_sta);
+ ieee80211_sta_init_nss(link_sta);
+
if (params->opmode_notif_used) {
/* returned value is only needed for rc update, but the
* rc isn't initialized here yet, so ignore it
@@ -1920,8 +1946,6 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sband->band);
}
- ieee80211_sta_init_nss(link_sta);
-
return 0;
}
@@ -2134,7 +2158,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
*/
if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
test_sta_flag(sta, WLAN_STA_ASSOC))
- rate_control_rate_init(sta);
+ rate_control_rate_init_all_links(sta);
return sta_info_insert(sta);
}
@@ -3046,13 +3070,31 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
enum nl80211_tx_power_setting txp_type = type;
bool update_txp_type = false;
bool has_monitor = false;
+ int user_power_level;
+ int old_power = local->user_power_level;
lockdep_assert_wiphy(local->hw.wiphy);
+ switch (type) {
+ case NL80211_TX_POWER_AUTOMATIC:
+ user_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ txp_type = NL80211_TX_POWER_LIMITED;
+ break;
+ case NL80211_TX_POWER_LIMITED:
+ case NL80211_TX_POWER_FIXED:
+ if (mbm < 0 || (mbm % 100))
+ return -EOPNOTSUPP;
+ user_power_level = MBM_TO_DBM(mbm);
+ break;
+ default:
+ return -EINVAL;
+ }
+
if (wdev) {
sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
return -EOPNOTSUPP;
@@ -3062,57 +3104,67 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
return -EOPNOTSUPP;
}
- switch (type) {
- case NL80211_TX_POWER_AUTOMATIC:
- sdata->deflink.user_power_level =
- IEEE80211_UNSET_POWER_LEVEL;
- txp_type = NL80211_TX_POWER_LIMITED;
- break;
- case NL80211_TX_POWER_LIMITED:
- case NL80211_TX_POWER_FIXED:
- if (mbm < 0 || (mbm % 100))
- return -EOPNOTSUPP;
- sdata->deflink.user_power_level = MBM_TO_DBM(mbm);
- break;
- }
+ for (int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link =
+ wiphy_dereference(wiphy, sdata->link[link_id]);
- if (txp_type != sdata->vif.bss_conf.txpower_type) {
- update_txp_type = true;
- sdata->vif.bss_conf.txpower_type = txp_type;
- }
+ if (!link)
+ continue;
- ieee80211_recalc_txpower(sdata, update_txp_type);
+ link->user_power_level = user_power_level;
+ if (txp_type != link->conf->txpower_type) {
+ update_txp_type = true;
+ link->conf->txpower_type = txp_type;
+ }
+
+ ieee80211_recalc_txpower(link, update_txp_type);
+ }
return 0;
}
- switch (type) {
- case NL80211_TX_POWER_AUTOMATIC:
- local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
- txp_type = NL80211_TX_POWER_LIMITED;
- break;
- case NL80211_TX_POWER_LIMITED:
- case NL80211_TX_POWER_FIXED:
- if (mbm < 0 || (mbm % 100))
- return -EOPNOTSUPP;
- local->user_power_level = MBM_TO_DBM(mbm);
- break;
- }
+ local->user_power_level = user_power_level;
list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
has_monitor = true;
continue;
}
- sdata->deflink.user_power_level = local->user_power_level;
- if (txp_type != sdata->vif.bss_conf.txpower_type)
- update_txp_type = true;
- sdata->vif.bss_conf.txpower_type = txp_type;
+
+ for (int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link =
+ wiphy_dereference(wiphy, sdata->link[link_id]);
+
+ if (!link)
+ continue;
+
+ link->user_power_level = local->user_power_level;
+ if (txp_type != link->conf->txpower_type)
+ update_txp_type = true;
+ link->conf->txpower_type = txp_type;
+ }
}
list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
continue;
- ieee80211_recalc_txpower(sdata, update_txp_type);
+
+ for (int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link =
+ wiphy_dereference(wiphy, sdata->link[link_id]);
+
+ if (!link)
+ continue;
+
+ ieee80211_recalc_txpower(link, update_txp_type);
+ }
}
if (has_monitor) {
@@ -3124,10 +3176,15 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
update_txp_type = true;
sdata->vif.bss_conf.txpower_type = txp_type;
- ieee80211_recalc_txpower(sdata, update_txp_type);
+ ieee80211_recalc_txpower(&sdata->deflink,
+ update_txp_type);
}
}
+ if (local->emulate_chanctx &&
+ (old_power != local->user_power_level))
+ ieee80211_hw_conf_chan(local);
+
return 0;
}
@@ -3138,7 +3195,8 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy,
struct ieee80211_local *local = wiphy_priv(wiphy);
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- if (local->ops->get_txpower)
+ if (local->ops->get_txpower &&
+ (sdata->flags & IEEE80211_SDATA_IN_DRIVER))
return drv_get_txpower(local, sdata, dbm);
if (local->emulate_chanctx)
@@ -3462,51 +3520,58 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms)
+ u32 cac_time_ms, int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_chan_req chanreq = { .oper = *chandef };
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- if (!list_empty(&local->roc_list) || local->scanning) {
- err = -EBUSY;
- goto out_unlock;
- }
+ if (!list_empty(&local->roc_list) || local->scanning)
+ return -EBUSY;
+
+ link_data = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link_data)
+ return -ENOLINK;
/* whatever, but channel contexts should not complain about that one */
- sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
- sdata->deflink.needed_rx_chains = local->rx_chains;
+ link_data->smps_mode = IEEE80211_SMPS_OFF;
+ link_data->needed_rx_chains = local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
+ err = ieee80211_link_use_channel(link_data, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
- goto out_unlock;
+ return err;
- wiphy_delayed_work_queue(wiphy, &sdata->dfs_cac_timer_work,
+ wiphy_delayed_work_queue(wiphy, &link_data->dfs_cac_timer_work,
msecs_to_jiffies(cac_time_ms));
- out_unlock:
- return err;
+ return 0;
}
static void ieee80211_end_cac(struct wiphy *wiphy,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
+ link_data = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link_data)
+ continue;
+
wiphy_delayed_work_cancel(wiphy,
- &sdata->dfs_cac_timer_work);
+ &link_data->dfs_cac_timer_work);
- if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(&sdata->deflink);
- sdata->wdev.cac_started = false;
+ if (sdata->wdev.links[link_id].cac_started) {
+ ieee80211_link_release_channel(link_data);
+ sdata->wdev.links[link_id].cac_started = false;
}
}
}
@@ -3661,13 +3726,12 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id)
}
EXPORT_SYMBOL(ieee80211_csa_finish);
-void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_tx)
+void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- sdata->csa_blocked_queues = block_tx;
sdata_info(sdata, "channel switch failed, disconnecting\n");
wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work);
}
@@ -3961,7 +4025,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (!list_empty(&local->roc_list) || local->scanning)
return -EBUSY;
- if (sdata->wdev.cac_started)
+ if (sdata->wdev.links[link_id].cac_started)
return -EBUSY;
if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
@@ -4294,7 +4358,8 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
if (chanctx_conf) {
*chandef = link->conf->chanreq.oper;
ret = 0;
- } else if (local->open_count > 0 &&
+ } else if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) &&
+ local->open_count > 0 &&
local->open_count == local->monitors &&
sdata->vif.type == NL80211_IFTYPE_MONITOR) {
*chandef = local->monitor_chanreq.oper;
@@ -4819,12 +4884,12 @@ void ieee80211_color_change_finalize_work(struct wiphy *wiphy,
ieee80211_color_change_finalize(link);
}
-void ieee80211_color_collision_detection_work(struct work_struct *work)
+void ieee80211_color_collision_detection_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
- struct delayed_work *delayed_work = to_delayed_work(work);
struct ieee80211_link_data *link =
- container_of(delayed_work, struct ieee80211_link_data,
- color_collision_detect_work);
+ container_of(work, struct ieee80211_link_data,
+ color_collision_detect_work.work);
struct ieee80211_sub_if_data *sdata = link->sdata;
cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap,
@@ -4877,7 +4942,8 @@ ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
return;
}
- if (delayed_work_pending(&link->color_collision_detect_work)) {
+ if (wiphy_delayed_work_pending(sdata->local->hw.wiphy,
+ &link->color_collision_detect_work)) {
rcu_read_unlock();
return;
}
@@ -4886,9 +4952,9 @@ ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
/* queue the color collision detection event every 500 ms in order to
* avoid sending too much netlink messages to userspace.
*/
- ieee80211_queue_delayed_work(&sdata->local->hw,
- &link->color_collision_detect_work,
- msecs_to_jiffies(500));
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &link->color_collision_detect_work,
+ msecs_to_jiffies(500));
rcu_read_unlock();
}
@@ -4979,10 +5045,16 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy,
unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ u16 new_links = wdev->valid_links & ~BIT(link_id);
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
+ /* During the link teardown process, certain functions require the
+ * link_id to remain in the valid_links bitmap. Therefore, instead
+ * of removing the link_id from the bitmap, pass a masked value to
+ * simulate as if link_id does not exist anymore.
+ */
+ ieee80211_vif_set_links(sdata, new_links, 0);
}
static int
@@ -5016,6 +5088,13 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev,
return ret;
}
+ if (test_sta_flag(sta, WLAN_STA_ASSOC)) {
+ struct link_sta_info *link_sta;
+
+ link_sta = sdata_dereference(sta->link[params->link_id], sdata);
+ rate_control_rate_init(link_sta);
+ }
+
/* ieee80211_sta_activate_link frees the link upon failure */
return ieee80211_sta_activate_link(sta, params->link_id);
}
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e8567723e94d..a442cb667520 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -286,7 +286,9 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link)
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct sta_info *sta;
- list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ list_for_each_entry(sta, &sdata->local->sta_list, list) {
if (sdata != sta->sdata &&
!(sta->sdata->bss && sta->sdata->bss == sdata->bss))
continue;
@@ -321,22 +323,34 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
continue;
switch (link->sdata->vif.type) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- width = ieee80211_get_max_required_bw(link);
- break;
case NL80211_IFTYPE_STATION:
+ if (!link->sdata->vif.cfg.assoc) {
+ /*
+ * The AP's sta->bandwidth may not yet be set
+ * at this point (pre-association), so simply
+ * take the width from the chandef. We cannot
+ * have TDLS peers yet (only after association).
+ */
+ width = link->conf->chanreq.oper.width;
+ break;
+ }
/*
- * The ap's sta->bandwidth is not set yet at this
- * point, so take the width from the chandef, but
- * account also for TDLS peers
+ * otherwise just use min_def like in AP, depending on what
+ * we currently think the AP STA (and possibly TDLS peers)
+ * require(s)
*/
- width = max(link->conf->chanreq.oper.width,
- ieee80211_get_max_required_bw(link));
+ fallthrough;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ width = ieee80211_get_max_required_bw(link);
break;
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_NAN:
continue;
+ case NL80211_IFTYPE_MONITOR:
+ WARN_ON_ONCE(!ieee80211_hw_check(&local->hw,
+ NO_VIRTUAL_MONITOR));
+ fallthrough;
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
@@ -345,7 +359,6 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
- case NL80211_IFTYPE_MONITOR:
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_P2P_GO:
WARN_ON_ONCE(1);
@@ -407,7 +420,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
if (!ctx->driver_present)
return 0;
- return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH;
+ return IEEE80211_CHANCTX_CHANGE_MIN_DEF;
}
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
@@ -460,12 +473,12 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
continue;
/* vif changed to narrow BW and narrow BW for station wasn't
- * requested or vise versa */
+ * requested or vice versa */
if ((new_sta_bw < link_sta->pub->bandwidth) == !narrowed)
continue;
link_sta->pub->bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta, link_id,
+ rate_control_rate_update(local, sband, link_sta,
IEEE80211_RC_BW_CHANGED);
}
}
@@ -681,6 +694,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
ctx->mode = mode;
ctx->conf.radar_enabled = false;
ctx->conf.radio_idx = radio_idx;
+ ctx->radar_detected = false;
_ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false);
return ctx;
@@ -902,7 +916,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
}
if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
- ieee80211_recalc_txpower(sdata, false);
+ ieee80211_recalc_txpower(link, false);
ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false);
}
@@ -953,6 +967,10 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
if (!link->sdata->u.mgd.associated)
continue;
break;
+ case NL80211_IFTYPE_MONITOR:
+ if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ continue;
+ break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
@@ -965,6 +983,11 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
continue;
+ if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ rx_chains_dynamic = rx_chains_static = local->rx_chains;
+ break;
+ }
+
switch (link->smps_mode) {
default:
WARN_ONCE(1, "Invalid SMPS mode %d\n",
@@ -1115,7 +1138,7 @@ ieee80211_replace_chanctx(struct ieee80211_local *local,
*
* Consider ctx1..3, link1..6, each ctx has 2 links. link1 and
* link2 from ctx1 request new different chandefs starting 2
- * in-place reserations with ctx4 and ctx5 replacing ctx1 and
+ * in-place reservations with ctx4 and ctx5 replacing ctx1 and
* ctx2 respectively. Next link5 and link6 from ctx3 reserve
* ctx4. If link3 and link4 remain on ctx2 as they are then this
* fails unless `replace_ctx` from ctx5 is replaced with ctx3.
@@ -1166,7 +1189,7 @@ ieee80211_replace_chanctx(struct ieee80211_local *local,
static bool
ieee80211_find_available_radio(struct ieee80211_local *local,
const struct ieee80211_chan_req *chanreq,
- int *radio_idx)
+ u32 radio_mask, int *radio_idx)
{
struct wiphy *wiphy = local->hw.wiphy;
const struct wiphy_radio *radio;
@@ -1177,6 +1200,9 @@ ieee80211_find_available_radio(struct ieee80211_local *local,
return true;
for (i = 0; i < wiphy->n_radio; i++) {
+ if (!(radio_mask & BIT(i)))
+ continue;
+
radio = &wiphy->radio[i];
if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper))
continue;
@@ -1210,7 +1236,9 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode);
if (!new_ctx) {
if (ieee80211_can_create_new_chanctx(local, -1) &&
- ieee80211_find_available_radio(local, chanreq, &radio_idx))
+ ieee80211_find_available_radio(local, chanreq,
+ sdata->wdev.radio_mask,
+ &radio_idx))
new_ctx = ieee80211_new_chanctx(local, chanreq, mode,
false, radio_idx);
else
@@ -1709,7 +1737,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
link,
changed);
- ieee80211_recalc_txpower(sdata, false);
+ ieee80211_recalc_txpower(link, false);
}
ieee80211_recalc_chanctx_chantype(local, ctx);
@@ -1880,7 +1908,9 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link,
/* Note: context is now reserved */
if (ctx)
reserved = true;
- else if (!ieee80211_find_available_radio(local, chanreq, &radio_idx))
+ else if (!ieee80211_find_available_radio(local, chanreq,
+ sdata->wdev.radio_mask,
+ &radio_idx))
ctx = ERR_PTR(-EBUSY);
else
ctx = ieee80211_new_chanctx(local, chanreq, mode,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 02b5476a4376..be2e486907f9 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -42,9 +42,8 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \
}
#define DEBUGFS_READONLY_FILE_OPS(name) \
-static const struct file_operations name## _ops = { \
+static const struct debugfs_short_fops name## _ops = { \
.read = name## _read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
};
@@ -142,10 +141,9 @@ static ssize_t aqm_write(struct file *file,
return -EINVAL;
}
-static const struct file_operations aqm_ops = {
+static const struct debugfs_short_fops aqm_ops = {
.write = aqm_write,
.read = aqm_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -194,10 +192,9 @@ static ssize_t airtime_flags_write(struct file *file,
return count;
}
-static const struct file_operations airtime_flags_ops = {
+static const struct debugfs_short_fops airtime_flags_ops = {
.write = airtime_flags_write,
.read = airtime_flags_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -225,9 +222,8 @@ static ssize_t aql_pending_read(struct file *file,
buf, len);
}
-static const struct file_operations aql_pending_ops = {
+static const struct debugfs_short_fops aql_pending_ops = {
.read = aql_pending_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -305,10 +301,9 @@ static ssize_t aql_txq_limit_write(struct file *file,
return count;
}
-static const struct file_operations aql_txq_limit_ops = {
+static const struct debugfs_short_fops aql_txq_limit_ops = {
.write = aql_txq_limit_write,
.read = aql_txq_limit_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -355,10 +350,9 @@ static ssize_t aql_enable_write(struct file *file, const char __user *user_buf,
return count;
}
-static const struct file_operations aql_enable_ops = {
+static const struct debugfs_short_fops aql_enable_ops = {
.write = aql_enable_write,
.read = aql_enable_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -406,10 +400,9 @@ static ssize_t force_tx_status_write(struct file *file,
return count;
}
-static const struct file_operations force_tx_status_ops = {
+static const struct debugfs_short_fops force_tx_status_ops = {
.write = force_tx_status_write,
.read = force_tx_status_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -434,9 +427,8 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
return count;
}
-static const struct file_operations reset_ops = {
+static const struct debugfs_short_fops reset_ops = {
.write = reset_write,
- .open = simple_open,
.llseek = noop_llseek,
};
#endif
@@ -456,6 +448,7 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_DYNAMIC_PS),
FLAG(MFP_CAPABLE),
FLAG(WANT_MONITOR_VIF),
+ FLAG(NO_VIRTUAL_MONITOR),
FLAG(NO_AUTO_VIF),
FLAG(SW_CRYPTO_CONTROL),
FLAG(SUPPORT_FAST_XMIT),
@@ -623,9 +616,8 @@ static ssize_t stats_ ##name## _read(struct file *file, \
print_devstats_##name); \
} \
\
-static const struct file_operations stats_ ##name## _ops = { \
+static const struct debugfs_short_fops stats_ ##name## _ops = { \
.read = stats_ ##name## _read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
};
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 7e54da508765..b3a64edea0f2 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -26,17 +26,15 @@ static ssize_t key_##name##_read(struct file *file, \
#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n")
#define KEY_OPS(name) \
-static const struct file_operations key_ ##name## _ops = { \
+static const struct debugfs_short_fops key_ ##name## _ops = { \
.read = key_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
#define KEY_OPS_W(name) \
-static const struct file_operations key_ ##name## _ops = { \
+static const struct debugfs_short_fops key_ ##name## _ops = { \
.read = key_##name##_read, \
.write = key_##name##_write, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
@@ -49,9 +47,8 @@ static const struct file_operations key_ ##name## _ops = { \
#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n")
#define KEY_CONF_OPS(name) \
-static const struct file_operations key_ ##name## _ops = { \
+static const struct debugfs_short_fops key_ ##name## _ops = { \
.read = key_conf_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 68596ef78b15..a9bc2fd59f55 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -221,10 +221,9 @@ static ssize_t ieee80211_if_fmt_##name( \
}
#define _IEEE80211_IF_FILE_OPS(name, _read, _write) \
-static const struct file_operations name##_ops = { \
+static const struct debugfs_short_fops name##_ops = { \
.read = (_read), \
.write = (_write), \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 1e9389c49a57..a67a9d316008 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -30,17 +30,15 @@ static ssize_t sta_ ##name## _read(struct file *file, \
#define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
#define STA_OPS(name) \
-static const struct file_operations sta_ ##name## _ops = { \
+static const struct debugfs_short_fops sta_ ##name## _ops = { \
.read = sta_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
#define STA_OPS_RW(name) \
-static const struct file_operations sta_ ##name## _ops = { \
+static const struct debugfs_short_fops sta_ ##name## _ops = { \
.read = sta_##name##_read, \
.write = sta_##name##_write, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
@@ -450,9 +448,8 @@ STA_OPS_RW(agg_status);
/* link sta attributes */
#define LINK_STA_OPS(name) \
-static const struct file_operations link_sta_ ##name## _ops = { \
+static const struct debugfs_short_fops link_sta_ ##name## _ops = { \
.read = link_sta_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index fe868b521622..299d38e9e863 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -65,6 +65,7 @@ int drv_add_interface(struct ieee80211_local *local,
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
(sdata->vif.type == NL80211_IFTYPE_MONITOR &&
!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) &&
!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))))
return -EINVAL;
@@ -181,9 +182,10 @@ int drv_sta_set_txpwr(struct ieee80211_local *local,
return ret;
}
-void drv_sta_rc_update(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u32 changed)
+void drv_link_sta_rc_update(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_sta *link_sta,
+ u32 changed)
{
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -193,10 +195,10 @@ void drv_sta_rc_update(struct ieee80211_local *local,
(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
sdata->vif.type != NL80211_IFTYPE_MESH_POINT));
- trace_drv_sta_rc_update(local, sdata, sta, changed);
- if (local->ops->sta_rc_update)
- local->ops->sta_rc_update(&local->hw, &sdata->vif,
- sta, changed);
+ trace_drv_link_sta_rc_update(local, sdata, link_sta, changed);
+ if (local->ops->link_sta_rc_update)
+ local->ops->link_sta_rc_update(&local->hw, &sdata->vif,
+ link_sta, changed);
trace_drv_return_void(local);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index d382d9729e85..edd1e4d4ad9d 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -594,9 +594,9 @@ int drv_sta_set_txpwr(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta);
-void drv_sta_rc_update(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u32 changed);
+void drv_link_sta_rc_update(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_sta *link_sta, u32 changed);
static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -1728,4 +1728,16 @@ drv_can_neg_ttlm(struct ieee80211_local *local,
return res;
}
+
+static inline void
+drv_prep_add_interface(struct ieee80211_local *local,
+ enum nl80211_iftype type)
+{
+ trace_drv_prep_add_interface(local, type);
+ if (local->ops->prep_add_interface)
+ local->ops->prep_add_interface(&local->hw, type);
+
+ trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index ddc7acc68335..7a3116c36df9 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -2,7 +2,7 @@
/*
* EHT handling
*
- * Copyright(c) 2021-2023 Intel Corporation
+ * Copyright(c) 2021-2024 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -75,4 +75,23 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta);
link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+
+ switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0],
+ IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) {
+ case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454:
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ break;
+ case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991:
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_7991;
+ break;
+ case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895:
+ default:
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_3895;
+ break;
+ }
+
+ ieee80211_sta_recalc_aggregates(&link_sta->sta->sta);
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 79caeb485fd5..32390d8a9d75 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -379,7 +379,7 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work)
sta->ampdu_mlme.tid_rx_manage_offl))
__ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
IEEE80211_MAX_AMPDU_BUF_HT,
- false, true, NULL);
+ false, true, 0);
if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
sta->ampdu_mlme.tid_rx_manage_offl))
@@ -467,20 +467,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 3f74bbceeca5..a1b4178deccf 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -569,7 +569,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta)
if (!sta->sdata->u.ibss.control_port)
sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED);
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
/* If it fails, maybe we raced another insertion? */
if (sta_info_insert_rcu(sta))
@@ -1068,11 +1068,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
/* Force rx_nss recalculation */
sta->sta.deflink.rx_nss = 0;
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
if (sta->sta.deflink.rx_nss != rx_nss)
changed |= IEEE80211_RC_NSS_CHANGED;
- drv_sta_rc_update(local, sdata, &sta->sta, changed);
+ drv_link_sta_rc_update(local, sdata, &sta->sta.deflink,
+ changed);
}
rcu_read_unlock();
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a3485e4c6132..9f0db39b28ff 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -892,6 +892,9 @@ struct ieee80211_chanctx {
/* temporary data for search algorithm etc. */
struct ieee80211_chan_req req;
+ bool radar_detected;
+
+ /* MUST be last - ends in a flexible-array member. */
struct ieee80211_chanctx_conf conf;
};
@@ -1013,8 +1016,6 @@ struct ieee80211_link_data_managed {
int wmm_last_param_set;
int mu_edca_last_param_set;
-
- u8 bss_param_ch_cnt;
};
struct ieee80211_link_data_ap {
@@ -1051,7 +1052,7 @@ struct ieee80211_link_data {
} csa;
struct wiphy_work color_change_finalize_work;
- struct delayed_work color_collision_detect_work;
+ struct wiphy_delayed_work color_collision_detect_work;
u64 color_bitmap;
/* context reservation -- protected with wiphy mutex */
@@ -1067,6 +1068,7 @@ struct ieee80211_link_data {
int ap_power_level; /* in dBm */
bool radar_required;
+ struct wiphy_delayed_work dfs_cac_timer_work;
union {
struct ieee80211_link_data_managed mgd;
@@ -1102,8 +1104,6 @@ struct ieee80211_sub_if_data {
unsigned long state;
- bool csa_blocked_queues;
-
char name[IFNAMSIZ];
struct ieee80211_fragment_cache frags;
@@ -1165,8 +1165,6 @@ struct ieee80211_sub_if_data {
struct ieee80211_link_data deflink;
struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
- struct wiphy_delayed_work dfs_cac_timer_work;
-
/* for ieee80211_set_active_links_async() */
struct wiphy_work activate_links_work;
u16 desired_active_links;
@@ -1369,7 +1367,7 @@ struct ieee80211_local {
spinlock_t queue_stop_reason_lock;
int open_count;
- int monitors, cooked_mntrs;
+ int monitors, cooked_mntrs, tx_mntrs;
/* number of interfaces with corresponding FIF_ flags */
int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
fif_probe_req;
@@ -2004,7 +2002,8 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
/* color change handling */
void ieee80211_color_change_finalize_work(struct wiphy *wiphy,
struct wiphy_work *work);
-void ieee80211_color_collision_detection_work(struct work_struct *work);
+void ieee80211_color_collision_detection_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
/* interface handling */
#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
@@ -2034,8 +2033,8 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata);
int ieee80211_add_virtual_monitor(struct ieee80211_local *local);
void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
-bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
-void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
+bool __ieee80211_recalc_txpower(struct ieee80211_link_data *link);
+void ieee80211_recalc_txpower(struct ieee80211_link_data *link,
bool update_bss);
void ieee80211_recalc_offload(struct ieee80211_local *local);
@@ -2072,8 +2071,6 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
u32 info_flags,
u32 ctrl_flags,
u64 *cookie);
-void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
- struct sk_buff_head *skbs);
struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
@@ -2114,14 +2111,19 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
const u8 *bssid, int link_id);
bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old,
enum ieee80211_smps_mode smps_mode_new);
-
+void ieee80211_add_addbaext(struct sk_buff *skb,
+ const u8 req_addba_ext_data,
+ u16 buf_size);
+u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta,
+ const void *elem_data, ssize_t elem_len,
+ u16 *buf_size);
void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
void __ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
u16 buf_size, bool tx, bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext);
+ const u8 addba_ext_data);
void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
enum ieee80211_agg_stop_reason reason);
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -2136,6 +2138,29 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
struct ieee80211_mgmt *mgmt,
size_t len);
+static inline struct ieee80211_mgmt *
+ieee80211_mgmt_ba(struct sk_buff *skb, const u8 *da,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_mgmt *mgmt = skb_put_zero(skb, 24);
+
+ ether_addr_copy(mgmt->da, da);
+ ether_addr_copy(mgmt->sa, sdata->vif.addr);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
+ ether_addr_copy(mgmt->bssid, sdata->vif.addr);
+ else if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ ether_addr_copy(mgmt->bssid, sdata->vif.cfg.ap_addr);
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ ether_addr_copy(mgmt->bssid, sdata->u.ibss.bssid);
+
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ return mgmt;
+}
+
int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
enum ieee80211_agg_stop_reason reason);
void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
@@ -2174,8 +2199,6 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
return _ieee80211_sta_cur_vht_bw(link_sta, NULL);
}
void ieee80211_sta_init_nss(struct link_sta_info *link_sta);
-enum ieee80211_sta_rx_bandwidth
-ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
enum nl80211_chan_width
ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta);
void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
@@ -2387,17 +2410,13 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr, bool ack, u16 tx_time);
-
+unsigned int
+ieee80211_get_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
unsigned long queues,
enum queue_stop_reason reason,
bool refcounted);
-void ieee80211_stop_vif_queues(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum queue_stop_reason reason);
-void ieee80211_wake_vif_queues(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum queue_stop_reason reason);
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
unsigned long queues,
enum queue_stop_reason reason,
@@ -2408,6 +2427,43 @@ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
enum queue_stop_reason reason,
bool refcounted);
+static inline void
+ieee80211_stop_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason)
+{
+ ieee80211_stop_queues_by_reason(&local->hw,
+ ieee80211_get_vif_queues(local, sdata),
+ reason, true);
+}
+
+static inline void
+ieee80211_wake_vif_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason)
+{
+ ieee80211_wake_queues_by_reason(&local->hw,
+ ieee80211_get_vif_queues(local, sdata),
+ reason, true);
+}
+static inline void
+ieee80211_stop_vif_queues_norefcount(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason)
+{
+ ieee80211_stop_queues_by_reason(&local->hw,
+ ieee80211_get_vif_queues(local, sdata),
+ reason, false);
+}
+static inline void
+ieee80211_wake_vif_queues_norefcount(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum queue_stop_reason reason)
+{
+ ieee80211_wake_queues_by_reason(&local->hw,
+ ieee80211_get_vif_queues(local, sdata),
+ reason, false);
+}
void ieee80211_add_pending_skb(struct ieee80211_local *local,
struct sk_buff *skb);
void ieee80211_add_pending_skbs(struct ieee80211_local *local,
@@ -2430,7 +2486,7 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local)
/*
* If quiescing is set, we are racing with __ieee80211_suspend.
* __ieee80211_suspend flushes the workers after setting quiescing,
- * and we check quiescing / suspended before enqueing new workers.
+ * and we check quiescing / suspended before enqueuing new workers.
* We should abort the worker to avoid the races below.
*/
if (local->quiescing)
@@ -2523,8 +2579,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata);
-u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
-u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef,
+u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef);
+u8 *ieee80211_ie_build_eht_oper(u8 *pos, const struct cfg80211_chan_def *chandef,
const struct ieee80211_sta_eht_cap *eht_cap);
int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const struct ieee80211_supported_band *sband,
@@ -2629,7 +2685,8 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
bool ieee80211_is_radar_required(struct ieee80211_local *local);
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work);
-void ieee80211_dfs_cac_cancel(struct ieee80211_local *local);
+void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
+ struct ieee80211_chanctx *chanctx);
void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
struct wiphy_work *work);
int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ad66af3af3..806dffa48ef9 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -44,13 +44,13 @@
static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work);
-bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
+bool __ieee80211_recalc_txpower(struct ieee80211_link_data *link)
{
struct ieee80211_chanctx_conf *chanctx_conf;
int power;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (!chanctx_conf) {
rcu_read_unlock();
return false;
@@ -59,27 +59,26 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
power = ieee80211_chandef_max_power(&chanctx_conf->def);
rcu_read_unlock();
- if (sdata->deflink.user_power_level != IEEE80211_UNSET_POWER_LEVEL)
- power = min(power, sdata->deflink.user_power_level);
+ if (link->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(power, link->user_power_level);
- if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
- power = min(power, sdata->deflink.ap_power_level);
+ if (link->ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(power, link->ap_power_level);
- if (power != sdata->vif.bss_conf.txpower) {
- sdata->vif.bss_conf.txpower = power;
- ieee80211_hw_config(sdata->local, 0);
+ if (power != link->conf->txpower) {
+ link->conf->txpower = power;
return true;
}
return false;
}
-void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
+void ieee80211_recalc_txpower(struct ieee80211_link_data *link,
bool update_bss)
{
- if (__ieee80211_recalc_txpower(sdata) ||
- (update_bss && ieee80211_sdata_running(sdata)))
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ if (__ieee80211_recalc_txpower(link) ||
+ (update_bss && ieee80211_sdata_running(link->sdata)))
+ ieee80211_link_info_change_notify(link->sdata, link,
BSS_CHANGED_TXPOWER);
}
@@ -462,6 +461,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
{
struct ieee80211_local *local = sdata->local;
unsigned long flags;
+ struct sk_buff_head freeq;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
int i, flushed;
@@ -550,15 +550,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
wiphy_work_cancel(local->hw.wiphy,
&sdata->deflink.color_change_finalize_work);
wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->dfs_cac_timer_work);
+ &sdata->deflink.dfs_cac_timer_work);
- if (sdata->wdev.cac_started) {
+ if (sdata->wdev.links[0].cac_started) {
chandef = sdata->vif.bss_conf.chanreq.oper;
WARN_ON(local->suspended);
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, 0);
}
if (sdata->vif.type == NL80211_IFTYPE_AP) {
@@ -637,18 +637,32 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
skb_queue_purge(&sdata->status_queue);
}
+ /*
+ * Since ieee80211_free_txskb() may issue __dev_queue_xmit()
+ * which should be called with interrupts enabled, reclamation
+ * is done in two phases:
+ */
+ __skb_queue_head_init(&freeq);
+
+ /* unlink from local queues... */
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
if (info->control.vif == &sdata->vif) {
__skb_unlink(skb, &local->pending[i]);
- ieee80211_free_txskb(&local->hw, skb);
+ __skb_queue_tail(&freeq, skb);
}
}
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ /* ... and perform actual reclamation with interrupts enabled. */
+ skb_queue_walk_safe(&freeq, skb, tmp) {
+ __skb_unlink(skb, &freeq);
+ ieee80211_free_txskb(&local->hw, skb);
+ }
+
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
ieee80211_txq_remove_vlan(local, sdata);
@@ -684,9 +698,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_recalc_idle(local);
ieee80211_recalc_offload(local);
- if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
+ if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
break;
+ ieee80211_link_release_channel(&sdata->deflink);
fallthrough;
default:
if (!going_down)
@@ -1072,6 +1088,8 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
ADJUST(CONTROL, control);
ADJUST(CONTROL, pspoll);
ADJUST(OTHER_BSS, other_bss);
+ if (!(flags & MONITOR_FLAG_SKIP_TX))
+ local->tx_mntrs += offset;
#undef ADJUST
}
@@ -1116,7 +1134,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
- if (local->monitor_sdata)
+ if (local->monitor_sdata ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
return 0;
sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL);
@@ -1178,6 +1197,9 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
+ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ return;
+
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1296,6 +1318,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
}
}
+ sdata->vif.addr_valid = sdata->vif.type != NL80211_IFTYPE_MONITOR ||
+ (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE);
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
/* no need to tell driver, but set carrier and chanctx */
@@ -1313,7 +1337,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
break;
}
- if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
res = drv_add_interface(local, sdata);
if (res)
goto err_stop;
@@ -1729,8 +1754,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
wiphy_work_init(&sdata->work, ieee80211_iface_work);
wiphy_work_init(&sdata->activate_links_work,
ieee80211_activate_links_work);
- wiphy_delayed_work_init(&sdata->dfs_cac_timer_work,
- ieee80211_dfs_cac_timer_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
@@ -2163,9 +2186,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_set_default_queues(sdata);
- sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
- sdata->deflink.user_power_level = local->user_power_level;
-
/* setup type-dependent data */
ieee80211_setup_sdata(sdata, type);
@@ -2351,18 +2371,14 @@ void ieee80211_vif_block_queues_csa(struct ieee80211_sub_if_data *sdata)
if (ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA))
return;
- ieee80211_stop_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_queues = true;
+ ieee80211_stop_vif_queues_norefcount(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
}
void ieee80211_vif_unblock_queues_csa(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- if (sdata->csa_blocked_queues) {
- ieee80211_wake_vif_queues(local, sdata,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_blocked_queues = false;
- }
+ ieee80211_wake_vif_queues_norefcount(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
}
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index eecdd2265eaa..67ecfea22982 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -18,7 +18,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
#include "debugfs_key.h"
@@ -987,6 +987,26 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
}
}
+static void
+ieee80211_key_iter(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_key *key,
+ void (*iter)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key,
+ void *data),
+ void *iter_data)
+{
+ /* skip keys of station in removal process */
+ if (key->sta && key->sta->removed)
+ return;
+ if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ return;
+ iter(hw, vif, key->sta ? &key->sta->sta : NULL,
+ &key->conf, iter_data);
+}
+
void ieee80211_iter_keys(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
void (*iter)(struct ieee80211_hw *hw,
@@ -1005,16 +1025,13 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
if (vif) {
sdata = vif_to_sdata(vif);
list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
- iter(hw, &sdata->vif,
- key->sta ? &key->sta->sta : NULL,
- &key->conf, iter_data);
+ ieee80211_key_iter(hw, vif, key, iter, iter_data);
} else {
list_for_each_entry(sdata, &local->interfaces, list)
list_for_each_entry_safe(key, tmp,
&sdata->key_list, list)
- iter(hw, &sdata->vif,
- key->sta ? &key->sta->sta : NULL,
- &key->conf, iter_data);
+ ieee80211_key_iter(hw, &sdata->vif, key,
+ iter, iter_data);
}
}
EXPORT_SYMBOL(ieee80211_iter_keys);
@@ -1031,17 +1048,8 @@ _ieee80211_iter_keys_rcu(struct ieee80211_hw *hw,
{
struct ieee80211_key *key;
- list_for_each_entry_rcu(key, &sdata->key_list, list) {
- /* skip keys of station in removal process */
- if (key->sta && key->sta->removed)
- continue;
- if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
- continue;
-
- iter(hw, &sdata->vif,
- key->sta ? &key->sta->sta : NULL,
- &key->conf, iter_data);
- }
+ list_for_each_entry_rcu(key, &sdata->key_list, list)
+ ieee80211_key_iter(hw, &sdata->vif, key, iter, iter_data);
}
void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw,
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 1a211b8d4057..58a76bcd6ae6 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -36,15 +36,20 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
link->conf = link_conf;
link_conf->link_id = link_id;
link_conf->vif = &sdata->vif;
+ link->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ link->user_power_level = sdata->local->user_power_level;
+ link_conf->txpower = INT_MIN;
wiphy_work_init(&link->csa.finalize_work,
ieee80211_csa_finalize_work);
wiphy_work_init(&link->color_change_finalize_work,
ieee80211_color_change_finalize_work);
- INIT_DELAYED_WORK(&link->color_collision_detect_work,
- ieee80211_color_collision_detection_work);
+ wiphy_delayed_work_init(&link->color_collision_detect_work,
+ ieee80211_color_collision_detection_work);
INIT_LIST_HEAD(&link->assigned_chanctx_list);
INIT_LIST_HEAD(&link->reserved_chanctx_list);
+ wiphy_delayed_work_init(&link->dfs_cac_timer_work,
+ ieee80211_dfs_cac_timer_work);
if (!deflink) {
switch (sdata->vif.type) {
@@ -70,11 +75,22 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_mgd_stop_link(link);
- cancel_delayed_work_sync(&link->color_collision_detect_work);
+ wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
+ &link->color_collision_detect_work);
wiphy_work_cancel(link->sdata->local->hw.wiphy,
&link->color_change_finalize_work);
wiphy_work_cancel(link->sdata->local->hw.wiphy,
&link->csa.finalize_work);
+
+ if (link->sdata->wdev.links[link->link_id].cac_started) {
+ wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
+ &link->dfs_cac_timer_work);
+ cfg80211_cac_event(link->sdata->dev,
+ &link->conf->chanreq.oper,
+ NL80211_RADAR_CAC_ABORTED,
+ GFP_KERNEL, link->link_id);
+ }
+
ieee80211_link_release_channel(link);
}
@@ -373,6 +389,37 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
jiffies);
}
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+
+ /*
+ * This call really should not fail. Unfortunately, it appears
+ * that this may happen occasionally with some drivers. Should
+ * it happen, we are stuck in a bad place as going backwards is
+ * not really feasible.
+ *
+ * So lets just tell link_use_channel that it must not fail to
+ * assign the channel context (from mac80211's perspective) and
+ * assume the driver is going to trigger a recovery flow if it
+ * had a failure.
+ * That really is not great nor guaranteed to work. But at least
+ * the internal mac80211 state remains consistent and there is
+ * a chance that we can recover.
+ */
+ ret = _ieee80211_link_use_channel(link,
+ &link->conf->chanreq,
+ IEEE80211_CHANCTX_SHARED,
+ true);
+ WARN_ON_ONCE(ret);
+
+ /*
+ * inform about the link info changed parameters after all
+ * stations are also added
+ */
+ }
+
list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata)
continue;
@@ -416,26 +463,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- /*
- * This call really should not fail. Unfortunately, it appears
- * that this may happen occasionally with some drivers. Should
- * it happen, we are stuck in a bad place as going backwards is
- * not really feasible.
- *
- * So lets just tell link_use_channel that it must not fail to
- * assign the channel context (from mac80211's perspective) and
- * assume the driver is going to trigger a recovery flow if it
- * had a failure.
- * That really is not great nor guaranteed to work. But at least
- * the internal mac80211 state remains consistent and there is
- * a chance that we can recover.
- */
- ret = _ieee80211_link_use_channel(link,
- &link->conf->chanreq,
- IEEE80211_CHANCTX_SHARED,
- true);
- WARN_ON_ONCE(ret);
-
ieee80211_mgd_set_link_qos_params(link);
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_ERP_CTS_PROT |
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a3104b6ea6f0..ee1211a213d7 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -167,6 +167,8 @@ static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local,
}
power = ieee80211_chandef_max_power(&chandef);
+ if (local->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(local->user_power_level, power);
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -1051,9 +1053,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
return 0;
/* Driver provides cipher suites, but we need to exclude WEP */
- suites = kmemdup(local->hw.wiphy->cipher_suites,
- sizeof(u32) * local->hw.wiphy->n_cipher_suites,
- GFP_KERNEL);
+ suites = kmemdup_array(local->hw.wiphy->cipher_suites,
+ local->hw.wiphy->n_cipher_suites,
+ sizeof(u32), GFP_KERNEL);
if (!suites)
return -ENOMEM;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f94e4be0be12..974081324aa4 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -7,7 +7,7 @@
*/
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "ieee80211_i.h"
#include "mesh.h"
#include "wme.h"
@@ -1157,14 +1157,14 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
u64 changed)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- unsigned long bits = changed;
+ unsigned long bits[] = { BITMAP_FROM_U64(changed) };
u32 bit;
- if (!bits)
+ if (!changed)
return;
/* if we race with running work, worst case this work becomes a noop */
- for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
+ for_each_set_bit(bit, bits, sizeof(changed) * BITS_PER_BYTE)
set_bit(bit, ifmsh->mbss_changed);
set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags);
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
@@ -1482,7 +1482,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (!elems)
return;
- /* ignore non-mesh or secure / unsecure mismatch */
+ /* ignore non-mesh or secure / insecure mismatch */
if ((!elems->mesh_id || !elems->mesh_config) ||
(elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) ||
(!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 024f48db6b05..4e9546e998b6 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -7,7 +7,7 @@
#include <linux/slab.h>
#include <linux/etherdevice.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "wme.h"
#include "mesh.h"
@@ -220,12 +220,12 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
/**
* mesh_path_error_tx - Sends a PERR mesh management frame
*
+ * @sdata: local mesh subif
* @ttl: allowed remaining hops
* @target: broken destination
* @target_sn: SN of the broken destination
* @target_rcode: reason code for this PERR
* @ra: node this frame is addressed to
- * @sdata: local mesh subif
*
* Note: This function may be called with driver locks taken that the driver
* also acquires in the TX path. To avoid a deadlock we don't transmit the
@@ -1137,8 +1137,8 @@ enddiscovery:
/**
* mesh_nexthop_resolve - lookup next hop; conditionally start path discovery
*
- * @skb: 802.11 frame to be sent
* @sdata: network subif the frame will be sent through
+ * @skb: 802.11 frame to be sent
*
* Lookup next hop for given skb and start path discovery if no
* forwarding information is found.
@@ -1245,8 +1245,8 @@ void mesh_path_refresh(struct ieee80211_sub_if_data *sdata,
* this function is considered "using" the associated mpath, so preempt a path
* refresh if this mpath expires soon.
*
- * @skb: 802.11 frame to be sent
* @sdata: network subif the frame will be sent through
+ * @skb: 802.11 frame to be sent
*
* Returns: 0 if the next hop was found. Nonzero otherwise.
*/
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index c0a5c75cddcb..9f9cb5af0a97 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -300,8 +300,8 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
/**
* mesh_path_lookup_by_idx - look up a path in the mesh path table by its index
- * @idx: index
* @sdata: local subif, or NULL for all entries
+ * @idx: index
*
* Returns: pointer to the mesh path structure, or NULL if not found.
*
@@ -315,8 +315,8 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
/**
* mpp_path_lookup_by_idx - look up a path in the proxy path table by its index
- * @idx: index
* @sdata: local subif, or NULL for all entries
+ * @idx: index
*
* Returns: pointer to the proxy path structure, or NULL if not found.
*
@@ -580,7 +580,7 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
prev = rhashtable_lookup_get_insert_fast(&cache->rht,
&entry->rhash,
fast_tx_rht_params);
- if (unlikely(IS_ERR(prev))) {
+ if (IS_ERR(prev)) {
kfree(entry);
goto unlock_cache;
}
@@ -670,8 +670,8 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata,
/**
* mesh_path_add - allocate and add a new path to the mesh path table
- * @dst: destination address of the path (ETH_ALEN length)
* @sdata: local subif
+ * @dst: destination address of the path (ETH_ALEN length)
*
* Returns: 0 on success
*
@@ -916,8 +916,8 @@ static int table_path_del(struct mesh_table *tbl,
/**
* mesh_path_del - delete a mesh path from the table
*
- * @addr: dst address (ETH_ALEN length)
* @sdata: local subif
+ * @addr: dst address (ETH_ALEN length)
*
* Returns: 0 if successful
*/
@@ -996,8 +996,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
/**
* mesh_path_discard_frame - discard a frame whose path could not be resolved
*
- * @skb: frame to discard
* @sdata: network subif the frame was to be sent through
+ * @skb: frame to discard
*
* Locking: the function must me called within a rcu_read_lock region
*/
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 8f2b492a9fe9..6ea35c88dc48 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -486,10 +486,11 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20;
}
+ /* FIXME: this check is wrong without SW rate control */
if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
else
- rate_control_rate_update(local, sband, sta, 0, changed);
+ rate_control_rate_update(local, sband, &sta->deflink, changed);
out:
spin_unlock_bh(&sta->mesh->plink_lock);
}
@@ -667,7 +668,7 @@ void mesh_plink_timer(struct timer_list *t)
/*
* This STA is valid because sta_info_destroy() will
* del_timer_sync() this timer after having made sure
- * it cannot be readded (by deleting the plink.)
+ * it cannot be re-added (by deleting the plink.)
*/
sta = mesh->plink_sta;
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 8cf3f395f52f..3a66b4cefca7 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -175,7 +175,7 @@ static void mesh_sync_offset_adjust_tsf(struct ieee80211_sub_if_data *sdata,
spin_lock_bh(&ifmsh->sync_offset_lock);
if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) {
- /* Since ajusting the tsf here would
+ /* Since adjusting the tsf here would
* require a possibly blocking call
* to the driver tsf setter, we punt
* the tsf adjustment to the mesh tasklet
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index a57502d9ffec..8a1afc93e749 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -6,7 +6,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/ieee80211.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "michael.h"
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f9526bbc3633..61c318f5239f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -23,7 +23,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -31,6 +31,8 @@
#include "led.h"
#include "fils_aead.h"
+#include <kunit/static_stub.h>
+
#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10)
@@ -1231,7 +1233,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
bool disable_mu_mimo = false;
struct ieee80211_sub_if_data *other;
- list_for_each_entry_rcu(other, &local->interfaces, list) {
+ list_for_each_entry(other, &local->interfaces, list) {
if (other->vif.bss_conf.mu_mimo_owner) {
disable_mu_mimo = true;
break;
@@ -2636,16 +2638,96 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
*/
link->conf->csa_active = true;
link->u.mgd.csa.blocked_tx = csa_ie.mode;
- sdata->csa_blocked_queues =
- csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA);
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
}
+struct sta_bss_param_ch_cnt_data {
+ struct ieee80211_sub_if_data *sdata;
+ u8 reporting_link_id;
+ u8 mld_id;
+};
+
+static enum cfg80211_rnr_iter_ret
+ieee80211_sta_bss_param_ch_cnt_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ struct sta_bss_param_ch_cnt_data *data = _data;
+ struct ieee80211_sub_if_data *sdata = data->sdata;
+ const struct ieee80211_tbtt_info_ge_11 *ti;
+ u8 bss_param_ch_cnt;
+ int link_id;
+
+ if (type != IEEE80211_TBTT_INFO_TYPE_TBTT)
+ return RNR_ITER_CONTINUE;
+
+ if (tbtt_info_len < sizeof(*ti))
+ return RNR_ITER_CONTINUE;
+
+ ti = (const void *)tbtt_info;
+
+ if (ti->mld_params.mld_id != data->mld_id)
+ return RNR_ITER_CONTINUE;
+
+ link_id = le16_get_bits(ti->mld_params.params,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID);
+ bss_param_ch_cnt =
+ le16_get_bits(ti->mld_params.params,
+ IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+
+ if (bss_param_ch_cnt != 255 &&
+ link_id < ARRAY_SIZE(sdata->link)) {
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
+
+ if (link && link->conf->bss_param_ch_cnt != bss_param_ch_cnt) {
+ link->conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ link->conf->bss_param_ch_cnt_link_id =
+ data->reporting_link_id;
+ }
+ }
+
+ return RNR_ITER_CONTINUE;
+}
+
+static void
+ieee80211_mgd_update_bss_param_ch_cnt(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *bss_conf,
+ struct ieee802_11_elems *elems)
+{
+ struct sta_bss_param_ch_cnt_data data = {
+ .reporting_link_id = bss_conf->link_id,
+ .sdata = sdata,
+ };
+ int bss_param_ch_cnt;
+
+ if (!elems->ml_basic)
+ return;
+
+ data.mld_id = ieee80211_mle_get_mld_id((const void *)elems->ml_basic);
+
+ cfg80211_iter_rnr(elems->ie_start, elems->total_len,
+ ieee80211_sta_bss_param_ch_cnt_iter, &data);
+
+ bss_param_ch_cnt =
+ ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic);
+
+ /*
+ * Update bss_param_ch_cnt_link_id even if bss_param_ch_cnt
+ * didn't change to indicate that we got a beacon on our own
+ * link.
+ */
+ if (bss_param_ch_cnt >= 0 && bss_param_ch_cnt != 255) {
+ bss_conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt_link_id =
+ bss_conf->link_id;
+ }
+}
+
static bool
-ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel *channel,
+ieee80211_find_80211h_pwr_constr(struct ieee80211_channel *channel,
const u8 *country_ie, u8 country_ie_len,
const u8 *pwr_constr_elem,
int *chan_pwr, int *pwr_reduction)
@@ -2715,8 +2797,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
return have_chan_pwr;
}
-static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel *channel,
+static void ieee80211_find_cisco_dtpc(struct ieee80211_channel *channel,
const u8 *cisco_dtpc_ie,
int *pwr_level)
{
@@ -2750,7 +2831,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
(capab & cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT) ||
capab & cpu_to_le16(WLAN_CAPABILITY_RADIO_MEASURE))) {
has_80211h_pwr = ieee80211_find_80211h_pwr_constr(
- sdata, channel, country_ie, country_ie_len,
+ channel, country_ie, country_ie_len,
pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h);
pwr_level_80211h =
max_t(int, 0, chan_pwr - pwr_reduction_80211h);
@@ -2758,7 +2839,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
if (cisco_dtpc_ie) {
ieee80211_find_cisco_dtpc(
- sdata, channel, cisco_dtpc_ie, &pwr_level_cisco);
+ channel, cisco_dtpc_ie, &pwr_level_cisco);
has_cisco_pwr = true;
}
@@ -2791,7 +2872,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
}
link->ap_power_level = new_ap_level;
- if (__ieee80211_recalc_txpower(sdata))
+ if (__ieee80211_recalc_txpower(link))
return BSS_CHANGED_TXPOWER;
return 0;
}
@@ -3031,18 +3112,19 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t)
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
dfs_cac_timer_work.work);
- struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chanreq.oper;
+ struct cfg80211_chan_def chandef = link->conf->chanreq.oper;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(&sdata->deflink);
+ if (sdata->wdev.links[link->link_id].cac_started) {
+ ieee80211_link_release_channel(link);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_FINISHED,
- GFP_KERNEL);
+ GFP_KERNEL, link->link_id);
}
}
@@ -4100,8 +4182,13 @@ EXPORT_SYMBOL(ieee80211_beacon_loss);
void ieee80211_connection_loss(struct ieee80211_vif *vif)
{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_hw *hw = &sdata->local->hw;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_hw *hw;
+
+ KUNIT_STATIC_STUB_REDIRECT(ieee80211_connection_loss, vif);
+
+ sdata = vif_to_sdata(vif);
+ hw = &sdata->local->hw;
trace_api_connection_loss(sdata);
@@ -4666,7 +4753,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
ret = false;
goto out;
}
- link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt_link_id = link_id;
}
} else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC ||
!elems->prof ||
@@ -4676,6 +4764,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
} else {
const u8 *ptr = elems->prof->variable +
elems->prof->sta_info_len - 1;
+ int bss_param_ch_cnt;
/*
* During parsing, we validated that these fields exist,
@@ -4683,8 +4772,10 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
*/
capab_info = get_unaligned_le16(ptr);
assoc_data->link[link_id].status = get_unaligned_le16(ptr + 2);
- link->u.mgd.bss_param_ch_cnt =
+ bss_param_ch_cnt =
ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(elems->prof);
+ bss_conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt_link_id = link_id;
if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) {
link_info(link, "association response status code=%u\n",
@@ -4715,7 +4806,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
((assoc_data->wmm && !elems->wmm_param) ||
(link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
- (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
+ (is_5ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems *bss_elems;
@@ -4763,19 +4854,22 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
- if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
- link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
- elems->vht_cap_elem = bss_elems->vht_cap_elem;
- sdata_info(sdata,
- "AP bug: VHT capa missing from AssocResp\n");
- }
- if (!elems->vht_operation && bss_elems->vht_operation &&
- link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
- elems->vht_operation = bss_elems->vht_operation;
- sdata_info(sdata,
- "AP bug: VHT operation missing from AssocResp\n");
- }
+ if (is_5ghz) {
+ if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
+ elems->vht_cap_elem = bss_elems->vht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: VHT capa missing from AssocResp\n");
+ }
+
+ if (!elems->vht_operation && bss_elems->vht_operation &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
+ elems->vht_operation = bss_elems->vht_operation;
+ sdata_info(sdata,
+ "AP bug: VHT operation missing from AssocResp\n");
+ }
+ }
kfree(bss_elems);
}
@@ -5661,7 +5755,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
/* links might have changed due to rejected ones, set them again */
ieee80211_vif_set_links(sdata, valid_links, dormant_links);
- rate_control_rate_init(sta);
+ rate_control_rate_init_all_links(sta);
if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
set_sta_flag(sta, WLAN_STA_MFP);
@@ -6909,6 +7003,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
/* note that after this elems->ml_basic can no longer be used fully */
ieee80211_mgd_check_cross_link_csa(sdata, rx_status->link_id, elems);
+ ieee80211_mgd_update_bss_param_ch_cnt(sdata, bss_conf, elems);
+
if (!link->u.mgd.disable_wmm_tracking &&
ieee80211_sta_wmm_params(local, link, elems->wmm_param,
elems->wmm_param_len,
@@ -7660,6 +7756,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
assoc_data->tries++;
+ assoc_data->comeback = false;
if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) {
sdata_info(sdata, "association with %pM timed out\n",
assoc_data->ap_addr);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 9ef14e475c90..6218abc3e441 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -4,7 +4,7 @@
*
* Copyright: (c) 2014 Czech Technical University in Prague
* (c) 2014 Volkswagen Group Research
- * Copyright (C) 2022 - 2023 Intel Corporation
+ * Copyright (C) 2022 - 2024 Intel Corporation
* Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
* Funded by: Volkswagen Group Research
*/
@@ -16,7 +16,7 @@
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <net/mac80211.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -96,7 +96,7 @@ static struct sta_info *ieee80211_ocb_finish_sta(struct sta_info *sta)
sta_info_move_state(sta, IEEE80211_STA_ASSOC);
sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED);
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
/* If it fails, maybe we raced another insertion? */
if (sta_info_insert_rcu(sta))
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 28d03196ef75..29fab7ae47b4 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -997,6 +997,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
}
IEEE80211_SKB_CB(skb)->flags = flags;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK;
skb->dev = sdata->dev;
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d823d58303e8..7be52345f218 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -32,7 +32,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_scan_cancel(local);
- ieee80211_dfs_cac_cancel(local);
+ ieee80211_dfs_cac_cancel(local, NULL);
ieee80211_roc_purge(local, NULL);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 4dc1def69548..0d056db9f81e 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -28,8 +28,9 @@ module_param(ieee80211_default_rc_algo, charp, 0644);
MODULE_PARM_DESC(ieee80211_default_rc_algo,
"Default rate control algorithm for mac80211 to use");
-void rate_control_rate_init(struct sta_info *sta)
+void rate_control_rate_init(struct link_sta_info *link_sta)
{
+ struct sta_info *sta = link_sta->sta;
struct ieee80211_local *local = sta->sdata->local;
struct rate_control_ref *ref = sta->rate_ctrl;
struct ieee80211_sta *ista = &sta->sta;
@@ -37,11 +38,15 @@ void rate_control_rate_init(struct sta_info *sta)
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *chanctx_conf;
- ieee80211_sta_init_nss(&sta->deflink);
+ ieee80211_sta_init_nss(link_sta);
if (!ref)
return;
+ /* SW rate control isn't supported with MLO right now */
+ if (WARN_ON(ieee80211_vif_is_mld(&sta->sdata->vif)))
+ return;
+
rcu_read_lock();
chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf);
@@ -67,6 +72,21 @@ void rate_control_rate_init(struct sta_info *sta)
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
}
+void rate_control_rate_init_all_links(struct sta_info *sta)
+{
+ int link_id;
+
+ for (link_id = 0; link_id < ARRAY_SIZE(sta->link); link_id++) {
+ struct link_sta_info *link_sta;
+
+ link_sta = sdata_dereference(sta->link[link_id], sta->sdata);
+ if (!link_sta)
+ continue;
+
+ rate_control_rate_init(link_sta);
+ }
+}
+
void rate_control_tx_status(struct ieee80211_local *local,
struct ieee80211_tx_status *st)
{
@@ -93,16 +113,15 @@ void rate_control_tx_status(struct ieee80211_local *local,
void rate_control_rate_update(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
- struct sta_info *sta, unsigned int link_id,
+ struct link_sta_info *link_sta,
u32 changed)
{
struct rate_control_ref *ref = local->rate_ctrl;
+ struct sta_info *sta = link_sta->sta;
struct ieee80211_sta *ista = &sta->sta;
void *priv_sta = sta->rate_ctrl_priv;
struct ieee80211_chanctx_conf *chanctx_conf;
- WARN_ON(link_id != 0);
-
if (ref && ref->ops->rate_update) {
rcu_read_lock();
@@ -120,7 +139,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
}
if (sta->uploaded)
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+ drv_link_sta_rc_update(local, sta->sdata, link_sta->pub,
+ changed);
}
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
@@ -229,9 +249,8 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf,
ref->ops->name, len);
}
-const struct file_operations rcname_ops = {
+const struct debugfs_short_fops rcname_ops = {
.read = rcname_read,
- .open = simple_open,
.llseek = default_llseek,
};
#endif
@@ -890,7 +909,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
- if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX))
+ if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK))
mask = sdata->rc_rateidx_mask[info->band];
if (dest[0].idx < 0)
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index d6190f10fe7c..5e4bde598212 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022, 2024 Intel Corporation
*/
#ifndef IEEE80211_RATE_H
@@ -29,11 +29,11 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
void rate_control_tx_status(struct ieee80211_local *local,
struct ieee80211_tx_status *st);
-void rate_control_rate_init(struct sta_info *sta);
+void rate_control_rate_init(struct link_sta_info *link_sta);
+void rate_control_rate_init_all_links(struct sta_info *sta);
void rate_control_rate_update(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
- struct sta_info *sta,
- unsigned int link_id,
+ struct link_sta_info *link_sta,
u32 changed);
static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
@@ -62,7 +62,7 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
#endif
}
-extern const struct file_operations rcname_ops;
+extern const struct debugfs_short_fops rcname_ops;
static inline void rate_control_add_debugfs(struct ieee80211_local *local)
{
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 6bf3b4444a43..706cbc99f718 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1053,7 +1053,7 @@ minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi)
* - max_prob_rate must use only one stream, as a tradeoff between delivery
* probability and throughput during strong fluctuations
* - as long as the max prob rate has a probability of more than 75%, pick
- * higher throughput rates, even if the probablity is a bit lower
+ * higher throughput rates, even if the probability is a bit lower
*/
static void
minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 25b8a67a63a4..85149c774505 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -187,7 +187,6 @@ static const struct file_operations minstrel_ht_stat_fops = {
.open = minstrel_ht_stats_open,
.read = minstrel_stats_read,
.release = minstrel_stats_release,
- .llseek = no_llseek,
};
static char *
@@ -323,7 +322,6 @@ static const struct file_operations minstrel_ht_stat_csv_fops = {
.open = minstrel_ht_stats_csv_open,
.read = minstrel_stats_read,
.release = minstrel_stats_release,
- .llseek = no_llseek,
};
void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 59ad24a71141..2bec18fc1b03 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -22,7 +22,7 @@
#include <kunit/visibility.h>
#include <net/mac80211.h>
#include <net/ieee80211_radiotap.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -508,18 +508,13 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR)
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
- if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
- flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
put_unaligned_le16(flags, pos);
pos += 2;
- if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
- *pos++ = status->ampdu_delimiter_crc;
- else
- *pos++ = 0;
+ *pos++ = 0;
*pos++ = 0;
}
@@ -767,8 +762,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
struct ieee80211_rate *rate)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb);
- struct ieee80211_sub_if_data *sdata;
- struct sk_buff *monskb = NULL;
+ struct ieee80211_sub_if_data *sdata, *prev_sdata = NULL;
+ struct sk_buff *skb, *monskb = NULL;
int present_fcs_len = 0;
unsigned int rtap_space = 0;
struct ieee80211_sub_if_data *monitor_sdata =
@@ -842,40 +837,52 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space);
list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) {
- bool last_monitor = list_is_last(&sdata->u.mntr.list,
- &local->mon_list);
+ struct cfg80211_chan_def *chandef;
+
+ chandef = &sdata->vif.bss_conf.chanreq.oper;
+ if (chandef->chan &&
+ chandef->chan->center_freq != status->freq)
+ continue;
+
+ if (!prev_sdata) {
+ prev_sdata = sdata;
+ continue;
+ }
+
+ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ ieee80211_handle_mu_mimo_mon(sdata, origskb, rtap_space);
if (!monskb)
monskb = ieee80211_make_monitor_skb(local, &origskb,
rate, rtap_space,
- only_monitor &&
- last_monitor);
+ false);
+ if (!monskb)
+ continue;
- if (monskb) {
- struct sk_buff *skb;
+ skb = skb_clone(monskb, GFP_ATOMIC);
+ if (!skb)
+ continue;
- if (last_monitor) {
- skb = monskb;
- monskb = NULL;
- } else {
- skb = skb_clone(monskb, GFP_ATOMIC);
- }
+ skb->dev = prev_sdata->dev;
+ dev_sw_netstats_rx_add(skb->dev, skb->len);
+ netif_receive_skb(skb);
+ prev_sdata = sdata;
+ }
- if (skb) {
- skb->dev = sdata->dev;
- dev_sw_netstats_rx_add(skb->dev, skb->len);
- netif_receive_skb(skb);
- }
+ if (prev_sdata) {
+ if (monskb)
+ skb = monskb;
+ else
+ skb = ieee80211_make_monitor_skb(local, &origskb,
+ rate, rtap_space,
+ only_monitor);
+ if (skb) {
+ skb->dev = prev_sdata->dev;
+ dev_sw_netstats_rx_add(skb->dev, skb->len);
+ netif_receive_skb(skb);
}
-
- if (last_monitor)
- break;
}
- /* this happens if last_monitor was erroneously false */
- dev_kfree_skb(monskb);
-
- /* ditto */
if (!origskb)
return NULL;
@@ -3568,7 +3575,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
sband = rx->local->hw.wiphy->bands[status->band];
- rate_control_rate_update(local, sband, rx->sta, 0,
+ rate_control_rate_update(local, sband, rx->link_sta,
IEEE80211_RC_SMPS_CHANGED);
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
@@ -3605,7 +3612,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
ieee80211_sta_rx_bw_to_chan_width(rx->link_sta);
sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
- rate_control_rate_update(local, sband, rx->sta, 0,
+ rate_control_rate_update(local, sband, rx->link_sta,
IEEE80211_RC_BW_CHANGED);
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index b5f2df61c7f6..cb7079071885 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -504,7 +504,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
* the scan was in progress; if there was none this will
* just be a no-op for the particular interface.
*/
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ list_for_each_entry(sdata, &local->interfaces, list) {
if (ieee80211_sdata_running(sdata))
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
@@ -575,6 +575,7 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *sdata_iter;
+ unsigned int link_id;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -585,8 +586,9 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
return false;
list_for_each_entry(sdata_iter, &local->interfaces, list) {
- if (sdata_iter->wdev.cac_started)
- return false;
+ for_each_valid_link(&sdata_iter->wdev, link_id)
+ if (sdata_iter->wdev.links[link_id].cac_started)
+ return false;
}
return true;
@@ -649,7 +651,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
- IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK;
ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
@@ -1013,10 +1015,8 @@ set_channel:
*/
if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
!scan_req->n_ssids) {
- *next_delay = msecs_to_jiffies(scan_req->duration) >
- IEEE80211_PASSIVE_CHANNEL_TIME ?
- msecs_to_jiffies(scan_req->duration) :
- IEEE80211_PASSIVE_CHANNEL_TIME;
+ *next_delay = max(msecs_to_jiffies(scan_req->duration),
+ IEEE80211_PASSIVE_CHANNEL_TIME);
local->next_scan_state = SCAN_DECISION;
if (scan_req->n_ssids)
set_bit(SCAN_BEACON_WAIT, &local->scanning);
@@ -1176,14 +1176,14 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
unsigned int n_channels)
{
struct ieee80211_local *local = sdata->local;
- int ret = -EBUSY, i, n_ch = 0;
+ int i, n_ch = 0;
enum nl80211_band band;
lockdep_assert_wiphy(local->hw.wiphy);
/* busy scanning */
if (local->scan_req)
- goto unlock;
+ return -EBUSY;
/* fill internal scan request */
if (!channels) {
@@ -1200,7 +1200,9 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
&local->hw.wiphy->bands[band]->channels[i];
if (tmp_ch->flags & (IEEE80211_CHAN_NO_IR |
- IEEE80211_CHAN_DISABLED))
+ IEEE80211_CHAN_DISABLED) ||
+ !cfg80211_wdev_channel_allowed(&sdata->wdev,
+ tmp_ch))
continue;
local->int_scan_req->channels[n_ch] = tmp_ch;
@@ -1209,21 +1211,23 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
}
if (WARN_ON_ONCE(n_ch == 0))
- goto unlock;
+ return -EINVAL;
local->int_scan_req->n_channels = n_ch;
} else {
for (i = 0; i < n_channels; i++) {
if (channels[i]->flags & (IEEE80211_CHAN_NO_IR |
- IEEE80211_CHAN_DISABLED))
+ IEEE80211_CHAN_DISABLED) ||
+ !cfg80211_wdev_channel_allowed(&sdata->wdev,
+ channels[i]))
continue;
local->int_scan_req->channels[n_ch] = channels[i];
n_ch++;
}
- if (WARN_ON_ONCE(n_ch == 0))
- goto unlock;
+ if (n_ch == 0)
+ return -EINVAL;
local->int_scan_req->n_channels = n_ch;
}
@@ -1233,9 +1237,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
local->int_scan_req->ssids[0].ssid_len = ssid_len;
- ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
- unlock:
- return ret;
+ return __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
}
void ieee80211_scan_cancel(struct ieee80211_local *local)
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 073ff9e0f397..c6015cd00372 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -377,13 +377,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
/* capture the AP chandef before (potential) downgrading */
csa_ie->chanreq.ap = new_chandef;
- if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 &&
- new_chandef.width == NL80211_CHAN_WIDTH_320)
- ieee80211_chandef_downgrade(&new_chandef, NULL);
-
- if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 &&
- (new_chandef.width == NL80211_CHAN_WIDTH_80P80 ||
- new_chandef.width == NL80211_CHAN_WIDTH_160))
+ while (conn->bw_limit <
+ ieee80211_min_bw_limit_from_chandef(&new_chandef))
ieee80211_chandef_downgrade(&new_chandef, NULL);
if (!cfg80211_chandef_compatible(&new_chandef,
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9195d5a2de0a..9f89fb5bee37 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -169,7 +169,7 @@ struct sta_info;
* @buf_size: reorder buffer size at receiver
* @failed_bar_ssn: ssn of the last failed BAR tx attempt
* @bar_pending: BAR needs to be re-sent
- * @amsdu: support A-MSDU withing A-MDPU
+ * @amsdu: support A-MSDU within A-MDPU
* @ssn: starting sequence number of the session
*
* This structure's lifetime is managed by RCU, assignments to
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index dd8f857a1fbc..5f28f3633fa0 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -11,7 +11,7 @@
#include <linux/export.h>
#include <linux/etherdevice.h>
#include <net/mac80211.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "ieee80211_i.h"
#include "rate.h"
#include "mesh.h"
@@ -927,6 +927,9 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
if (!ieee80211_sdata_running(sdata))
continue;
+ if (sdata->u.mntr.flags & MONITOR_FLAG_SKIP_TX)
+ continue;
+
if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) &&
!send_to_cooked)
continue;
@@ -1099,7 +1102,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
* This is a bit racy but we can avoid a lot of work
* with this test...
*/
- if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) {
+ if (!local->tx_mntrs && (!send_to_cooked || !local->cooked_mntrs)) {
if (status->free_list)
list_add_tail(&skb->list, status->free_list);
else
@@ -1301,3 +1304,4 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
while ((skb = __skb_dequeue(skbs)))
ieee80211_free_txskb(hw, skb);
}
+EXPORT_SYMBOL(ieee80211_purge_tx_queue);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f07b40916485..2f92e7c7f203 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1342,7 +1342,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
bw = min(bw, ieee80211_sta_cap_rx_bw(&sta->deflink));
if (bw != sta->sta.deflink.bandwidth) {
sta->sta.deflink.bandwidth = bw;
- rate_control_rate_update(local, sband, sta, 0,
+ rate_control_rate_update(local, sband,
+ &sta->deflink,
IEEE80211_RC_BW_CHANGED);
/*
* if a TDLS peer BW was updated, we need to
diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c
index a413ba29f759..a53c55a879a8 100644
--- a/net/mac80211/tests/elems.c
+++ b/net/mac80211/tests/elems.c
@@ -7,7 +7,7 @@
#include <kunit/test.h>
#include "../ieee80211_i.h"
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
static void mle_defrag(struct kunit *test)
{
diff --git a/net/mac80211/tests/mfp.c b/net/mac80211/tests/mfp.c
index a8dc1601da60..58e675e0ed91 100644
--- a/net/mac80211/tests/mfp.c
+++ b/net/mac80211/tests/mfp.c
@@ -9,7 +9,7 @@
#include "../ieee80211_i.h"
#include "../sta_info.h"
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
static const struct mfp_test_case {
const char *desc;
diff --git a/net/mac80211/tests/tpe.c b/net/mac80211/tests/tpe.c
index dd63303a2985..c73b6c66bd5a 100644
--- a/net/mac80211/tests/tpe.c
+++ b/net/mac80211/tests/tpe.c
@@ -7,7 +7,7 @@
#include <kunit/test.h>
#include "../ieee80211_i.h"
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
static struct ieee80211_channel chan6g_1 = {
.band = NL80211_BAND_6GHZ,
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index e7f57bb18f6e..94c00e71f6f8 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -9,7 +9,7 @@
#include <linux/types.h>
#include <linux/netdevice.h>
#include <linux/export.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/mac80211.h>
#include "driver-ops.h"
@@ -313,7 +313,7 @@ int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx,
* Record previously received IV, will be copied into the
* key information after MIC verification. It is possible
* that we don't catch replays of fragments but that's ok
- * because the Michael MIC verication will then fail.
+ * because the Michael MIC verification will then fail.
*/
*out_iv32 = iv32;
*out_iv16 = iv16;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index dc498cd8cd91..7a4985fc2b16 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -939,31 +939,34 @@ TRACE_EVENT(drv_sta_set_txpwr,
)
);
-TRACE_EVENT(drv_sta_rc_update,
+TRACE_EVENT(drv_link_sta_rc_update,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta,
+ struct ieee80211_link_sta *link_sta,
u32 changed),
- TP_ARGS(local, sdata, sta, changed),
+ TP_ARGS(local, sdata, link_sta, changed),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
STA_ENTRY
__field(u32, changed)
+ __field(u32, link_id)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
- STA_ASSIGN;
+ STA_NAMED_ASSIGN(link_sta->sta);
__entry->changed = changed;
+ __entry->link_id = link_sta->link_id;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " changed: 0x%x",
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->changed
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " (link %d) changed: 0x%x",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->link_id,
+ __entry->changed
)
);
@@ -3154,6 +3157,25 @@ TRACE_EVENT(drv_neg_ttlm_res,
LOCAL_PR_ARG, VIF_PR_ARG, __entry->res
)
);
+
+TRACE_EVENT(drv_prep_add_interface,
+ TP_PROTO(struct ieee80211_local *local,
+ enum nl80211_iftype type),
+
+ TP_ARGS(local, type),
+ TP_STRUCT__entry(LOCAL_ENTRY
+ __field(u32, type)
+ ),
+
+ TP_fast_assign(LOCAL_ASSIGN;
+ __entry->type = type;
+ ),
+
+ TP_printk(LOCAL_PR_FMT " type: %u\n ",
+ LOCAL_PR_ARG, __entry->type
+ )
+);
+
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index bca7b341dd77..a24636bda679 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -24,7 +24,7 @@
#include <net/mac80211.h>
#include <net/codel.h>
#include <net/codel_impl.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/fq_impl.h>
#include <net/gso.h>
@@ -699,7 +699,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) {
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) {
txrc.rate_idx_mask = ~0;
} else {
txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
@@ -1763,7 +1763,8 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
vif = &sdata->vif;
break;
}
@@ -3952,7 +3953,8 @@ begin:
switch (tx.sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ if ((tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
vif = &tx.sdata->vif;
break;
}
@@ -6214,7 +6216,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
goto start_xmit;
/* update QoS header to prioritize control port frames if possible,
- * priorization also happens for control port frames send over
+ * prioritization also happens for control port frames send over
* AF_PACKET
*/
rcu_read_lock();
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c7ad9bc5973a..dc0b74443c8d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -657,7 +657,7 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_wake_queues);
-static unsigned int
+unsigned int
ieee80211_get_vif_queues(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
@@ -669,7 +669,8 @@ ieee80211_get_vif_queues(struct ieee80211_local *local,
queues = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- queues |= BIT(sdata->vif.hw_queue[ac]);
+ if (sdata->vif.hw_queue[ac] != IEEE80211_INVAL_HW_QUEUE)
+ queues |= BIT(sdata->vif.hw_queue[ac]);
if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE)
queues |= BIT(sdata->vif.cab_queue);
} else {
@@ -724,24 +725,6 @@ void ieee80211_flush_queues(struct ieee80211_local *local,
__ieee80211_flush_queues(local, sdata, 0, drop);
}
-void ieee80211_stop_vif_queues(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum queue_stop_reason reason)
-{
- ieee80211_stop_queues_by_reason(&local->hw,
- ieee80211_get_vif_queues(local, sdata),
- reason, true);
-}
-
-void ieee80211_wake_vif_queues(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum queue_stop_reason reason)
-{
- ieee80211_wake_queues_by_reason(&local->hw,
- ieee80211_get_vif_queues(local, sdata),
- reason, true);
-}
-
static void __iterate_interfaces(struct ieee80211_local *local,
u32 iter_flags,
void (*iterator)(void *data, u8 *mac,
@@ -751,10 +734,13 @@ static void __iterate_interfaces(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata;
bool active_only = iter_flags & IEEE80211_IFACE_ITER_ACTIVE;
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ list_for_each_entry_rcu(sdata, &local->interfaces, list,
+ lockdep_is_held(&local->iflist_mtx) ||
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
+ if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
continue;
break;
case NL80211_IFTYPE_AP_VLAN:
@@ -833,7 +819,8 @@ static void __iterate_stations(struct ieee80211_local *local,
{
struct sta_info *sta;
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry_rcu(sta, &local->sta_list, list,
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
if (!sta->uploaded)
continue;
@@ -854,6 +841,19 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_atomic);
+void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw,
+ void (*iterator)(void *data,
+ struct ieee80211_sta *sta),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ __iterate_stations(local, iterator, data);
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_mtx);
+
struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
@@ -994,7 +994,7 @@ void ieee80211_set_wmm_default(struct ieee80211_link_data *link,
else
aCWmin = 15;
- /* Confiure old 802.11b/g medium access rules. */
+ /* Configure old 802.11b/g medium access rules. */
qparam.cw_max = aCWmax;
qparam.cw_min = aCWmin;
qparam.txop = 0;
@@ -1826,6 +1826,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n");
else
WARN(1, "Hardware became unavailable during restart.\n");
+ ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND,
+ false);
ieee80211_handle_reconfig_failure(local);
return res;
}
@@ -1857,8 +1860,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ continue;
if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR &&
ieee80211_sdata_running(sdata)) {
res = drv_add_interface(local, sdata);
if (WARN_ON(res))
@@ -1871,11 +1876,14 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
if (res) {
list_for_each_entry_continue_reverse(sdata, &local->interfaces,
- list)
+ list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ continue;
if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR &&
ieee80211_sdata_running(sdata))
drv_remove_interface(local, sdata);
+ }
ieee80211_handle_reconfig_failure(local);
return res;
}
@@ -2736,7 +2744,7 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos + sizeof(struct ieee80211_vht_operation);
}
-u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef)
+u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef)
{
struct ieee80211_he_operation *he_oper;
struct ieee80211_he_6ghz_oper *he_6ghz_op;
@@ -2828,7 +2836,7 @@ out:
return pos;
}
-u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef,
+u8 *ieee80211_ie_build_eht_oper(u8 *pos, const struct cfg80211_chan_def *chandef,
const struct ieee80211_sta_eht_cap *eht_cap)
{
@@ -3451,24 +3459,44 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
return ts;
}
-void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
+/* Cancel CAC for the interfaces under the specified @local. If @ctx is
+ * also provided, only the interfaces using that ctx will be canceled.
+ */
+void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef;
+ struct ieee80211_link_data *link;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ unsigned int link_id;
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
- wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->dfs_cac_timer_work);
-
- if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chanreq.oper;
- ieee80211_link_release_channel(&sdata->deflink);
- cfg80211_cac_event(sdata->dev,
- &chandef,
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS;
+ link_id++) {
+ link = sdata_dereference(sdata->link[link_id],
+ sdata);
+ if (!link)
+ continue;
+
+ chanctx_conf = sdata_dereference(link->conf->chanctx_conf,
+ sdata);
+ if (ctx && &ctx->conf != chanctx_conf)
+ continue;
+
+ wiphy_delayed_work_cancel(local->hw.wiphy,
+ &link->dfs_cac_timer_work);
+
+ if (!sdata->wdev.links[link_id].cac_started)
+ continue;
+
+ chandef = link->conf->chanreq.oper;
+ ieee80211_link_release_channel(link);
+ cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, link_id);
}
}
}
@@ -3478,9 +3506,8 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, radar_detected_work);
- struct cfg80211_chan_def chandef = local->hw.conf.chandef;
+ struct cfg80211_chan_def chandef;
struct ieee80211_chanctx *ctx;
- int num_chanctx = 0;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3488,25 +3515,46 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
- num_chanctx++;
+ if (!ctx->radar_detected)
+ continue;
+
+ ctx->radar_detected = false;
+
chandef = ctx->conf.def;
+
+ ieee80211_dfs_cac_cancel(local, ctx);
+ cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
}
+}
- ieee80211_dfs_cac_cancel(local);
+static void
+ieee80211_radar_mark_chan_ctx_iterator(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ void *data)
+{
+ struct ieee80211_chanctx *ctx =
+ container_of(chanctx_conf, struct ieee80211_chanctx,
+ conf);
- if (num_chanctx > 1)
- /* XXX: multi-channel is not supported yet */
- WARN_ON(1);
- else
- cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
+ if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
+ return;
+
+ if (data && data != chanctx_conf)
+ return;
+
+ ctx->radar_detected = true;
}
-void ieee80211_radar_detected(struct ieee80211_hw *hw)
+void ieee80211_radar_detected(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf)
{
struct ieee80211_local *local = hw_to_local(hw);
trace_api_radar_detected(local);
+ ieee80211_iter_chan_contexts_atomic(hw, ieee80211_radar_mark_chan_ctx_iterator,
+ chanctx_conf);
+
wiphy_work_queue(hw->wiphy, &local->radar_detected_work);
}
EXPORT_SYMBOL(ieee80211_radar_detected);
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index bf6ef45af757..6a20fa099190 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -280,10 +280,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
/*
* This is a workaround for VHT-enabled STAs which break the spec
* and have the VHT-MCS Rx map filled in with value 3 for all eight
- * spacial streams, an example is AR9462.
+ * spatial streams, an example is AR9462.
*
* As per spec, in section 22.1.1 Introduction to the VHT PHY
- * A VHT STA shall support at least single spactial stream VHT-MCSs
+ * A VHT STA shall support at least single spatial stream VHT-MCSs
* 0 to 7 (transmit and receive) in all supported channel widths.
*/
if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) {
@@ -479,28 +479,6 @@ ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *link_sta)
}
}
-enum ieee80211_sta_rx_bandwidth
-ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
-{
- switch (width) {
- case NL80211_CHAN_WIDTH_20_NOHT:
- case NL80211_CHAN_WIDTH_20:
- return IEEE80211_STA_RX_BW_20;
- case NL80211_CHAN_WIDTH_40:
- return IEEE80211_STA_RX_BW_40;
- case NL80211_CHAN_WIDTH_80:
- return IEEE80211_STA_RX_BW_80;
- case NL80211_CHAN_WIDTH_160:
- case NL80211_CHAN_WIDTH_80P80:
- return IEEE80211_STA_RX_BW_160;
- case NL80211_CHAN_WIDTH_320:
- return IEEE80211_STA_RX_BW_320;
- default:
- WARN_ON_ONCE(1);
- return IEEE80211_STA_RX_BW_20;
- }
-}
-
/* FIXME: rename/move - this deals with everything not just VHT */
enum ieee80211_sta_rx_bandwidth
_ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta,
@@ -766,8 +744,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
if (changed > 0) {
ieee80211_recalc_min_chandef(sdata, link_sta->link_id);
- rate_control_rate_update(local, sband, link_sta->sta,
- link_sta->link_id, changed);
+ rate_control_rate_update(local, sband, link_sta, changed);
}
}
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 5c01e121481a..93b8668079a7 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -16,7 +16,7 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 047a33797020..40d5d9e48479 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -12,7 +12,7 @@
#include <linux/compiler.h>
#include <linux/ieee80211.h>
#include <linux/gfp.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/mac80211.h>
#include <crypto/aes.h>
#include <crypto/utils.h>
@@ -598,9 +598,6 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad,
memcpy(j_0, hdr->addr2, ETH_ALEN);
memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN);
- j_0[13] = 0;
- j_0[14] = 0;
- j_0[AES_BLOCK_SIZE - 1] = 0x01;
ccmp_gcmp_aad(skb, aad, spp_amsdu);
}
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index e40a988d6c80..aac359b5c71d 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -13,7 +13,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/crc-ccitt.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/mac802154.h>
#include <net/ieee802154_netdev.h>
diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c
index 1c0eeaa76560..a6dab3cc3ad8 100644
--- a/net/mac802154/scan.c
+++ b/net/mac802154/scan.c
@@ -176,6 +176,7 @@ void mac802154_scan_worker(struct work_struct *work)
struct ieee802154_local *local =
container_of(work, struct ieee802154_local, scan_work.work);
struct cfg802154_scan_request *scan_req;
+ enum nl802154_scan_types scan_req_type;
struct ieee802154_sub_if_data *sdata;
unsigned int scan_duration = 0;
struct wpan_phy *wpan_phy;
@@ -209,6 +210,7 @@ void mac802154_scan_worker(struct work_struct *work)
}
wpan_phy = scan_req->wpan_phy;
+ scan_req_type = scan_req->type;
scan_req_duration = scan_req->duration;
/* Look for the next valid chan */
@@ -246,7 +248,7 @@ void mac802154_scan_worker(struct work_struct *work)
goto end_scan;
}
- if (scan_req->type == NL802154_SCAN_ACTIVE) {
+ if (scan_req_type == NL802154_SCAN_ACTIVE) {
ret = mac802154_transmit_beacon_req(local, sdata);
if (ret)
dev_err(&sdata->dev->dev,
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 6fbed5bb5c3e..337d6faf0d2a 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -12,7 +12,7 @@
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/crc-ccitt.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/rtnetlink.h>
#include <net/ieee802154_netdev.h>
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index de52a9191da0..f6de136008f6 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -486,6 +486,9 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, bool tagv2,
tag = ctl.tag & MCTP_TAG_MASK;
rc = -EINVAL;
+ if (ctl.peer_addr == MCTP_ADDR_NULL)
+ ctl.peer_addr = MCTP_ADDR_ANY;
+
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
/* we do an irqsave here, even though we know the irq state,
@@ -753,10 +756,14 @@ static __init int mctp_init(void)
if (rc)
goto err_unreg_routes;
- mctp_device_init();
+ rc = mctp_device_init();
+ if (rc)
+ goto err_unreg_neigh;
return 0;
+err_unreg_neigh:
+ mctp_neigh_exit();
err_unreg_routes:
mctp_routes_exit();
err_unreg_proto:
diff --git a/net/mctp/device.c b/net/mctp/device.c
index acb97b257428..26ce34b7e88e 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -371,6 +371,8 @@ static int mctp_fill_link_af(struct sk_buff *skb,
return -ENODATA;
if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
return -EMSGSIZE;
+ if (nla_put_u8(skb, IFLA_MCTP_PHYS_BINDING, mdev->binding))
+ return -EMSGSIZE;
return 0;
}
@@ -385,6 +387,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev,
if (!mdev)
return 0;
ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ ret += nla_total_size(1); /* IFLA_MCTP_PHYS_BINDING */
mctp_dev_put(mdev);
return ret;
}
@@ -480,7 +483,8 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
}
static int mctp_register_netdevice(struct net_device *dev,
- const struct mctp_netdev_ops *ops)
+ const struct mctp_netdev_ops *ops,
+ enum mctp_phys_binding binding)
{
struct mctp_dev *mdev;
@@ -489,17 +493,19 @@ static int mctp_register_netdevice(struct net_device *dev,
return PTR_ERR(mdev);
mdev->ops = ops;
+ mdev->binding = binding;
return register_netdevice(dev);
}
int mctp_register_netdev(struct net_device *dev,
- const struct mctp_netdev_ops *ops)
+ const struct mctp_netdev_ops *ops,
+ enum mctp_phys_binding binding)
{
int rc;
rtnl_lock();
- rc = mctp_register_netdevice(dev, ops);
+ rc = mctp_register_netdevice(dev, ops, binding);
rtnl_unlock();
return rc;
@@ -524,25 +530,37 @@ static struct notifier_block mctp_dev_nb = {
.priority = ADDRCONF_NOTIFY_PRIORITY,
};
-void __init mctp_device_init(void)
+static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = {
+ {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0},
+ {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0},
+ {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0},
+};
+
+int __init mctp_device_init(void)
{
+ int err;
+
register_netdevice_notifier(&mctp_dev_nb);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
- NULL, mctp_dump_addrinfo, 0);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
- mctp_rtm_newaddr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
- mctp_rtm_deladdr, NULL, 0);
- rtnl_af_register(&mctp_af_ops);
+ err = rtnl_af_register(&mctp_af_ops);
+ if (err)
+ goto err_notifier;
+
+ err = rtnl_register_many(mctp_device_rtnl_msg_handlers);
+ if (err)
+ goto err_af;
+
+ return 0;
+err_af:
+ rtnl_af_unregister(&mctp_af_ops);
+err_notifier:
+ unregister_netdevice_notifier(&mctp_dev_nb);
+ return err;
}
void __exit mctp_device_exit(void)
{
+ rtnl_unregister_many(mctp_device_rtnl_msg_handlers);
rtnl_af_unregister(&mctp_af_ops);
- rtnl_unregister(PF_MCTP, RTM_DELADDR);
- rtnl_unregister(PF_MCTP, RTM_NEWADDR);
- rtnl_unregister(PF_MCTP, RTM_GETADDR);
-
unregister_netdevice_notifier(&mctp_dev_nb);
}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
index ffa0f9e0983f..590f642413e4 100644
--- a/net/mctp/neigh.c
+++ b/net/mctp/neigh.c
@@ -322,22 +322,29 @@ static struct pernet_operations mctp_net_ops = {
.exit = mctp_neigh_net_exit,
};
+static const struct rtnl_msg_handler mctp_neigh_rtnl_msg_handlers[] = {
+ {THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, mctp_rtm_newneigh, NULL, 0},
+ {THIS_MODULE, PF_MCTP, RTM_DELNEIGH, mctp_rtm_delneigh, NULL, 0},
+ {THIS_MODULE, PF_MCTP, RTM_GETNEIGH, NULL, mctp_rtm_getneigh, 0},
+};
+
int __init mctp_neigh_init(void)
{
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
- mctp_rtm_newneigh, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
- mctp_rtm_delneigh, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
- NULL, mctp_rtm_getneigh, 0);
-
- return register_pernet_subsys(&mctp_net_ops);
+ int err;
+
+ err = register_pernet_subsys(&mctp_net_ops);
+ if (err)
+ return err;
+
+ err = rtnl_register_many(mctp_neigh_rtnl_msg_handlers);
+ if (err)
+ unregister_pernet_subsys(&mctp_net_ops);
+
+ return err;
}
-void __exit mctp_neigh_exit(void)
+void mctp_neigh_exit(void)
{
+ rtnl_unregister_many(mctp_neigh_rtnl_msg_handlers);
unregister_pernet_subsys(&mctp_net_ops);
- rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
- rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
- rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
}
diff --git a/net/mctp/route.c b/net/mctp/route.c
index eefd7834d9a0..597e9cf5aa64 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -1474,26 +1474,39 @@ static struct pernet_operations mctp_net_ops = {
.exit = mctp_routes_net_exit,
};
+static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
+ {THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
+ {THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
+ {THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
+};
+
int __init mctp_routes_init(void)
{
+ int err;
+
dev_add_pack(&mctp_packet_type);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
- NULL, mctp_dump_rtinfo, 0);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
- mctp_newroute, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
- mctp_delroute, NULL, 0);
+ err = register_pernet_subsys(&mctp_net_ops);
+ if (err)
+ goto err_pernet;
+
+ err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
+ if (err)
+ goto err_rtnl;
- return register_pernet_subsys(&mctp_net_ops);
+ return 0;
+
+err_rtnl:
+ unregister_pernet_subsys(&mctp_net_ops);
+err_pernet:
+ dev_remove_pack(&mctp_packet_type);
+ return err;
}
void mctp_routes_exit(void)
{
+ rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
unregister_pernet_subsys(&mctp_net_ops);
- rtnl_unregister(PF_MCTP, RTM_DELROUTE);
- rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
- rtnl_unregister(PF_MCTP, RTM_GETROUTE);
dev_remove_pack(&mctp_packet_type);
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0e6c94a8c2bc..1f63b32d76d6 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1201,8 +1201,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int event,
rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}
static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
@@ -1665,7 +1664,7 @@ static int nla_put_via(struct sk_buff *skb,
u8 table, const void *addr, int alen)
{
static const int table_to_family[NEIGH_NR_TABLES + 1] = {
- AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+ AF_INET, AF_INET6, AF_PACKET,
};
struct nlattr *nla;
struct rtvia *via;
@@ -2278,8 +2277,7 @@ static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}
static int mpls_valid_getroute_req(struct sk_buff *skb,
@@ -2730,6 +2728,15 @@ static struct rtnl_af_ops mpls_af_ops __read_mostly = {
.get_stats_af_size = mpls_get_stats_af_size,
};
+static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = {
+ {THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0},
+ {THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0},
+ {THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, 0},
+ {THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
+ mpls_netconf_get_devconf, mpls_netconf_dump_devconf,
+ RTNL_FLAG_DUMP_UNLOCKED},
+};
+
static int __init mpls_init(void)
{
int err;
@@ -2746,26 +2753,30 @@ static int __init mpls_init(void)
dev_add_pack(&mpls_packet_type);
- rtnl_af_register(&mpls_af_ops);
-
- rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE,
- mpls_rtm_newroute, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE,
- mpls_rtm_delroute, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE,
- mpls_getroute, mpls_dump_routes, 0);
- rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
- mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf,
- RTNL_FLAG_DUMP_UNLOCKED);
- err = ipgre_tunnel_encap_add_mpls_ops();
+ err = rtnl_af_register(&mpls_af_ops);
+ if (err)
+ goto out_unregister_dev_type;
+
+ err = rtnl_register_many(mpls_rtnl_msg_handlers);
if (err)
+ goto out_unregister_rtnl_af;
+
+ err = ipgre_tunnel_encap_add_mpls_ops();
+ if (err) {
pr_err("Can't add mpls over gre tunnel ops\n");
+ goto out_unregister_rtnl;
+ }
err = 0;
out:
return err;
+out_unregister_rtnl:
+ rtnl_unregister_many(mpls_rtnl_msg_handlers);
+out_unregister_rtnl_af:
+ rtnl_af_unregister(&mpls_af_ops);
+out_unregister_dev_type:
+ dev_remove_pack(&mpls_packet_type);
out_unregister_pernet:
unregister_pernet_subsys(&mpls_net_ops);
goto out;
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 4385fd3b13be..6e73da94af7f 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -106,7 +106,7 @@ static int mpls_xmit(struct sk_buff *skb)
hh_len = 0;
/* Ensure there is enough space for the headers in the skb */
- if (skb_cow(skb, hh_len + new_header_size))
+ if (skb_cow_head(skb, hh_len + new_header_size))
goto drop;
skb_set_inner_protocol(skb, skb->protocol);
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index a8931349933c..b08ba959ac4f 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -22,7 +22,7 @@
#include <linux/kernel.h>
#include <crypto/sha2.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "protocol.h"
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 99382c317ebb..38d8121331d4 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -12,6 +12,7 @@
#include <net/netns/generic.h>
#include "protocol.h"
+#include "mib.h"
#define MPTCP_SYSCTL_PATH "net/mptcp"
@@ -27,8 +28,11 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int blackhole_timeout;
unsigned int close_timeout;
unsigned int stale_loss_cnt;
+ atomic_t active_disable_times;
+ unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
@@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->blackhole_timeout = 3600;
+ atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
@@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
return ret;
}
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+ int write, void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ atomic_set(&pernet->active_disable_times, 0);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "blackhole_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_blackhole_detect_timeout,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[6].data = &pernet->scheduler;
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
+ table[9].data = &pernet->blackhole_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
#endif /* CONFIG_SYSCTL */
+/* The following code block is to deal with middle box issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP globally when SYN+MPC are
+ * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
+ * disabled globally for 1hr at first. Then if it happens again, it is disabled
+ * for 2h, then 4h, 8h, ...
+ * The timeout is reset back to 1hr when a successful active MPTCP connection is
+ * fully established.
+ */
+
+/* Disable active MPTCP and record current jiffies and active_disable_times */
+void mptcp_active_disable(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct mptcp_pernet *pernet;
+
+ pernet = mptcp_get_pernet(net);
+
+ if (!READ_ONCE(pernet->blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in mptcp_active_should_disable() */
+ WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in mptcp_active_should_disable().
+ * We want pernet->active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&pernet->active_disable_times);
+
+ MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate timeout for MPTCP active disable
+ * Return true if we are still in the active MPTCP disable period
+ * Return false if timeout already expired and we should use active MPTCP
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+ struct net *net = sock_net(ssk);
+ unsigned int blackhole_timeout;
+ struct mptcp_pernet *pernet;
+ unsigned long timeout;
+ int disable_times;
+ int multiplier;
+
+ pernet = mptcp_get_pernet(net);
+ blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+ if (!blackhole_timeout)
+ return false;
+
+ disable_times = atomic_read(&pernet->active_disable_times);
+ if (!disable_times)
+ return false;
+
+ /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+ smp_rmb();
+
+ /* Limit timeout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(disable_times - 1, 6);
+
+ /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
+ timeout = READ_ONCE(pernet->active_disable_stamp) +
+ multiplier * blackhole_timeout * HZ;
+
+ return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed */
+void mptcp_active_enable(struct sock *sk)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+ if (atomic_read(&pernet->active_disable_times)) {
+ struct dst_entry *dst = sk_dst_get(sk);
+
+ if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+ atomic_set(&pernet->active_disable_times, 0);
+ }
+}
+
+/* Check the number of retransmissions, and fallback to TCP if needed */
+void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
+{
+ struct mptcp_subflow_context *subflow;
+ u32 timeouts;
+
+ if (!sk_is_mptcp(ssk))
+ return;
+
+ timeouts = inet_csk(ssk)->icsk_retransmits;
+ subflow = mptcp_subflow_ctx(ssk);
+
+ if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
+ if (timeouts == 2 || (timeouts < 2 && expired)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+ subflow->mpc_drop = 1;
+ mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ } else {
+ subflow->mpc_drop = 0;
+ }
+ }
+}
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 2d3efb405437..02205f7994d7 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
if (sf->request_bkup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
- if (sf->fully_established)
+ if (READ_ONCE(sf->fully_established))
flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
if (sf->conn_finished)
flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 7884217f33eb..19eb9292bd60 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -15,6 +15,9 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+ SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
+ SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
+ SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT),
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -25,7 +28,13 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX),
+ SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR),
+ SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR),
+ SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
+ SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK),
+ SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET),
SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
@@ -69,6 +78,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 66aa67f49d03..128282982843 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -10,6 +10,9 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
+ MPTCP_MIB_MPCAPABLEENDPATTEMPT, /* Prohibited MPC to port-based endp */
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -20,7 +23,13 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
+ MPTCP_MIB_DSSCORRUPTIONFALLBACK,/* DSS corruption detected, fallback */
+ MPTCP_MIB_DSSCORRUPTIONRESET, /* DSS corruption detected, MPJ subflow reset */
MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
@@ -70,6 +79,7 @@ enum linux_mptcp_mib_field {
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index c30a2a90a192..dcffd847af33 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -14,7 +14,7 @@
const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = {
[MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_BE32, },
[MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16),
[MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, },
@@ -112,7 +112,6 @@ const struct genl_ops mptcp_pm_nl_ops[11] = {
.dumpit = mptcp_pm_nl_get_addr_dumpit,
.policy = mptcp_pm_get_addr_nl_policy,
.maxattr = MPTCP_PM_ATTR_TOKEN,
- .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = MPTCP_PM_CMD_FLUSH_ADDRS,
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 370c3836b771..1603b3702e22 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
return false;
/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
- if (subflow->fully_established || snd_data_fin_enable ||
+ if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
sk->sk_state != TCP_ESTABLISHED)
return false;
@@ -930,7 +930,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
* will make the subflow fully established
*/
- if (likely(subflow->fully_established)) {
+ if (likely(READ_ONCE(subflow->fully_established))) {
/* on passive sockets, check for 3rd ack retransmission
* note that msk is always set by subflow_syn_recv_sock()
* for mp_join subflows
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 37f6dbcd8434..16c336c51940 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
pr_debug("msk=%p\n", msk);
+
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
@@ -430,17 +433,6 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_is_backup(msk, &skc_local);
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- *flags = 0;
- *ifindex = 0;
-
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
- return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
-}
-
int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
if (info->attrs[MPTCP_PM_ATTR_TOKEN])
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index f891bc714668..7a0f7998376a 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -149,7 +149,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
static bool
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *new_entry)
+ struct mptcp_pm_local *new_local)
{
struct mptcp_pm_addr_entry *entry;
bool found = false;
@@ -164,7 +164,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- *new_entry = *entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
found = true;
break;
}
@@ -175,7 +177,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *new_entry)
+ struct mptcp_pm_local *new_local)
{
struct mptcp_pm_addr_entry *entry;
bool found = false;
@@ -193,7 +195,9 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
- *new_entry = *entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
found = true;
break;
}
@@ -334,15 +338,21 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
+ struct timer_list *add_timer = NULL;
spin_lock_bh(&msk->pm.lock);
entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
- if (entry && (!check_id || entry->addr.id == addr->id))
+ if (entry && (!check_id || entry->addr.id == addr->id)) {
entry->retrans_times = ADD_ADDR_RETRANS_MAX;
+ add_timer = &entry->add_timer;
+ }
+ if (!check_id && entry)
+ list_del(&entry->list);
spin_unlock_bh(&msk->pm.lock);
- if (entry && (!check_id || entry->addr.id == addr->id))
- sk_stop_timer_sync(sk, &entry->add_timer);
+ /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */
+ if (add_timer)
+ sk_stop_timer_sync(sk, add_timer);
return entry;
}
@@ -502,7 +512,8 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+ lockdep_is_held(&pernet->lock)) {
if (entry->addr.id == id)
return entry;
}
@@ -514,7 +525,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+ lockdep_is_held(&pernet->lock)) {
if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
return entry;
}
@@ -524,11 +536,11 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- struct mptcp_pm_addr_entry local;
unsigned int add_addr_signal_max;
bool signal_and_subflow = false;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
+ struct mptcp_pm_local local;
unsigned int subflows_max;
pernet = pm_nl_get_pernet(sock_net(sk));
@@ -629,7 +641,7 @@ subflow:
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &local.addr, &addrs[i]);
+ __mptcp_subflow_connect(sk, &local, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
}
mptcp_pm_nl_check_work_pending(msk);
@@ -650,7 +662,7 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *remote,
- struct mptcp_addr_info *addrs)
+ struct mptcp_pm_local *locals)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
@@ -673,13 +685,15 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
continue;
if (msk->pm.subflows < subflows_max) {
- msk->pm.subflows++;
- addrs[i] = entry->addr;
+ locals[i].addr = entry->addr;
+ locals[i].flags = entry->flags;
+ locals[i].ifindex = entry->ifindex;
/* Special case for ID0: set the correct ID */
- if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port))
- addrs[i].id = 0;
+ if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port))
+ locals[i].addr.id = 0;
+ msk->pm.subflows++;
i++;
}
}
@@ -689,21 +703,19 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
- struct mptcp_addr_info local;
-
- memset(&local, 0, sizeof(local));
- local.family =
+ memset(&locals[i], 0, sizeof(locals[i]));
+ locals[i].addr.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
remote->family == AF_INET6 &&
ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
remote->family;
- if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote))
return 0;
msk->pm.subflows++;
- addrs[i++] = local;
+ i++;
}
return i;
@@ -711,7 +723,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
- struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
@@ -740,13 +752,13 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, &remote, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, locals);
if (nr == 0)
return;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0)
+ if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
sf_created = true;
spin_lock_bh(&msk->pm.lock);
@@ -771,7 +783,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
- struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow, *alt = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -782,10 +794,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
if (__mptcp_subflow_active(subflow)) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- break;
+ if (!subflow->stale) {
+ mptcp_pm_send_ack(msk, subflow, false, false);
+ return;
+ }
+
+ if (!alt)
+ alt = subflow;
}
}
+
+ if (alt)
+ mptcp_pm_send_ack(msk, alt, false, false);
}
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -850,7 +870,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
u8 id = subflow_get_local_id(subflow);
- if (inet_sk_state_load(ssk) == TCP_CLOSE)
+ if ((1 << inet_sk_state_load(ssk)) &
+ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE))
continue;
if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
@@ -862,12 +883,12 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
+ removed |= subflow->request_join;
/* the following takes care of updating the subflows counter */
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
- removed |= subflow->request_join;
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
@@ -1110,6 +1131,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
*/
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
+ WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true);
err = __inet_listen_sk(ssk, backlog);
if (!err)
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
@@ -1121,17 +1143,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
{
struct mptcp_pm_addr_entry *entry;
struct pm_nl_pernet *pernet;
- int ret = -1;
+ int ret;
pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
- list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
- ret = entry->addr.id;
- break;
- }
- }
+ entry = __lookup_addr(pernet, skc);
+ ret = entry ? entry->addr.id : -1;
rcu_read_unlock();
if (ret >= 0)
return ret;
@@ -1158,15 +1176,11 @@ bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
struct mptcp_pm_addr_entry *entry;
- bool backup = false;
+ bool backup;
rcu_read_lock();
- list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
- backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
- break;
- }
- }
+ entry = __lookup_addr(pernet, skc);
+ backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
rcu_read_unlock();
return backup;
@@ -1433,28 +1447,6 @@ out_free:
return ret;
}
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *entry;
- struct sock *sk = (struct sock *)msk;
- struct net *net = sock_net(sk);
-
- /* No entries with ID 0 */
- if (id == 0)
- return 0;
-
- rcu_read_lock();
- entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
- if (entry) {
- *flags = entry->flags;
- *ifindex = entry->ifindex;
- }
- rcu_read_unlock();
-
- return 0;
-}
-
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
@@ -1462,7 +1454,6 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
entry = mptcp_pm_del_add_timer(msk, addr, false);
if (entry) {
- list_del(&entry->list);
kfree(entry);
return true;
}
@@ -1672,8 +1663,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
}
/* Called from the in-kernel PM only */
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1701,8 +1692,8 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
}
-static void mptcp_nl_remove_addrs_list(struct net *net,
- struct list_head *rm_list)
+static void mptcp_nl_flush_addrs_list(struct net *net,
+ struct list_head *rm_list)
{
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
@@ -1715,7 +1706,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
if (!mptcp_pm_is_userspace(msk)) {
lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
release_sock(sk);
}
@@ -1756,7 +1747,7 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
pernet->next_id = 1;
bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu();
__flush_addrs(&free_list);
return 0;
@@ -1826,7 +1817,7 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
goto fail;
}
- spin_lock_bh(&pernet->lock);
+ rcu_read_lock();
entry = __lookup_addr_by_id(pernet, addr.addr.id);
if (!entry) {
GENL_SET_ERR_MSG(info, "address not found");
@@ -1840,11 +1831,11 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(msg, reply);
ret = genlmsg_reply(msg, info);
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
return ret;
unlock_fail:
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
fail:
nlmsg_free(msg);
@@ -1868,7 +1859,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
pernet = pm_nl_get_pernet(net);
- spin_lock_bh(&pernet->lock);
+ rcu_read_lock();
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i);
@@ -1893,7 +1884,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
genlmsg_end(msg, hdr);
}
}
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
cb->args[0] = id;
return msg->len;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8eaa9fbe3e34..e35178f5205f 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -91,6 +91,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *addr)
{
struct mptcp_pm_addr_entry *entry, *tmp;
+ struct sock *sk = (struct sock *)msk;
list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) {
if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) {
@@ -98,7 +99,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
* be used multiple times (e.g. fullmesh mode).
*/
list_del_rcu(&entry->list);
- kfree(entry);
+ sock_kfree_s(sk, entry, sizeof(*entry));
msk->pm.local_addr_used--;
return 0;
}
@@ -119,23 +120,6 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
return NULL;
}
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *match;
-
- spin_lock_bh(&msk->pm.lock);
- match = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
- spin_unlock_bh(&msk->pm.lock);
- if (match) {
- *flags = match->flags;
- *ifindex = match->ifindex;
- }
-
- return 0;
-}
-
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
@@ -324,14 +308,17 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val);
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
+ spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
goto out;
}
list_move(&match->list, &free_list);
+ spin_unlock_bh(&msk->pm.lock);
mptcp_pm_remove_addrs(msk, &free_list);
@@ -352,8 +339,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct mptcp_pm_addr_entry local = { 0 };
+ struct mptcp_pm_addr_entry entry = { 0 };
struct mptcp_addr_info addr_r;
+ struct mptcp_pm_local local;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
@@ -379,18 +367,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- err = mptcp_pm_parse_entry(laddr, info, true, &local);
+ err = mptcp_pm_parse_entry(laddr, info, true, &entry);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto create_err;
}
- if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
GENL_SET_ERR_MSG(info, "invalid addr flags");
err = -EINVAL;
goto create_err;
}
- local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
@@ -398,27 +386,29 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) {
+ if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
goto create_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
}
- lock_sock(sk);
-
- err = __mptcp_subflow_connect(sk, &local.addr, &addr_r);
+ local.addr = entry.addr;
+ local.flags = entry.flags;
+ local.ifindex = entry.ifindex;
+ lock_sock(sk);
+ err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (err)
- mptcp_userspace_pm_delete_local_addr(msk, &local);
+ mptcp_userspace_pm_delete_local_addr(msk, &entry);
else
msk->pm.subflows++;
spin_unlock_bh(&msk->pm.lock);
@@ -573,6 +563,7 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct net *net = sock_net(skb->sk);
+ struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
@@ -614,6 +605,17 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, &loc.addr, false)) {
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+
lock_sock(sk);
ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup);
release_sock(sk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 37ebcb7640eb..08a72242428c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -620,6 +620,18 @@ static bool mptcp_check_data_fin(struct sock *sk)
return ret;
}
+static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
+{
+ if (READ_ONCE(msk->allow_infinite_fallback)) {
+ MPTCP_INC_STATS(sock_net(ssk),
+ MPTCP_MIB_DSSCORRUPTIONFALLBACK);
+ mptcp_do_fallback(ssk);
+ } else {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
+ mptcp_subflow_reset(ssk);
+ }
+}
+
static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
struct sock *ssk,
unsigned int *bytes)
@@ -692,10 +704,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
moved += len;
seq += len;
- if (WARN_ON_ONCE(map_remaining < len))
- break;
+ if (unlikely(map_remaining < len)) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ mptcp_dss_corruption(msk, ssk);
+ }
} else {
- WARN_ON_ONCE(!fin);
+ if (unlikely(!fin)) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ mptcp_dss_corruption(msk, ssk);
+ }
+
sk_eat_skb(ssk, skb);
done = true;
}
@@ -2064,7 +2082,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
- tcp_cleanup_rbuf(ssk, 1);
+ if (tcp_can_send_ack(ssk))
+ tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
}
}
@@ -2187,7 +2206,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
cmsg_flags = MPTCP_CMSG_INQ;
while (copied < len) {
- int bytes_read;
+ int err, bytes_read;
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
@@ -2249,9 +2268,16 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld\n", timeo);
- sk_wait_data(sk, &timeo, NULL);
+ mptcp_rcv_space_adjust(msk, copied);
+ err = sk_wait_data(sk, &timeo, NULL);
+ if (err < 0) {
+ err = copied ? : err;
+ goto out_err;
+ }
}
+ mptcp_rcv_space_adjust(msk, copied);
+
out_err:
if (cmsg_flags && copied >= 0) {
if (cmsg_flags & MPTCP_CMSG_TS)
@@ -2267,8 +2293,6 @@ out_err:
pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
skb_queue_empty(&msk->receive_queue), copied);
- if (!(flags & MSG_PEEK))
- mptcp_rcv_space_adjust(msk, copied);
release_sock(sk);
return copied;
@@ -2704,8 +2728,8 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
- mptcp_close_timeout(sk);
+ close_timeout = (unsigned long)inet_csk(sk)->icsk_mtup.probe_timestamp -
+ tcp_jiffies32 + jiffies + mptcp_close_timeout(sk);
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
@@ -2846,8 +2870,10 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
+ rcu_read_lock();
ret = mptcp_init_sched(mptcp_sk(sk),
mptcp_sched_find(mptcp_get_scheduler(net)));
+ rcu_read_unlock();
if (ret)
return ret;
@@ -3121,8 +3147,7 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
@@ -3188,8 +3213,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
/* msk->subflow is still intact, the following will not free the first
* subflow
@@ -3493,7 +3517,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
struct tcp_sock *tp = tcp_sk(ssk);
unsigned long timeout;
- if (mptcp_subflow_ctx(ssk)->fully_established)
+ if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established))
return;
/* reschedule with a timeout above RTT, as we must look only for drop */
@@ -3504,7 +3528,8 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
timeout += jiffies;
WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ smp_store_release(&icsk->icsk_ack.pending,
+ icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER);
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}
@@ -3717,13 +3742,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
return 0;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
@@ -3744,9 +3762,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
+ if (subflow->request_mptcp) {
+ if (mptcp_active_should_disable(sk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ mptcp_subflow_early_fallback(msk, subflow);
+ } else if (mptcp_token_new_connect(ssk) < 0) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3b22313d1b86..a93e661ef5c4 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -236,6 +236,12 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+struct mptcp_pm_local {
+ struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
+};
+
struct mptcp_pm_addr_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -507,7 +513,6 @@ struct mptcp_subflow_context {
request_bkup : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
- fully_established : 1, /* path validated */
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
@@ -525,9 +530,12 @@ struct mptcp_subflow_context {
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
+ mpc_drop : 1, /* the MPC option has been dropped in a rtx */
__unused : 9;
bool data_avail;
bool scheduled;
+ bool pm_listener; /* a listener managed by the kernel PM? */
+ bool fully_established; /* path validated */
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -691,6 +699,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+
+void mptcp_active_disable(struct sock *sk);
+bool mptcp_active_should_disable(struct sock *ssk);
+void mptcp_active_enable(struct sock *sk);
+
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
@@ -719,7 +732,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr);
/* called with sk socket lock held */
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
struct socket **new_sock);
@@ -767,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk)
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
- if (subflow->request_join && !subflow->fully_established)
+ if (subflow->request_join && !READ_ONCE(subflow->fully_established))
return false;
return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
@@ -1014,14 +1027,6 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
@@ -1154,7 +1159,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
-void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
@@ -1218,6 +1222,14 @@ static inline void mptcp_do_fallback(struct sock *ssk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
+static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ pr_fallback(msk);
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
struct mptcp_ext *mpext;
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 78ed508ebc1b..df7dbcfa3b71 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -60,7 +60,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
size_t offs = 0;
rcu_read_lock();
- spin_lock(&mptcp_sched_list_lock);
list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
@@ -69,7 +68,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
if (WARN_ON_ONCE(offs >= maxlen))
break;
}
- spin_unlock(&mptcp_sched_list_lock);
rcu_read_unlock();
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 064ab3235893..fd021cf8286e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
}
}
+static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb)
+{
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ return -EPERM;
+}
+
/* Init mptcp request socket.
*
* Returns an error code if a JOIN has failed and a TCP reset
@@ -165,6 +172,8 @@ static int subflow_check_req(struct request_sock *req,
if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
+ if (unlikely(listener->pm_listener))
+ return subflow_reset_req_endp(req, skb);
if (opt_mp_join)
return 0;
} else if (opt_mp_join) {
@@ -172,6 +181,8 @@ static int subflow_check_req(struct request_sock *req,
if (mp_opt.backup)
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
+ } else if (unlikely(listener->pm_listener)) {
+ return subflow_reset_req_endp(req, skb);
}
if (opt_mp_capable && listener->request_mptcp) {
@@ -546,6 +557,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_capable = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_active_enable(parent);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +603,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
+ /* It looks like MPTCP is blocked, while TCP is not */
+ if (subflow->mpc_drop)
+ mptcp_active_disable(parent);
fallback:
mptcp_propagate_state(parent, sk, subflow, NULL);
}
@@ -785,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
const struct mptcp_options_received *mp_opt)
{
subflow_set_remote_key(msk, subflow, mp_opt);
- subflow->fully_established = 1;
+ WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
@@ -956,7 +971,8 @@ enum mapping_status {
MAPPING_EMPTY,
MAPPING_DATA_FIN,
MAPPING_DUMMY,
- MAPPING_BAD_CSUM
+ MAPPING_BAD_CSUM,
+ MAPPING_NODSS
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -971,8 +987,10 @@ static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
unsigned int skb_consumed;
skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
- if (WARN_ON_ONCE(skb_consumed >= skb->len))
+ if (unlikely(skb_consumed >= skb->len)) {
+ DEBUG_NET_WARN_ON_ONCE(1);
return true;
+ }
return skb->len - skb_consumed <= subflow->map_data_len -
mptcp_subflow_get_map_offset(subflow);
@@ -1111,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
return MAPPING_EMPTY;
}
+ /* If the required DSS has likely been dropped by a middlebox */
if (!subflow->map_valid)
- return MAPPING_INVALID;
+ return MAPPING_NODSS;
goto validate_seq;
}
@@ -1276,7 +1295,7 @@ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
else if (READ_ONCE(msk->csum_enabled))
return !subflow->valid_csum_seen;
else
- return !subflow->fully_established;
+ return READ_ONCE(msk->allow_infinite_fallback);
}
static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
@@ -1326,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
- status == MAPPING_BAD_CSUM))
+ status == MAPPING_BAD_CSUM || status == MAPPING_NODSS))
goto fallback;
if (status != MAPPING_OK)
@@ -1379,7 +1398,9 @@ fallback:
* subflow_error_report() will introduce the appropriate barriers
*/
subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
+ subflow->reset_reason = status == MAPPING_NODSS ?
+ MPTCP_RST_EMIDDLEBOX :
+ MPTCP_RST_EMPTCP;
reset:
WRITE_ONCE(ssk->sk_err, EBADMSG);
@@ -1565,28 +1586,31 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
#endif
}
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
+ int local_id = local->addr.id;
struct sockaddr_storage addr;
int remote_id = remote->id;
- int local_id = loc->id;
int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
- int ifindex;
- u8 flags;
+ /* The userspace PM sent the request too early? */
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, loc->family, &sf);
- if (err)
+ err = mptcp_subflow_create_socket(sk, local->addr.family, &sf);
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR);
+ pr_debug("msk=%p local=%d remote=%d create sock error: %d\n",
+ msk, local_id, remote_id, err);
goto err_out;
+ }
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1594,26 +1618,39 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (local_id)
+ /* if 'IPADDRANY', the ID will be set later, after the routing */
+ if (local->addr.family == AF_INET) {
+ if (!local->addr.addr.s_addr)
+ local_id = -1;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&local->addr.addr6))
+ local_id = -1;
+#endif
+ }
+
+ if (local_id >= 0)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
- &flags, &ifindex);
subflow->remote_key_valid = 1;
subflow->remote_key = READ_ONCE(msk->remote_key);
subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
- mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
+ mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family);
addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- ssk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = local->ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
- if (err)
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR);
+ pr_debug("msk=%p local=%d remote=%d bind error: %d\n",
+ msk, local_id, remote_id, err);
goto failed;
+ }
mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk,
@@ -1621,15 +1658,21 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_token = remote_token;
WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
- subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);
list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
- if (err && err != -EINPROGRESS)
+ if (err && err != -EINPROGRESS) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR);
+ pr_debug("msk=%p local=%d remote=%d connect error: %d\n",
+ msk, local_id, remote_id, err);
goto failed_unlink;
+ }
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX);
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
@@ -2006,7 +2049,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
- new_ctx->tcp_sock = newsk;
if (subflow_req->mp_capable) {
/* see comments in subflow_syn_recv_sock(), MPTCP connection
@@ -2023,7 +2065,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
- new_ctx->fully_established = 1;
+ WRITE_ONCE(new_ctx->fully_established, true);
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->request_bkup = subflow_req->request_bkup;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 5ecf611c8820..5cf55bde366d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1954,6 +1954,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
list_del_rcu(&ndp->node);
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
+ disable_work_sync(&ndp->work);
+
kfree(ndp);
}
EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b00fc285b334..b9f551f02c81 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -655,11 +655,9 @@ void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e)
{
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
int ret;
- INIT_LIST_HEAD(&sublist);
-
list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);
ret = nf_hook_slow(skb, state, e, 0);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index e4fa00abde6a..5988b9bb9029 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -163,11 +163,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to) {
+ if (ip > ip_to)
swap(ip, ip_to);
- if (ip < map->first_ip)
- return -IPSET_ERR_BITMAP_RANGE;
- }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -178,7 +175,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
ip_to = ip;
}
- if (ip_to > map->last_ip)
+ if (ip < map->first_ip || ip_to > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
for (; !before(ip_to, ip); ip += map->hosts) {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 61431690cbd5..cc20e6d56807 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -104,14 +104,19 @@ find_set_type(const char *name, u8 family, u8 revision)
static bool
load_settype(const char *name)
{
+ if (!try_module_get(THIS_MODULE))
+ return false;
+
nfnl_unlock(NFNL_SUBSYS_IPSET);
pr_debug("try to load ip_set_%s\n", name);
if (request_module("ip_set_%s", name) < 0) {
pr_warn("Can't find ip_set type %s\n", name);
nfnl_lock(NFNL_SUBSYS_IPSET);
+ module_put(THIS_MODULE);
return false;
}
nfnl_lock(NFNL_SUBSYS_IPSET);
+ module_put(THIS_MODULE);
return true;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index dc6ddc4abbe2..7d13110ce188 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3662,10 +3662,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
- if (nla_addr_family)
- udest->af = nla_get_u16(nla_addr_family);
- else
- udest->af = 0;
+ udest->af = nla_get_u16_default(nla_addr_family, 0);
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index f53899d12416..d8a284999544 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -35,7 +35,7 @@
#include <linux/gfp.h>
#include <net/protocol.h>
#include <net/tcp.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/ip_vs.h>
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index f100da4ba3bc..a9fd1d3fc2cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -340,7 +340,7 @@ void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
int __init ip_vs_protocol_init(void)
{
- char protocols[64];
+ char protocols[64] = { 0 };
#define REGISTER_PROTOCOL(p) \
do { \
register_ip_vs_protocol(p); \
@@ -348,8 +348,6 @@ int __init ip_vs_protocol_init(void)
strcat(protocols, (p)->name); \
} while (0)
- protocols[0] = '\0';
- protocols[2] = '\0';
#ifdef CONFIG_IP_VS_PROTO_TCP
REGISTER_PROTOCOL(&ip_vs_protocol_tcp);
#endif
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index be74c0906dda..3402675bf521 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -51,7 +51,7 @@
#include <linux/kernel.h>
#include <linux/sched/signal.h>
-#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
+#include <linux/unaligned.h> /* Used for ntoh_seq and hton_seq */
#include <net/ip.h>
#include <net/sock.h>
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 5257d5e7eb09..06b084844700 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -23,6 +23,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
struct bpf_nf_link {
struct bpf_link link;
struct nf_hook_ops hook_ops;
+ netns_tracker ns_tracker;
struct net *net;
u32 dead;
const struct nf_defrag_hook *defrag_hook;
@@ -42,7 +43,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
hook = rcu_dereference(*ptr_global_hook);
if (!hook) {
rcu_read_unlock();
- err = request_module(mod);
+ err = request_module("%s", mod);
if (err)
return ERR_PTR(err < 0 ? err : -EINVAL);
@@ -120,6 +121,7 @@ static void bpf_nf_link_release(struct bpf_link *link)
if (!cmpxchg(&nf_link->dead, 0, 1)) {
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
bpf_nf_disable_defrag(nf_link);
+ put_net_track(nf_link->net, &nf_link->ns_tracker);
}
}
@@ -150,11 +152,12 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+ const struct nf_defrag_hook *hook = nf_link->defrag_hook;
info->netfilter.pf = nf_link->hook_ops.pf;
info->netfilter.hooknum = nf_link->hook_ops.hooknum;
info->netfilter.priority = nf_link->hook_ops.priority;
- info->netfilter.flags = 0;
+ info->netfilter.flags = hook ? BPF_F_NETFILTER_IP_DEFRAG : 0;
return 0;
}
@@ -257,6 +260,8 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ get_net_track(net, &link->ns_tracker, GFP_KERNEL);
+
return bpf_link_settle(&link_primer);
}
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 34ba14e59e95..4890af4dc263 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -522,11 +522,10 @@ unsigned int nf_conncount_count(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_conncount_count);
-struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
- unsigned int keylen)
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
{
struct nf_conncount_data *data;
- int ret, i;
+ int i;
if (keylen % sizeof(u32) ||
keylen / sizeof(u32) > MAX_KEYLEN ||
@@ -539,12 +538,6 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
if (!data)
return ERR_PTR(-ENOMEM);
- ret = nf_ct_netns_get(net, family);
- if (ret < 0) {
- kfree(data);
- return ERR_PTR(ret);
- }
-
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
data->root[i] = RB_ROOT;
@@ -581,13 +574,11 @@ static void destroy_tree(struct rb_root *r)
}
}
-void nf_conncount_destroy(struct net *net, unsigned int family,
- struct nf_conncount_data *data)
+void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data)
{
unsigned int i;
cancel_work_sync(&data->gc_work);
- nf_ct_netns_put(net, family);
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
destroy_tree(&data->root[i]);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9384426ddc06..9db3e2b0b1c3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -988,6 +988,56 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
tstamp->start = ktime_get_real_ns();
}
+/**
+ * nf_ct_match_reverse - check if ct1 and ct2 refer to identical flow
+ * @ct1: conntrack in hash table to check against
+ * @ct2: merge candidate
+ *
+ * returns true if ct1 and ct2 happen to refer to the same flow, but
+ * in opposing directions, i.e.
+ * ct1: a:b -> c:d
+ * ct2: c:d -> a:b
+ * for both directions. If so, @ct2 should not have been created
+ * as the skb should have been picked up as ESTABLISHED flow.
+ * But ct1 was not yet committed to hash table before skb that created
+ * ct2 had arrived.
+ *
+ * Note we don't compare netns because ct entries in different net
+ * namespace cannot clash to begin with.
+ *
+ * @return: true if ct1 and ct2 are identical when swapping origin/reply.
+ */
+static bool
+nf_ct_match_reverse(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+ u16 id1, id2;
+
+ if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ct2->tuplehash[IP_CT_DIR_REPLY].tuple))
+ return false;
+
+ if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+ return false;
+
+ id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_ORIGINAL);
+ id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_REPLY);
+ if (id1 != id2)
+ return false;
+
+ id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_REPLY);
+ id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL);
+
+ return id1 == id2;
+}
+
+static int nf_ct_can_merge(const struct nf_conn *ct,
+ const struct nf_conn *loser_ct)
+{
+ return nf_ct_match(ct, loser_ct) ||
+ nf_ct_match_reverse(ct, loser_ct);
+}
+
/* caller must hold locks to prevent concurrent changes */
static int __nf_ct_resolve_clash(struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h)
@@ -999,11 +1049,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
loser_ct = nf_ct_get(skb, &ctinfo);
- if (nf_ct_is_dying(ct))
- return NF_DROP;
-
- if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
- nf_ct_match(ct, loser_ct)) {
+ if (nf_ct_can_merge(ct, loser_ct)) {
struct net *net = nf_ct_net(ct);
nf_conntrack_get(&ct->ct_general);
@@ -1722,7 +1768,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
hash);
if (IS_ERR(ct))
- return (struct nf_conntrack_tuple_hash *)ct;
+ return ERR_CAST(ct);
if (!nf_ct_add_synproxy(ct, tmpl)) {
nf_conntrack_free(ct);
@@ -2151,80 +2197,6 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
nf_conntrack_get(skb_nfct(nskb));
}
-static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- const struct nf_nat_hook *nat_hook;
- struct nf_conntrack_tuple_hash *h;
- struct nf_conntrack_tuple tuple;
- unsigned int status;
- int dataoff;
- u16 l3num;
- u8 l4num;
-
- l3num = nf_ct_l3num(ct);
-
- dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
- if (dataoff <= 0)
- return NF_DROP;
-
- if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
- l4num, net, &tuple))
- return NF_DROP;
-
- if (ct->status & IPS_SRC_NAT) {
- memcpy(tuple.src.u3.all,
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all,
- sizeof(tuple.src.u3.all));
- tuple.src.u.all =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all;
- }
-
- if (ct->status & IPS_DST_NAT) {
- memcpy(tuple.dst.u3.all,
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all,
- sizeof(tuple.dst.u3.all));
- tuple.dst.u.all =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all;
- }
-
- h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
- if (!h)
- return NF_ACCEPT;
-
- /* Store status bits of the conntrack that is clashing to re-do NAT
- * mangling according to what it has been done already to this packet.
- */
- status = ct->status;
-
- nf_ct_put(ct);
- ct = nf_ct_tuplehash_to_ctrack(h);
- nf_ct_set(skb, ct, ctinfo);
-
- nat_hook = rcu_dereference(nf_nat_hook);
- if (!nat_hook)
- return NF_ACCEPT;
-
- if (status & IPS_SRC_NAT) {
- unsigned int verdict = nat_hook->manip_pkt(skb, ct,
- NF_NAT_MANIP_SRC,
- IP_CT_DIR_ORIGINAL);
- if (verdict != NF_ACCEPT)
- return verdict;
- }
-
- if (status & IPS_DST_NAT) {
- unsigned int verdict = nat_hook->manip_pkt(skb, ct,
- NF_NAT_MANIP_DST,
- IP_CT_DIR_ORIGINAL);
- if (verdict != NF_ACCEPT)
- return verdict;
- }
-
- return NF_ACCEPT;
-}
-
/* This packet is coming from userspace via nf_queue, complete the packet
* processing after the helper invocation in nf_confirm().
*/
@@ -2288,17 +2260,6 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
if (!ct)
return NF_ACCEPT;
- if (!nf_ct_is_confirmed(ct)) {
- int ret = __nf_conntrack_update(net, skb, ct, ctinfo);
-
- if (ret != NF_ACCEPT)
- return ret;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct)
- return NF_ACCEPT;
- }
-
return nf_confirm_cthelper(skb, ct, ctinfo);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4cbf71d0786b..36168f8b6efa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -382,7 +382,7 @@ nla_put_failure:
#define ctnetlink_dump_secctx(a, b) (0)
#endif
-#ifdef CONFIG_NF_CONNTRACK_LABELS
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
static inline int ctnetlink_label_size(const struct nf_conn *ct)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
@@ -391,6 +391,7 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct)
return 0;
return nla_total_size(sizeof(labels->bits));
}
+#endif
static int
ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
@@ -411,10 +412,6 @@ ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
return 0;
}
-#else
-#define ctnetlink_dump_labels(a, b) (0)
-#define ctnetlink_label_size(a) (0)
-#endif
#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
@@ -652,7 +649,6 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
return len + len4;
}
-#endif
static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
{
@@ -690,6 +686,7 @@ static inline size_t ctnetlink_timestamp_size(const struct nf_conn *ct)
return 0;
#endif
}
+#endif
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
@@ -1579,9 +1576,6 @@ static int ctnetlink_flush_conntrack(struct net *net,
};
if (ctnetlink_needs_filter(family, cda)) {
- if (cda[CTA_FILTER])
- return -EOPNOTSUPP;
-
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
@@ -1610,14 +1604,14 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
if (err < 0)
return err;
- if (cda[CTA_TUPLE_ORIG])
+ if (cda[CTA_TUPLE_ORIG] && !cda[CTA_FILTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
family, &zone);
- else if (cda[CTA_TUPLE_REPLY])
+ else if (cda[CTA_TUPLE_REPLY] && !cda[CTA_FILTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
family, &zone);
else {
- u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC;
+ u8 u3 = info->nfmsg->version || cda[CTA_FILTER] ? family : AF_UNSPEC;
return ctnetlink_flush_conntrack(info->net, cda,
NETLINK_CB(skb).portid,
@@ -3876,7 +3870,7 @@ static int __init ctnetlink_init(void)
{
int ret;
- NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ae493599a3ef..0c1d086e96cb 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -14,7 +14,7 @@
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/tcp.h>
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 5c1ff07eaee0..df72b0376970 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -670,8 +670,14 @@ static int __init nf_flow_table_module_init(void)
if (ret)
goto out_offload;
+ ret = nf_flow_register_bpf();
+ if (ret)
+ goto out_bpf;
+
return 0;
+out_bpf:
+ nf_flow_table_offload_exit();
out_offload:
unregister_pernet_subsys(&nf_flow_table_net_ops);
return ret;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 8b541a080342..b0f199171932 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -101,7 +101,7 @@ static int __init nf_flow_inet_module_init(void)
nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
- return nf_flow_register_bpf();
+ return 0;
}
static void __exit nf_flow_inet_module_exit(void)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 016c816d91cb..aad84aabd7f1 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -183,7 +183,35 @@ hash_by_src(const struct net *net,
return reciprocal_scale(hash, nf_nat_htable_size);
}
-/* Is this tuple already taken? (not by us) */
+/**
+ * nf_nat_used_tuple - check if proposed nat tuple clashes with existing entry
+ * @tuple: proposed NAT binding
+ * @ignored_conntrack: our (unconfirmed) conntrack entry
+ *
+ * A conntrack entry can be inserted to the connection tracking table
+ * if there is no existing entry with an identical tuple in either direction.
+ *
+ * Example:
+ * INITIATOR -> NAT/PAT -> RESPONDER
+ *
+ * INITIATOR passes through NAT/PAT ("us") and SNAT is done (saddr rewrite).
+ * Then, later, NAT/PAT itself also connects to RESPONDER.
+ *
+ * This will not work if the SNAT done earlier has same IP:PORT source pair.
+ *
+ * Conntrack table has:
+ * ORIGINAL: $IP_INITIATOR:$SPORT -> $IP_RESPONDER:$DPORT
+ * REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT
+ *
+ * and new locally originating connection wants:
+ * ORIGINAL: $IP_NAT:$SPORT -> $IP_RESPONDER:$DPORT
+ * REPLY: $IP_RESPONDER:$DPORT -> $IP_NAT:$SPORT
+ *
+ * ... which would mean incoming packets cannot be distinguished between
+ * the existing and the newly added entry (identical IP_CT_DIR_REPLY tuple).
+ *
+ * @return: true if the proposed NAT mapping collides with an existing entry.
+ */
static int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack)
@@ -200,6 +228,94 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}
+static bool nf_nat_allow_clash(const struct nf_conn *ct)
+{
+ return nf_ct_l4proto_find(nf_ct_protonum(ct))->allow_clash;
+}
+
+/**
+ * nf_nat_used_tuple_new - check if to-be-inserted conntrack collides with existing entry
+ * @tuple: proposed NAT binding
+ * @ignored_ct: our (unconfirmed) conntrack entry
+ *
+ * Same as nf_nat_used_tuple, but also check for rare clash in reverse
+ * direction. Should be called only when @tuple has not been altered, i.e.
+ * @ignored_conntrack will not be subject to NAT.
+ *
+ * @return: true if the proposed NAT mapping collides with existing entry.
+ */
+static noinline bool
+nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conn *ignored_ct)
+{
+ static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
+ const struct nf_conntrack_tuple_hash *thash;
+ const struct nf_conntrack_zone *zone;
+ struct nf_conn *ct;
+ bool taken = true;
+ struct net *net;
+
+ if (!nf_nat_used_tuple(tuple, ignored_ct))
+ return false;
+
+ if (!nf_nat_allow_clash(ignored_ct))
+ return true;
+
+ /* Initial choice clashes with existing conntrack.
+ * Check for (rare) reverse collision.
+ *
+ * This can happen when new packets are received in both directions
+ * at the exact same time on different CPUs.
+ *
+ * Without SMP, first packet creates new conntrack entry and second
+ * packet is resolved as established reply packet.
+ *
+ * With parallel processing, both packets could be picked up as
+ * new and both get their own ct entry allocated.
+ *
+ * If ignored_conntrack and colliding ct are not subject to NAT then
+ * pretend the tuple is available and let later clash resolution
+ * handle this at insertion time.
+ *
+ * Without it, the 'reply' packet has its source port rewritten
+ * by nat engine.
+ */
+ if (READ_ONCE(ignored_ct->status) & uses_nat)
+ return true;
+
+ net = nf_ct_net(ignored_ct);
+ zone = nf_ct_zone(ignored_ct);
+
+ thash = nf_conntrack_find_get(net, zone, tuple);
+ if (unlikely(!thash)) /* clashing entry went away */
+ return false;
+
+ ct = nf_ct_tuplehash_to_ctrack(thash);
+
+ /* NB: IP_CT_DIR_ORIGINAL should be impossible because
+ * nf_nat_used_tuple() handles origin collisions.
+ *
+ * Handle remote chance other CPU confirmed its ct right after.
+ */
+ if (thash->tuple.dst.dir != IP_CT_DIR_REPLY)
+ goto out;
+
+ /* clashing connection subject to NAT? Retry with new tuple. */
+ if (READ_ONCE(ct->status) & uses_nat)
+ goto out;
+
+ if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) &&
+ nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) {
+ taken = false;
+ goto out;
+ }
+out:
+ nf_ct_put(ct);
+ return taken;
+}
+
static bool nf_nat_may_kill(struct nf_conn *ct, unsigned long flags)
{
static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT |
@@ -611,7 +727,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (nf_in_range(orig_tuple, range)) {
- if (!nf_nat_used_tuple(orig_tuple, ct)) {
+ if (!nf_nat_used_tuple_new(orig_tuple, ct)) {
*tuple = *orig_tuple;
return;
}
@@ -974,10 +1090,8 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
range->flags |= NF_NAT_RANGE_MAP_IPS;
}
- if (tb[CTA_NAT_V4_MAXIP])
- range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
- else
- range->max_addr.ip = range->min_addr.ip;
+ range->max_addr.ip = nla_get_be32_default(tb[CTA_NAT_V4_MAXIP],
+ range->min_addr.ip);
return 0;
}
@@ -1104,7 +1218,7 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
if (!nat_proto_net->nat_hook_ops) {
WARN_ON(nat_proto_net->users != 0);
- nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL);
+ nat_ops = kmemdup_array(orig_nat_ops, ops_count, sizeof(*orig_nat_ops), GFP_KERNEL);
if (!nat_ops) {
mutex_unlock(&nf_nat_proto_mutex);
return -ENOMEM;
@@ -1208,7 +1322,6 @@ static const struct nf_nat_hook nat_hook = {
#ifdef CONFIG_XFRM
.decode_session = __nf_nat_decode_session,
#endif
- .manip_pkt = nf_nat_manip_pkt,
.remove_nat_bysrc = nf_nat_cleanup_conntrack,
};
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 5b140c12b7df..3fa3f5dfb264 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/tcp.h>
#include <net/netns/generic.h>
#include <linux/proc_fs.h>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0a2f79346958..0b9f1e8dfe49 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,6 +26,9 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
#define NFT_SET_MAX_ANONLEN 16
+/* limit compaction to avoid huge kmalloc/krealloc sizes. */
+#define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem))
+
unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
@@ -146,6 +149,8 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->report = nlmsg_report(nlh);
ctx->flags = nlh->nlmsg_flags;
ctx->seq = nlh->nlmsg_seq;
+
+ bitmap_zero(ctx->reg_inited, NFT_REG32_NUM);
}
static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
@@ -389,10 +394,91 @@ static void nf_tables_unregister_hook(struct net *net,
return __nf_tables_unregister_hook(net, table, chain, false);
}
+static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b)
+{
+ /* NB: the ->bound equality check is defensive, at this time we only merge
+ * a new nft_trans_elem transaction request with the transaction tail
+ * element, but a->bound != b->bound would imply a NEWRULE transaction
+ * is queued in-between.
+ *
+ * The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents
+ * huge krealloc() requests.
+ */
+ return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS;
+}
+
+static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
+ struct nft_trans_elem *tail,
+ struct nft_trans_elem *trans,
+ gfp_t gfp)
+{
+ unsigned int nelems, old_nelems = tail->nelems;
+ struct nft_trans_elem *new_trans;
+
+ if (!nft_trans_collapse_set_elem_allowed(tail, trans))
+ return false;
+
+ /* "cannot happen", at this time userspace element add
+ * requests always allocate a new transaction element.
+ *
+ * This serves as a reminder to adjust the list_add_tail
+ * logic below in case this ever changes.
+ */
+ if (WARN_ON_ONCE(trans->nelems != 1))
+ return false;
+
+ if (check_add_overflow(old_nelems, trans->nelems, &nelems))
+ return false;
+
+ /* krealloc might free tail which invalidates list pointers */
+ list_del_init(&tail->nft_trans.list);
+
+ new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp);
+ if (!new_trans) {
+ list_add_tail(&tail->nft_trans.list, &nft_net->commit_list);
+ return false;
+ }
+
+ /*
+ * new_trans->nft_trans.list contains garbage, but
+ * list_add_tail() doesn't care.
+ */
+ new_trans->nelems = nelems;
+ new_trans->elems[old_nelems] = trans->elems[0];
+ list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list);
+
+ return true;
+}
+
+static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
+ struct nft_trans *trans, gfp_t gfp)
+{
+ struct nft_trans *tail;
+
+ if (list_empty(&nft_net->commit_list))
+ return false;
+
+ tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list);
+
+ if (tail->msg_type != trans->msg_type)
+ return false;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_collapse_set_elem(nft_net,
+ nft_trans_container_elem(tail),
+ nft_trans_container_elem(trans), gfp);
+ }
+
+ return false;
+}
+
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans_binding *binding;
+ struct nft_trans_set *trans_set;
list_add_tail(&trans->list, &nft_net->commit_list);
@@ -402,9 +488,13 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
+ trans_set = nft_trans_container_set(trans);
+
if (!nft_trans_set_update(trans) &&
nft_set_is_anonymous(nft_trans_set(trans)))
list_add_tail(&binding->binding_list, &nft_net->binding_list);
+
+ list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list);
break;
case NFT_MSG_NEWCHAIN:
if (!nft_trans_chain_update(trans) &&
@@ -414,6 +504,24 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
}
}
+static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
+ gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
+ trans->msg_type != NFT_MSG_DELSETELEM);
+
+ might_alloc(gfp);
+
+ if (nft_trans_try_collapse(nft_net, trans, gfp)) {
+ kfree(trans);
+ return;
+ }
+
+ nft_trans_commit_list_add_tail(net, trans);
+}
+
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@@ -611,6 +719,7 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
trans_set = nft_trans_container_set(trans);
INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list);
+ INIT_LIST_HEAD(&trans_set->list_trans_newset);
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
@@ -1816,7 +1925,8 @@ nla_put_failure:
return -ENOSPC;
}
-static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+static int nft_dump_basechain_hook(struct sk_buff *skb,
+ const struct net *net, int family,
const struct nft_base_chain *basechain,
const struct list_head *hook_list)
{
@@ -1841,7 +1951,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
if (!hook_list)
hook_list = &basechain->hook_list;
- list_for_each_entry(hook, hook_list, list) {
+ list_for_each_entry_rcu(hook, hook_list, list,
+ lockdep_commit_lock_is_held(net)) {
if (!first)
first = hook;
@@ -1892,7 +2003,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
const struct nft_base_chain *basechain = nft_base_chain(chain);
struct nft_stats __percpu *stats;
- if (nft_dump_basechain_hook(skb, family, basechain, hook_list))
+ if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -2074,14 +2185,14 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
nft_counter_policy, NULL);
if (err < 0)
- return ERR_PTR(err);
+ return ERR_PTR_PCPU(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return ERR_PTR(-EINVAL);
+ return ERR_PTR_PCPU(-EINVAL);
newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR_PCPU(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -2525,10 +2636,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats)) {
+ if (IS_ERR_PCPU(stats)) {
nft_chain_release_hook(&hook);
kfree(basechain);
- return PTR_ERR(stats);
+ return PTR_ERR_PCPU(stats);
}
rcu_assign_pointer(basechain->stats, stats);
}
@@ -2642,7 +2753,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
struct nft_chain_hook hook = {};
- struct nft_stats *stats = NULL;
+ struct nft_stats __percpu *stats = NULL;
struct nft_hook *h, *next;
struct nf_hook_ops *ops;
struct nft_trans *trans;
@@ -2738,8 +2849,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats)) {
- err = PTR_ERR(stats);
+ if (IS_ERR_PCPU(stats)) {
+ err = PTR_ERR_PCPU(stats);
goto err_hooks;
}
}
@@ -3286,25 +3397,37 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME])
return -EINVAL;
+ rcu_read_lock();
+
type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
- if (!type)
- return -ENOENT;
+ if (!type) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
- if (!type->inner_ops)
- return -EOPNOTSUPP;
+ if (!type->inner_ops) {
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
err = nla_parse_nested_deprecated(info->tb, type->maxattr,
tb[NFTA_EXPR_DATA],
type->policy, NULL);
if (err < 0)
- goto err_nla_parse;
+ goto out_unlock;
info->attr = nla;
info->ops = type->inner_ops;
+ /* No module reference will be taken on type->owner.
+ * Presence of type->inner_ops implies that the expression
+ * is builtin, so it cannot go away.
+ */
+ rcu_read_unlock();
return 0;
-err_nla_parse:
+out_unlock:
+ rcu_read_unlock();
return err;
}
@@ -3403,13 +3526,15 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
* Rules
*/
-static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
+static struct nft_rule *__nft_rule_lookup(const struct net *net,
+ const struct nft_chain *chain,
u64 handle)
{
struct nft_rule *rule;
// FIXME: this sucks
- list_for_each_entry_rcu(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list,
+ lockdep_commit_lock_is_held(net)) {
if (handle == rule->handle)
return rule;
}
@@ -3417,13 +3542,14 @@ static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
return ERR_PTR(-ENOENT);
}
-static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
+static struct nft_rule *nft_rule_lookup(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla)
{
if (nla == NULL)
return ERR_PTR(-EINVAL);
- return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+ return __nft_rule_lookup(net, chain, be64_to_cpu(nla_get_be64(nla)));
}
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
@@ -3724,7 +3850,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
return 0;
}
-/* called with rcu_read_lock held */
+/* Caller must hold rcu read lock or transaction mutex */
static struct sk_buff *
nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
@@ -3751,7 +3877,7 @@ nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
return ERR_CAST(chain);
}
- rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ rule = nft_rule_lookup(net, chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return ERR_CAST(rule);
@@ -3860,8 +3986,11 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
kfree(rule);
}
+/* can only be used if rule is no longer visible to dumps */
static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
+ lockdep_commit_lock_is_held(ctx->net);
+
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
}
@@ -3878,7 +4007,6 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
- const struct nft_data *data;
struct nft_rule *rule;
int err;
@@ -3899,7 +4027,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
/* This may call nft_chain_validate() recursively,
* callers that do so must increment ctx->level.
*/
- err = expr->ops->validate(ctx, expr, &data);
+ err = expr->ops->validate(ctx, expr);
if (err < 0)
return err;
}
@@ -3976,7 +4104,8 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, dummy_iter.genmask))
continue;
@@ -4050,7 +4179,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
- rule = __nft_rule_lookup(chain, handle);
+ rule = __nft_rule_lookup(net, chain, handle);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
@@ -4072,7 +4201,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
- old_rule = __nft_rule_lookup(chain, pos_handle);
+ old_rule = __nft_rule_lookup(net, chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
@@ -4289,7 +4418,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
- rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ rule = nft_rule_lookup(info->net, chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
if (PTR_ERR(rule) == -ENOENT &&
NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
@@ -4449,7 +4578,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
-static struct nft_set *nft_set_lookup(const struct nft_table *table,
+static struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -4457,7 +4587,8 @@ static struct nft_set *nft_set_lookup(const struct nft_table *table,
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry_rcu(set, &table->sets, list) {
+ list_for_each_entry_rcu(set, &table->sets, list,
+ lockdep_commit_lock_is_held(net)) {
if (!nla_strcmp(nla, set->name) &&
nft_active_genmask(set, genmask))
return set;
@@ -4485,17 +4616,16 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
- struct nft_trans *trans;
+ struct nft_trans_set *trans;
- list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->msg_type == NFT_MSG_NEWSET) {
- struct nft_set *set = nft_trans_set(trans);
+ /* its likely the id we need is at the tail, not at start */
+ list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) {
+ struct nft_set *set = trans->set;
- if (id == nft_trans_set_id(trans) &&
- set->table == table &&
- nft_active_genmask(set, genmask))
- return set;
- }
+ if (id == trans->set_id &&
+ set->table == table &&
+ nft_active_genmask(set, genmask))
+ return set;
}
return ERR_PTR(-ENOENT);
}
@@ -4508,7 +4638,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
{
struct nft_set *set;
- set = nft_set_lookup(table, nla_set_name, genmask);
+ set = nft_set_lookup(net, table, nla_set_name, genmask);
if (IS_ERR(set)) {
if (!nla_set_id)
return set;
@@ -4587,7 +4717,7 @@ int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
return -ERANGE;
ms *= NSEC_PER_MSEC;
- *result = nsecs_to_jiffies64(ms);
+ *result = nsecs_to_jiffies64(ms) ? : !!ms;
return 0;
}
@@ -4884,7 +5014,7 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
if (!nla[NFTA_SET_TABLE])
return -EINVAL;
- set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
@@ -5220,7 +5350,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
- set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
if (PTR_ERR(set) != -ENOENT) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
@@ -5422,7 +5552,7 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
set = nft_set_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_SET_NAME];
- set = nft_set_lookup(table, attr, genmask);
+ set = nft_set_lookup(net, table, attr, genmask);
}
if (IS_ERR(set)) {
@@ -5486,7 +5616,8 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
@@ -5628,6 +5759,8 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
enum nft_trans_phase phase)
{
+ lockdep_commit_lock_is_held(ctx->net);
+
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
nft_set_trans_unbind(ctx, set);
@@ -5688,12 +5821,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
.align = __alignof__(u8),
},
[NFT_SET_EXT_TIMEOUT] = {
- .len = sizeof(u64),
- .align = __alignof__(u64),
- },
- [NFT_SET_EXT_EXPIRATION] = {
- .len = sizeof(u64),
- .align = __alignof__(u64),
+ .len = sizeof(struct nft_timeout),
+ .align = __alignof__(struct nft_timeout),
},
[NFT_SET_EXT_USERDATA] = {
.len = sizeof(struct nft_userdata),
@@ -5812,25 +5941,32 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
- nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
- nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ u64 timeout = READ_ONCE(nft_set_ext_timeout(ext)->timeout);
+ u64 set_timeout = READ_ONCE(set->timeout);
+ __be64 msecs = 0;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- u64 expires, now = get_jiffies_64();
+ if (set_timeout != timeout) {
+ msecs = nf_jiffies64_to_msecs(timeout);
+ if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, msecs,
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
- expires = *nft_set_ext_expiration(ext);
- if (time_before64(now, expires))
- expires -= now;
- else
- expires = 0;
+ if (timeout > 0) {
+ u64 expires, now = get_jiffies_64();
- if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
- nf_jiffies64_to_msecs(expires),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ expires = READ_ONCE(nft_set_ext_timeout(ext)->expiration);
+ if (time_before64(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ nf_jiffies64_to_msecs(expires),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
@@ -6249,7 +6385,7 @@ static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx,
return PTR_ERR(table);
}
- set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
@@ -6400,17 +6536,21 @@ err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
-static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx,
int msg_type,
struct nft_set *set)
{
+ struct nft_trans_elem *te;
struct nft_trans *trans;
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1));
if (trans == NULL)
return NULL;
- nft_trans_elem_set(trans) = set;
+ te = nft_trans_container_elem(trans);
+ te->nelems = 1;
+ te->set = set;
+
return trans;
}
@@ -6493,13 +6633,14 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
nft_set_ext_data(ext), data, set->dlen) < 0)
goto err_ext_check;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ nft_set_ext_timeout(ext)->timeout = timeout;
+
if (expiration == 0)
- *nft_set_ext_expiration(ext) += timeout;
+ expiration = timeout;
+
+ nft_set_ext_timeout(ext)->expiration = get_jiffies_64() + expiration;
}
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
- *nft_set_ext_timeout(ext) = timeout;
return elem;
@@ -6531,28 +6672,52 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
-void nft_set_elem_destroy(const struct nft_set *set,
- const struct nft_elem_priv *elem_priv,
- bool destroy_expr)
+static void __nft_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
- struct nft_ctx ctx = {
- .net = read_pnet(&set->net),
- .family = set->table->family,
- };
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
- nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+ nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem_priv);
}
+
+/* Drop references and destroy. Called from gc and dynset. */
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
+{
+ struct nft_ctx ctx = {
+ .net = read_pnet(&set->net),
+ .family = set->table->family,
+ };
+
+ __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr);
+}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+/* Drop references and destroy. Called from abort path. */
+static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ /* skip update request, see nft_trans_elems_new_abort() */
+ if (!te->elems[i].priv)
+ continue;
+
+ __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true);
+ }
+}
+
/* Destroy element. References have been already dropped in the preparation
* path via nft_setelem_data_deactivate().
*/
@@ -6568,6 +6733,15 @@ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
kfree(elem_priv);
}
+static void nft_trans_elems_destroy(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++)
+ nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv);
+}
+
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[])
{
@@ -6674,7 +6848,7 @@ static int nft_setelem_catchall_insert(const struct net *net,
}
}
- catchall = kmalloc(sizeof(*catchall), GFP_KERNEL);
+ catchall = kmalloc(sizeof(*catchall), GFP_KERNEL_ACCOUNT);
if (!catchall)
return -ENOMEM;
@@ -6724,6 +6898,38 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
}
}
+static void nft_trans_elem_update(const struct nft_set *set,
+ const struct nft_trans_one_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_elem_update *update = elem->update;
+
+ if (update->flags & NFT_TRANS_UPD_TIMEOUT)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout);
+
+ if (update->flags & NFT_TRANS_UPD_EXPIRATION)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration);
+}
+
+static void nft_trans_elems_add(const struct nft_ctx *ctx,
+ struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ struct nft_trans_one_elem *elem = &te->elems[i];
+
+ if (elem->update)
+ nft_trans_elem_update(te->set, elem);
+ else
+ nft_setelem_activate(ctx->net, te->set, elem->priv);
+
+ nf_tables_setelem_notify(ctx, te->set, elem->priv,
+ NFT_MSG_NEWSETELEM);
+ kfree(elem->update);
+ }
+}
+
static int nft_setelem_catchall_deactivate(const struct net *net,
struct nft_set *set,
struct nft_set_elem *elem)
@@ -6806,6 +7012,26 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem_priv);
}
+static void nft_trans_elems_remove(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ WARN_ON_ONCE(te->elems[i].update);
+
+ nf_tables_setelem_notify(ctx, te->set,
+ te->elems[i].priv,
+ te->nft_trans.msg_type);
+
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) {
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
+ }
+ }
+}
+
static bool nft_setelem_valid_key_end(const struct nft_set *set,
struct nlattr **nla, u32 flags)
{
@@ -6910,17 +7136,23 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = READ_ONCE(set->timeout);
+ timeout = set->timeout;
}
expiration = 0;
if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (timeout == 0)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
&expiration);
if (err)
return err;
+
+ if (expiration > timeout)
+ return -ERANGE;
}
if (nla[NFTA_SET_ELEM_EXPR]) {
@@ -7006,16 +7238,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_parse_key_end;
}
- if (timeout > 0) {
- err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (set->flags & NFT_SET_TIMEOUT) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
-
- if (timeout != READ_ONCE(set->timeout)) {
- err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
- if (err < 0)
- goto err_parse_key_end;
- }
}
if (num_exprs) {
@@ -7153,8 +7379,40 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
*nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
goto err_element_clash;
- else if (!(nlmsg_flags & NLM_F_EXCL))
+ else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
+ if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
+ struct nft_elem_update update = { };
+
+ if (timeout != nft_set_ext_timeout(ext2)->timeout) {
+ update.timeout = timeout;
+ if (expiration == 0)
+ expiration = timeout;
+
+ update.flags |= NFT_TRANS_UPD_TIMEOUT;
+ }
+ if (expiration) {
+ update.expiration = expiration;
+ update.flags |= NFT_TRANS_UPD_EXPIRATION;
+ }
+
+ if (update.flags) {
+ struct nft_trans_one_elem *ue;
+
+ ue = &nft_trans_container_elem(trans)->elems[0];
+
+ ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL);
+ if (!ue->update) {
+ err = -ENOMEM;
+ goto err_element_clash;
+ }
+
+ ue->priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
+ goto err_elem_free;
+ }
+ }
+ }
} else if (err == -ENOTEMPTY) {
/* ENOTEMPTY reports overlapping between this element
* and an existing one.
@@ -7173,8 +7431,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
- nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
err_set_full:
@@ -7311,6 +7569,55 @@ void nft_setelem_data_deactivate(const struct net *net,
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
+/* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem.
+ * No notifications and no ndeact changes.
+ *
+ * Returns true if set had been added to (i.e., elements need to be removed again).
+ */
+static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
+ struct nft_trans_elem *te)
+{
+ bool removed = false;
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (te->elems[i].update) {
+ kfree(te->elems[i].update);
+ te->elems[i].update = NULL;
+ /* Update request, so do not release this element */
+ te->elems[i].priv = NULL;
+ continue;
+ }
+
+ if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ atomic_dec(&te->set->nelems);
+
+ removed = true;
+ }
+
+ return removed;
+}
+
+/* Called from abort path to undo DELSETELEM/DESTROYSETELEM. */
+static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) {
+ nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv);
+ nft_setelem_activate(ctx->net, te->set, te->elems[i].priv);
+ }
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ te->set->ndeact--;
+ }
+}
+
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@@ -7390,8 +7697,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
- nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
fail_ops:
@@ -7417,7 +7724,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
return 0;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ struct_size_t(struct nft_trans_elem, elems, 1),
+ GFP_ATOMIC);
if (!trans)
return -ENOMEM;
@@ -7426,8 +7734,9 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->nelems = 1;
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
return 0;
}
@@ -7438,15 +7747,13 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
{
struct nft_trans *trans;
- trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_KERNEL);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (!trans)
return -ENOMEM;
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
- nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
}
@@ -7459,7 +7766,8 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
@@ -7509,7 +7817,7 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return PTR_ERR(table);
}
- set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
@@ -7756,9 +8064,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
struct nft_trans *trans;
int err = -ENOMEM;
- if (!try_module_get(type->owner))
- return -ENOENT;
-
+ /* caller must have obtained type->owner reference. */
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
@@ -7826,15 +8132,16 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype, family);
- if (WARN_ON_ONCE(!type))
- return -ENOENT;
-
if (!obj->ops->update)
return 0;
+ type = nft_obj_type_get(net, objtype, family);
+ if (WARN_ON_ONCE(IS_ERR(type)))
+ return PTR_ERR(type);
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ /* type->owner reference is put when transaction object is released. */
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
}
@@ -8070,7 +8377,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
return 0;
}
-/* called with rcu_read_lock held */
+/* Caller must hold rcu read lock or transaction mutex */
static struct sk_buff *
nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
@@ -8339,12 +8646,14 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
[NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
};
-struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+struct nft_flowtable *nft_flowtable_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_flowtable *flowtable;
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list,
+ lockdep_commit_lock_is_held(net)) {
if (!nla_strcmp(nla, flowtable->name) &&
nft_active_genmask(flowtable, genmask))
return flowtable;
@@ -8700,7 +9009,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
return PTR_ERR(table);
}
- flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable)) {
err = PTR_ERR(flowtable);
@@ -8894,7 +9203,7 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_FLOWTABLE_NAME];
- flowtable = nft_flowtable_lookup(table, attr, genmask);
+ flowtable = nft_flowtable_lookup(net, table, attr, genmask);
}
if (IS_ERR(flowtable)) {
@@ -8964,7 +9273,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
if (!hook_list)
hook_list = &flowtable->hook_list;
- list_for_each_entry_rcu(hook, hook_list, list) {
+ list_for_each_entry_rcu(hook, hook_list, list,
+ lockdep_commit_lock_is_held(net)) {
if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
@@ -9106,7 +9416,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
return PTR_ERR(table);
}
- flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
@@ -9174,7 +9484,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
FLOW_BLOCK_UNBIND);
list_del_rcu(&hook->list);
- kfree(hook);
+ kfree_rcu(hook, rcu);
}
kfree(flowtable->name);
module_put(flowtable->data.type->owner);
@@ -9624,9 +9934,7 @@ static void nft_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- nf_tables_set_elem_destroy(&ctx,
- nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans));
+ nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@@ -10199,9 +10507,24 @@ static void nf_tables_commit_audit_free(struct list_head *adl)
}
}
+/* nft audit emits the number of elements that get added/removed/updated,
+ * so NEW/DELSETELEM needs to increment based on the total elem count.
+ */
+static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_container_elem(trans)->nelems;
+ }
+
+ return 1;
+}
+
static void nf_tables_commit_audit_collect(struct list_head *adl,
- struct nft_table *table, u32 op)
+ const struct nft_trans *trans, u32 op)
{
+ const struct nft_table *table = trans->table;
struct nft_audit_data *adp;
list_for_each_entry(adp, adl, list) {
@@ -10211,7 +10534,7 @@ static void nf_tables_commit_audit_collect(struct list_head *adl,
WARN_ONCE(1, "table=%s not expected in commit list", table->name);
return;
found:
- adp->entries++;
+ adp->entries += nf_tables_commit_audit_entrycount(trans);
if (!adp->op || adp->op > op)
adp->op = op;
}
@@ -10370,7 +10693,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_ctx_update(&ctx, trans);
- nf_tables_commit_audit_collect(&adl, table, trans->msg_type);
+ nf_tables_commit_audit_collect(&adl, trans, trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -10447,6 +10770,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
+ list_del(&nft_trans_container_set(trans)->list_trans_newset);
if (nft_trans_set_update(trans)) {
struct nft_set *set = nft_trans_set(trans);
@@ -10478,10 +10802,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
- nft_setelem_activate(net, te->set, te->elem_priv);
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- NFT_MSG_NEWSETELEM);
+ nft_trans_elems_add(&ctx, te);
+
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10493,14 +10815,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- trans->msg_type);
- nft_setelem_remove(net, te->set, te->elem_priv);
- if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
- atomic_dec(&te->set->nelems);
- te->set->ndeact--;
- }
+ nft_trans_elems_remove(&ctx, te);
+
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10620,8 +10936,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
- nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans), true);
+ nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&ctx, nft_trans_obj(trans));
@@ -10755,6 +11070,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ list_del(&nft_trans_container_set(trans)->list_trans_newset);
if (nft_trans_set_update(trans)) {
nft_trans_destroy(trans);
break;
@@ -10782,9 +11098,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
}
te = nft_trans_container_elem(trans);
- nft_setelem_remove(net, te->set, te->elem_priv);
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- atomic_dec(&te->set->nelems);
+ if (!nft_trans_elems_new_abort(&ctx, te)) {
+ nft_trans_destroy(trans);
+ break;
+ }
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
@@ -10796,12 +11113,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- te->set->ndeact--;
+ nft_trans_elems_destroy_abort(&ctx, te);
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
@@ -10850,6 +11162,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
}
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list));
+
nft_set_abort_update(&set_update_list);
synchronize_rcu();
@@ -11026,10 +11340,11 @@ static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return 0;
}
-int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+int nft_parse_register_load(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *sreg, u32 len)
{
- u32 reg;
- int err;
+ int err, invalid_reg;
+ u32 reg, next_register;
err = nft_parse_register(attr, &reg);
if (err < 0)
@@ -11039,11 +11354,36 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
if (err < 0)
return err;
+ next_register = DIV_ROUND_UP(len, NFT_REG32_SIZE) + reg;
+
+ /* Can't happen: nft_validate_register_load() should have failed */
+ if (WARN_ON_ONCE(next_register > NFT_REG32_NUM))
+ return -EINVAL;
+
+ /* find first register that did not see an earlier store. */
+ invalid_reg = find_next_zero_bit(ctx->reg_inited, NFT_REG32_NUM, reg);
+
+ /* invalid register within the range that we're loading from? */
+ if (invalid_reg < next_register)
+ return -ENODATA;
+
*sreg = reg;
return 0;
}
EXPORT_SYMBOL_GPL(nft_parse_register_load);
+static void nft_saw_register_store(const struct nft_ctx *__ctx,
+ int reg, unsigned int len)
+{
+ unsigned int registers = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ struct nft_ctx *ctx = (struct nft_ctx *)__ctx;
+
+ if (WARN_ON_ONCE(len == 0 || reg < 0))
+ return;
+
+ bitmap_set(ctx->reg_inited, reg, registers);
+}
+
static int nft_validate_register_store(const struct nft_ctx *ctx,
enum nft_registers reg,
const struct nft_data *data,
@@ -11065,7 +11405,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
return err;
}
- return 0;
+ break;
default:
if (type != NFT_DATA_VALUE)
return -EINVAL;
@@ -11078,8 +11418,11 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
sizeof_field(struct nft_regs, data))
return -ERANGE;
- return 0;
+ break;
}
+
+ nft_saw_register_store(ctx, reg, len);
+ return 0;
}
int nft_parse_register_store(const struct nft_ctx *ctx,
@@ -11345,23 +11688,43 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
}
EXPORT_SYMBOL_GPL(nft_data_dump);
-int __nft_release_basechain(struct nft_ctx *ctx)
+static void __nft_release_basechain_now(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
- if (WARN_ON(!nft_is_base_chain(ctx->chain)))
- return 0;
-
- nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
- nft_use_dec(&ctx->chain->use);
nf_tables_rule_release(ctx, rule);
}
+ nf_tables_chain_destroy(ctx->chain);
+}
+
+int __nft_release_basechain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule;
+
+ if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
+ return 0;
+
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
+ list_for_each_entry(rule, &ctx->chain->rules, list)
+ nft_use_dec(&ctx->chain->use);
+
nft_chain_del(ctx->chain);
nft_use_dec(&ctx->table->use);
- nf_tables_chain_destroy(ctx->chain);
+ if (!maybe_get_net(ctx->net)) {
+ __nft_release_basechain_now(ctx);
+ return 0;
+ }
+
+ /* wait for ruleset dumps to complete. Owning chain is no longer in
+ * lists, so new dumps can't find any of these rules anymore.
+ */
+ synchronize_rcu();
+
+ __nft_release_basechain_now(ctx);
+ put_net(ctx->net);
return 0;
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
@@ -11519,6 +11882,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->commit_set_list);
INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
@@ -11549,6 +11913,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list));
if (!list_empty(&nft_net->module_list))
nf_tables_module_autoload_cleanup(net);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a48d5f0e2f3e..75598520b0fa 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -256,7 +256,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
const struct net *net = nft_net(pkt);
const struct nft_expr *expr, *last;
const struct nft_rule_dp *rule;
- struct nft_regs regs = {};
+ struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 932b3ddb34f1..e598a2a252b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -402,27 +402,27 @@ replay_abort:
{
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
}
if (!ss->valid_genid || !ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
if (!try_module_get(ss->owner)) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
if (!ss->valid_genid(net, genid)) {
module_put(ss->owner);
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -ERESTART, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
nfnl_unlock(subsys_id);
@@ -517,7 +517,7 @@ replay_abort:
err = nla_parse_deprecated(cda,
ss->cb[cb_id].attr_count,
attr, attrlen,
- ss->cb[cb_id].policy, NULL);
+ ss->cb[cb_id].policy, &extack);
if (err < 0)
goto ack;
@@ -567,7 +567,7 @@ done:
if (status & NFNL_BATCH_REPLAY) {
ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD);
nfnl_err_reset(&err_list);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
@@ -593,7 +593,7 @@ done:
err = ss->abort(net, oskb, abort_action);
if (err == -EAGAIN) {
nfnl_err_reset(&err_list);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
status |= NFNL_BATCH_FAILURE;
goto replay_abort;
@@ -601,7 +601,7 @@ done:
}
nfnl_err_deliver(&err_list, oskb);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e0716da256bf..d2773ce9b585 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -540,6 +540,14 @@ nla_put_failure:
return -1;
}
+static int nf_queue_checksum_help(struct sk_buff *entskb)
+{
+ if (skb_csum_is_sctp(entskb))
+ return skb_crc32c_csum_help(entskb);
+
+ return skb_checksum_help(entskb);
+}
+
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
@@ -602,7 +610,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
case NFQNL_COPY_PACKET:
if (!(queue->flags & NFQA_CFG_F_GSO) &&
entskb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(entskb))
+ nf_queue_checksum_help(entskb))
return NULL;
data_len = READ_ONCE(queue->copy_range);
@@ -1014,7 +1022,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
break;
}
- if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
+ if (!skb_is_gso(skb) || ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb)))
return __nfqnl_enqueue_packet(net, queue, entry);
nf_bridge_adjust_skb_data(skb);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index ca857afbf061..d550910aabec 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -17,6 +17,7 @@
struct nft_bitwise {
u8 sreg;
+ u8 sreg2;
u8 dreg;
enum nft_bitwise_ops op:8;
u8 len;
@@ -25,8 +26,8 @@ struct nft_bitwise {
struct nft_data data;
};
-static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
- const struct nft_bitwise *priv)
+static void nft_bitwise_eval_mask_xor(u32 *dst, const u32 *src,
+ const struct nft_bitwise *priv)
{
unsigned int i;
@@ -60,28 +61,72 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src,
}
}
+static void nft_bitwise_eval_and(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] & src2[i];
+}
+
+static void nft_bitwise_eval_or(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] | src2[i];
+}
+
+static void nft_bitwise_eval_xor(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] ^ src2[i];
+}
+
void nft_bitwise_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- const u32 *src = &regs->data[priv->sreg];
+ const u32 *src = &regs->data[priv->sreg], *src2;
u32 *dst = &regs->data[priv->dreg];
- switch (priv->op) {
- case NFT_BITWISE_BOOL:
- nft_bitwise_eval_bool(dst, src, priv);
- break;
- case NFT_BITWISE_LSHIFT:
+ if (priv->op == NFT_BITWISE_MASK_XOR) {
+ nft_bitwise_eval_mask_xor(dst, src, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_LSHIFT) {
nft_bitwise_eval_lshift(dst, src, priv);
- break;
- case NFT_BITWISE_RSHIFT:
+ return;
+ }
+ if (priv->op == NFT_BITWISE_RSHIFT) {
nft_bitwise_eval_rshift(dst, src, priv);
- break;
+ return;
+ }
+
+ src2 = priv->sreg2 ? &regs->data[priv->sreg2] : priv->data.data;
+
+ if (priv->op == NFT_BITWISE_AND) {
+ nft_bitwise_eval_and(dst, src, src2, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_OR) {
+ nft_bitwise_eval_or(dst, src, src2, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_XOR) {
+ nft_bitwise_eval_xor(dst, src, src2, priv);
+ return;
}
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_SREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_SREG2] = { .type = NLA_U32 },
[NFTA_BITWISE_DREG] = { .type = NLA_U32 },
[NFTA_BITWISE_LEN] = { .type = NLA_U32 },
[NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
@@ -90,8 +135,8 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_DATA] = { .type = NLA_NESTED },
};
-static int nft_bitwise_init_bool(struct nft_bitwise *priv,
- const struct nlattr *const tb[])
+static int nft_bitwise_init_mask_xor(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
{
struct nft_data_desc mask = {
.type = NFT_DATA_VALUE,
@@ -105,7 +150,8 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
};
int err;
- if (tb[NFTA_BITWISE_DATA])
+ if (tb[NFTA_BITWISE_DATA] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@@ -139,7 +185,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
int err;
if (tb[NFTA_BITWISE_MASK] ||
- tb[NFTA_BITWISE_XOR])
+ tb[NFTA_BITWISE_XOR] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_DATA])
@@ -157,6 +204,41 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
return 0;
}
+static int nft_bitwise_init_bool(const struct nft_ctx *ctx,
+ struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+{
+ int err;
+
+ if (tb[NFTA_BITWISE_MASK] ||
+ tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+ if ((!tb[NFTA_BITWISE_DATA] && !tb[NFTA_BITWISE_SREG2]) ||
+ (tb[NFTA_BITWISE_DATA] && tb[NFTA_BITWISE_SREG2]))
+ return -EINVAL;
+
+ if (tb[NFTA_BITWISE_DATA]) {
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ .len = priv->len,
+ };
+
+ err = nft_data_init(NULL, &priv->data, &desc,
+ tb[NFTA_BITWISE_DATA]);
+ if (err < 0)
+ return err;
+ } else {
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG2],
+ &priv->sreg2, priv->len);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
static int nft_bitwise_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -171,7 +253,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg,
priv->len);
if (err < 0)
return err;
@@ -185,32 +267,40 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
if (tb[NFTA_BITWISE_OP]) {
priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP]));
switch (priv->op) {
- case NFT_BITWISE_BOOL:
+ case NFT_BITWISE_MASK_XOR:
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
break;
default:
return -EOPNOTSUPP;
}
} else {
- priv->op = NFT_BITWISE_BOOL;
+ priv->op = NFT_BITWISE_MASK_XOR;
}
switch(priv->op) {
- case NFT_BITWISE_BOOL:
- err = nft_bitwise_init_bool(priv, tb);
+ case NFT_BITWISE_MASK_XOR:
+ err = nft_bitwise_init_mask_xor(priv, tb);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_init_shift(priv, tb);
break;
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
+ err = nft_bitwise_init_bool(ctx, priv, tb);
+ break;
}
return err;
}
-static int nft_bitwise_dump_bool(struct sk_buff *skb,
- const struct nft_bitwise *priv)
+static int nft_bitwise_dump_mask_xor(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
{
if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
NFT_DATA_VALUE, priv->len) < 0)
@@ -232,6 +322,21 @@ static int nft_bitwise_dump_shift(struct sk_buff *skb,
return 0;
}
+static int nft_bitwise_dump_bool(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
+{
+ if (priv->sreg2) {
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG2, priv->sreg2))
+ return -1;
+ } else {
+ if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data,
+ NFT_DATA_VALUE, sizeof(u32)) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
static int nft_bitwise_dump(struct sk_buff *skb,
const struct nft_expr *expr, bool reset)
{
@@ -248,13 +353,18 @@ static int nft_bitwise_dump(struct sk_buff *skb,
return -1;
switch (priv->op) {
- case NFT_BITWISE_BOOL:
- err = nft_bitwise_dump_bool(skb, priv);
+ case NFT_BITWISE_MASK_XOR:
+ err = nft_bitwise_dump_mask_xor(skb, priv);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_dump_shift(skb, priv);
break;
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
+ err = nft_bitwise_dump_bool(skb, priv);
+ break;
}
return err;
@@ -269,7 +379,7 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
const struct nft_bitwise *priv = nft_expr_priv(expr);
struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
- if (priv->op != NFT_BITWISE_BOOL)
+ if (priv->op != NFT_BITWISE_MASK_XOR)
return -EOPNOTSUPP;
if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
@@ -299,6 +409,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
+ priv->sreg2 == bitwise->sreg2 &&
priv->dreg == bitwise->dreg &&
priv->op == bitwise->op &&
priv->len == bitwise->len &&
@@ -365,7 +476,7 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
int err;
- err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg,
sizeof(u32));
if (err < 0)
return err;
@@ -375,7 +486,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- if (tb[NFTA_BITWISE_DATA])
+ if (tb[NFTA_BITWISE_DATA] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@@ -406,7 +518,7 @@ nft_bitwise_fast_dump(struct sk_buff *skb,
return -1;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32))))
return -1;
- if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL)))
+ if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_MASK_XOR)))
return -1;
data.data[0] = priv->mask;
@@ -501,7 +613,7 @@ nft_bitwise_select_ops(const struct nft_ctx *ctx,
return &nft_bitwise_ops;
if (tb[NFTA_BITWISE_OP] &&
- ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL)
+ ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_MASK_XOR)
return &nft_bitwise_ops;
return &nft_bitwise_fast_ops;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index f6e791a68101..af9206a3afd1 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -5,7 +5,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -139,7 +139,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg,
priv->len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index cd4652259095..2605f43737bc 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -83,7 +83,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -222,7 +222,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -323,7 +323,7 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index d3d11dede545..7ca4f0d21fe2 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -350,8 +350,7 @@ nla_put_failure:
}
static int nft_target_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
unsigned int hook_mask = 0;
@@ -536,7 +535,7 @@ nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct xt_match *m = expr->ops->data;
int ret;
- priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
+ priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL_ACCOUNT);
if (!priv->info)
return -ENOMEM;
@@ -611,8 +610,7 @@ static int nft_match_large_dump(struct sk_buff *skb,
}
static int nft_match_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
unsigned int hook_mask = 0;
@@ -810,7 +808,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
goto err;
}
- ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL_ACCOUNT);
if (!ops) {
err = -ENOMEM;
goto err;
@@ -900,7 +898,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
goto err;
}
- ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL_ACCOUNT);
if (!ops) {
err = -ENOMEM;
goto err;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index eab0dc66bee6..cc7325329496 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -8,7 +8,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -17,6 +17,11 @@
#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
+ u64_stats_t bytes;
+ u64_stats_t packets;
+};
+
+struct nft_counter_tot {
s64 bytes;
s64 packets;
};
@@ -25,25 +30,24 @@ struct nft_counter_percpu_priv {
struct nft_counter __percpu *counter;
};
-static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync);
static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
-
- write_seqcount_begin(myseq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- this_cpu->bytes += pkt->skb->len;
- this_cpu->packets++;
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->bytes, pkt->skb->len);
+ u64_stats_inc(&this_cpu->packets);
+ u64_stats_update_end(nft_sync);
- write_seqcount_end(myseq);
local_bh_enable();
}
@@ -66,17 +70,16 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu = raw_cpu_ptr(cpu_stats);
if (tb[NFTA_COUNTER_PACKETS]) {
- this_cpu->packets =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ u64_stats_set(&this_cpu->packets,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])));
}
if (tb[NFTA_COUNTER_BYTES]) {
- this_cpu->bytes =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ u64_stats_set(&this_cpu->bytes,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])));
}
- preempt_enable();
+
priv->counter = cpu_stats;
return 0;
}
@@ -104,40 +107,41 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
}
static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
+
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, -total->packets);
+ u64_stats_add(&this_cpu->bytes, -total->bytes);
+ u64_stats_update_end(nft_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets -= total->packets;
- this_cpu->bytes -= total->bytes;
- write_seqcount_end(myseq);
local_bh_enable();
}
static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
struct nft_counter *this_cpu;
- const seqcount_t *myseq;
u64 bytes, packets;
unsigned int seq;
int cpu;
memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+ struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu);
+
this_cpu = per_cpu_ptr(priv->counter, cpu);
do {
- seq = read_seqcount_begin(myseq);
- bytes = this_cpu->bytes;
- packets = this_cpu->packets;
- } while (read_seqcount_retry(myseq, seq));
+ seq = u64_stats_fetch_begin(nft_sync);
+ bytes = u64_stats_read(&this_cpu->bytes);
+ packets = u64_stats_read(&this_cpu->packets);
+ } while (u64_stats_fetch_retry(nft_sync, seq));
total->bytes += bytes;
total->packets += packets;
@@ -148,7 +152,7 @@ static int nft_counter_do_dump(struct sk_buff *skb,
struct nft_counter_percpu_priv *priv,
bool reset)
{
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -237,7 +241,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -245,11 +249,9 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
- this_cpu->packets = total.packets;
- this_cpu->bytes = total.bytes;
- preempt_enable();
+ this_cpu = raw_cpu_ptr(cpu_stats);
+ u64_stats_set(&this_cpu->packets, total.packets);
+ u64_stats_set(&this_cpu->bytes, total.bytes);
priv_clone->counter = cpu_stats;
return 0;
@@ -267,17 +269,17 @@ static void nft_counter_offload_stats(struct nft_expr *expr,
const struct flow_stats *stats)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets += stats->pkts;
- this_cpu->bytes += stats->bytes;
- write_seqcount_end(myseq);
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, stats->pkts);
+ u64_stats_add(&this_cpu->bytes, stats->bytes);
+ u64_stats_update_end(nft_sync);
local_bh_enable();
}
@@ -286,7 +288,7 @@ void nft_counter_init_seqcount(void)
int cpu;
for_each_possible_cpu(cpu)
- seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+ u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu));
}
struct nft_expr_type nft_counter_type;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 452ed94c3a4d..67a41cd2baaf 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -606,7 +606,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index e5739a59ebf1..0573f96ce079 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -40,7 +40,7 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_DEV] == NULL)
return -EINVAL;
- return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ return nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
sizeof(int));
}
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index b4ada3ab2167..88922e0e8e83 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -56,7 +56,7 @@ static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
- timeout = priv->timeout ? : set->timeout;
+ timeout = priv->timeout ? : READ_ONCE(set->timeout);
elem_priv = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
@@ -94,9 +94,10 @@ void nft_dynset_eval(const struct nft_expr *expr,
if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
expr, regs, &ext)) {
if (priv->op == NFT_DYNSET_OP_UPDATE &&
- nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- timeout = priv->timeout ? : set->timeout;
- *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
+ nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) {
+ timeout = priv->timeout ? : READ_ONCE(set->timeout);
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + timeout);
}
nft_set_elem_update_expr(ext, regs, pkt);
@@ -215,7 +216,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return err;
}
- err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
set->klen);
if (err < 0)
return err;
@@ -226,7 +227,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (set->dtype == NFT_DATA_VERDICT)
return -EOPNOTSUPP;
- err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_DATA],
&priv->sreg_data, set->dlen);
if (err < 0)
return err;
@@ -312,12 +313,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (priv->num_exprs)
nft_dynset_ext_add_expr(priv);
- if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout) {
- nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
- nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
- }
- }
+ if (set->flags & NFT_SET_TIMEOUT &&
+ (timeout || READ_ONCE(set->timeout)))
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
priv->timeout = timeout;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 6eb571d0c3fd..b8d03364566c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -5,7 +5,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
@@ -588,7 +588,7 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->flags = flags;
priv->op = op;
- return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ return nft_parse_register_load(ctx, tb[NFTA_EXTHDR_SREG], &priv->sreg,
priv->len);
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index b58f62195ff3..96e02a83c045 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -26,8 +26,7 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
};
EXPORT_SYMBOL(nft_fib_policy);
-int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_fib *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab9576098701..3b474d235663 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -8,7 +8,8 @@
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/ip.h> /* for ipv4 options. */
+#include <net/ip.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -235,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
+ fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb)));
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
@@ -380,8 +381,7 @@ out:
}
static int nft_flow_offload_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
@@ -409,8 +409,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (!tb[NFTA_FLOW_TABLE_NAME])
return -EINVAL;
- flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
- genmask);
+ flowtable = nft_flowtable_lookup(ctx->net, ctx->table,
+ tb[NFTA_FLOW_TABLE_NAME], genmask);
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 358e742afad7..152a9fb4d23a 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -52,7 +52,7 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_FWD_SREG_DEV] == NULL)
return -EINVAL;
- return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
sizeof(int));
}
@@ -178,12 +178,12 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ err = nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
sizeof(int));
if (err < 0)
return err;
- return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
addr_len);
}
@@ -204,8 +204,7 @@ nla_put_failure:
}
static int nft_fwd_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) |
(1 << NF_NETDEV_EGRESS));
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 868d68302d22..5d034bbb6913 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -92,7 +92,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_HASH_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index ac2422c215e5..02ee5fb69871 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -244,8 +244,7 @@ nla_put_failure:
}
static int nft_immediate_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **d)
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
index 928312d01eb1..817ab978d24a 100644
--- a/net/netfilter/nft_inner.c
+++ b/net/netfilter/nft_inner.c
@@ -210,35 +210,66 @@ static int nft_inner_parse(const struct nft_inner *priv,
struct nft_pktinfo *pkt,
struct nft_inner_tun_ctx *tun_ctx)
{
- struct nft_inner_tun_ctx ctx = {};
u32 off = pkt->inneroff;
if (priv->flags & NFT_INNER_HDRSIZE &&
- nft_inner_parse_tunhdr(priv, pkt, &ctx, &off) < 0)
+ nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0)
return -1;
if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) {
- if (nft_inner_parse_l2l3(priv, pkt, &ctx, off) < 0)
+ if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0)
return -1;
} else if (priv->flags & NFT_INNER_TH) {
- ctx.inner_thoff = off;
- ctx.flags |= NFT_PAYLOAD_CTX_INNER_TH;
+ tun_ctx->inner_thoff = off;
+ tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
}
- *tun_ctx = ctx;
tun_ctx->type = priv->type;
+ tun_ctx->cookie = (unsigned long)pkt->skb;
pkt->flags |= NFT_PKTINFO_INNER_FULL;
return 0;
}
+static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
+ struct nft_inner_tun_ctx *tun_ctx)
+{
+ struct nft_inner_tun_ctx *this_cpu_tun_ctx;
+
+ local_bh_disable();
+ this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
+ if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
+ local_bh_enable();
+ return false;
+ }
+ *tun_ctx = *this_cpu_tun_ctx;
+ local_bh_enable();
+
+ return true;
+}
+
+static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
+ const struct nft_inner_tun_ctx *tun_ctx)
+{
+ struct nft_inner_tun_ctx *this_cpu_tun_ctx;
+
+ local_bh_disable();
+ this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
+ if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
+ *this_cpu_tun_ctx = *tun_ctx;
+ local_bh_enable();
+}
+
static bool nft_inner_parse_needed(const struct nft_inner *priv,
const struct nft_pktinfo *pkt,
- const struct nft_inner_tun_ctx *tun_ctx)
+ struct nft_inner_tun_ctx *tun_ctx)
{
if (!(pkt->flags & NFT_PKTINFO_INNER_FULL))
return true;
+ if (!nft_inner_restore_tun_ctx(pkt, tun_ctx))
+ return true;
+
if (priv->type != tun_ctx->type)
return true;
@@ -248,27 +279,29 @@ static bool nft_inner_parse_needed(const struct nft_inner *priv,
static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_inner_tun_ctx *tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
const struct nft_inner *priv = nft_expr_priv(expr);
+ struct nft_inner_tun_ctx tun_ctx = {};
if (nft_payload_inner_offset(pkt) < 0)
goto err;
- if (nft_inner_parse_needed(priv, pkt, tun_ctx) &&
- nft_inner_parse(priv, (struct nft_pktinfo *)pkt, tun_ctx) < 0)
+ if (nft_inner_parse_needed(priv, pkt, &tun_ctx) &&
+ nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0)
goto err;
switch (priv->expr_type) {
case NFT_INNER_EXPR_PAYLOAD:
- nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx);
+ nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
break;
case NFT_INNER_EXPR_META:
- nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, tun_ctx);
+ nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
break;
default:
WARN_ON_ONCE(1);
goto err;
}
+ nft_inner_save_tun_ctx(pkt, &tun_ctx);
+
return;
err:
regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 5defe6e4fd98..e35588137995 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -163,7 +163,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
- priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
+ priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL_ACCOUNT);
if (priv->prefix == NULL)
return -ENOMEM;
nla_strscpy(priv->prefix, nla, nla_len(nla) + 1);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index f3080fa1b226..63ef832b8aa7 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -113,7 +113,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
- err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;
@@ -206,8 +206,7 @@ nla_put_failure:
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **d)
+ const struct nft_expr *expr)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
struct nft_set_iter iter;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 8a14aaca93bb..868bd4d73555 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -27,8 +27,7 @@ static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
};
static int nft_masq_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
int err;
@@ -52,13 +51,13 @@ static int nft_masq_init(const struct nft_ctx *ctx,
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 9139ce38ea7b..05cd1e6e6a2f 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -581,8 +581,7 @@ static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
}
static int nft_meta_get_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -600,8 +599,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
}
int nft_meta_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -657,7 +655,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
@@ -954,7 +952,7 @@ static int nft_secmark_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_SECMARK_CTX] == NULL)
return -EINVAL;
- priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
+ priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL_ACCOUNT);
if (!priv->ctx)
return -ENOMEM;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 808f5802c270..6e21f72c5b57 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -137,8 +137,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
};
static int nft_nat_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_nat *priv = nft_expr_priv(expr);
int err;
@@ -214,13 +213,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MIN],
&priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MAX],
&priv->sreg_addr_max,
alen);
if (err < 0)
@@ -234,13 +233,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
plen = sizeof_field(struct nf_nat_range, min_proto.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 7d29db7c2ac0..bd058babfc82 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -66,7 +66,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
+ priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL_ACCOUNT);
if (!priv->counter)
return -ENOMEM;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 509011b1ef59..09da7a3f9f96 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -143,7 +143,7 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 7fec57ff736f..1c0b493ef0a9 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -108,8 +108,7 @@ nla_put_failure:
}
static int nft_osf_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
unsigned int hooks;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 50429cbd42da..7dfc5343dae4 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -904,6 +904,9 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
((priv->base != NFT_PAYLOAD_TRANSPORT_HEADER &&
priv->base != NFT_PAYLOAD_INNER_HEADER) ||
skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (offset + priv->len > skb->len)
+ goto err;
+
fsum = skb_checksum(skb, offset, priv->len, 0);
tsum = csum_partial(src, priv->len, 0);
@@ -981,7 +984,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
}
priv->csum_type = csum_type;
- return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ return nft_parse_register_load(ctx, tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
}
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index b2b8127c8d43..344fe311878f 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -69,8 +69,7 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr,
}
static int nft_queue_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
@@ -136,7 +135,7 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx,
struct nft_queue *priv = nft_expr_priv(expr);
int err;
- err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ err = nft_parse_register_load(ctx, tb[NFTA_QUEUE_SREG_QNUM],
&priv->sreg_qnum, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 51ae64cd268f..ea382f7bbd78 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -83,7 +83,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
goto err2;
}
- err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_RANGE_SREG], &priv->sreg,
desc_from.len);
if (err < 0)
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a58bd8d291ff..95eedad85c83 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -27,8 +27,7 @@ static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
};
static int nft_redir_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
int err;
@@ -51,13 +50,13 @@ static int nft_redir_init(const struct nft_ctx *ctx,
plen = sizeof_field(struct nf_nat_range, min_proto.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ed2e668474d6..196a92c7ea09 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -24,8 +24,7 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
EXPORT_SYMBOL_GPL(nft_reject_policy);
int nft_reject_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 973fa31a9dd6..49020e67304a 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -61,8 +61,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
}
static int nft_reject_inet_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c
index 7865cd8b11bb..2558ce1505d9 100644
--- a/net/netfilter/nft_reject_netdev.c
+++ b/net/netfilter/nft_reject_netdev.c
@@ -145,8 +145,7 @@ out:
}
static int nft_reject_netdev_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
}
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 14d88394bcb7..dc50b9a5bd68 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -160,8 +160,7 @@ nla_put_failure:
return -1;
}
-static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 1caa04619dc6..12390d2e994f 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -88,13 +88,15 @@ bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
}
static struct nft_bitmap_elem *
-nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+nft_bitmap_elem_find(const struct net *net,
+ const struct nft_set *set, struct nft_bitmap_elem *this,
u8 genmask)
{
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- list_for_each_entry_rcu(be, &priv->list, head) {
+ list_for_each_entry_rcu(be, &priv->list, head,
+ lockdep_is_held(&nft_pernet(net)->commit_mutex)) {
if (memcmp(nft_set_ext_key(&be->ext),
nft_set_ext_key(&this->ext), set->klen) ||
!nft_set_elem_active(&be->ext, genmask))
@@ -132,7 +134,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
u32 idx, off;
- be = nft_bitmap_elem_find(set, new, genmask);
+ be = nft_bitmap_elem_find(net, set, new, genmask);
if (be) {
*elem_priv = &be->priv;
return -EEXIST;
@@ -201,7 +203,7 @@ nft_bitmap_deactivate(const struct net *net, const struct nft_set *set,
nft_bitmap_location(set, elem->key.val.data, &idx, &off);
- be = nft_bitmap_elem_find(set, this, genmask);
+ be = nft_bitmap_elem_find(net, set, this, genmask);
if (!be)
return NULL;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index daa56dda737a..8bfac4185ac7 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -24,11 +24,13 @@
struct nft_rhash {
struct rhashtable ht;
struct delayed_work gc_work;
+ u32 wq_gc_seq;
};
struct nft_rhash_elem {
struct nft_elem_priv priv;
struct rhash_head node;
+ u32 wq_gc_seq;
struct nft_set_ext ext;
};
@@ -338,6 +340,10 @@ static void nft_rhash_gc(struct work_struct *work)
if (!gc)
goto done;
+ /* Elements never collected use a zero gc worker sequence number. */
+ if (unlikely(++priv->wq_gc_seq == 0))
+ priv->wq_gc_seq++;
+
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
@@ -355,6 +361,14 @@ static void nft_rhash_gc(struct work_struct *work)
goto try_later;
}
+ /* rhashtable walk is unstable, already seen in this gc run?
+ * Then, skip this element. In case of (unlikely) sequence
+ * wraparound and stale element wq_gc_seq, next gc run will
+ * just find this expired element.
+ */
+ if (he->wq_gc_seq == priv->wq_gc_seq)
+ continue;
+
if (nft_set_elem_is_dead(&he->ext))
goto dead_elem;
@@ -371,6 +385,8 @@ dead_elem:
if (!gc)
goto try_later;
+ /* annotate gc sequence for this attempt. */
+ he->wq_gc_seq = priv->wq_gc_seq;
nft_trans_gc_elem_add(gc, he);
}
@@ -647,7 +663,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
int i;
for (i = 0; i < priv->buckets; i++) {
- hlist_for_each_entry_rcu(he, &priv->table[i], node) {
+ hlist_for_each_entry_rcu(he, &priv->table[i], node,
+ lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) {
if (iter->count < iter->skip)
goto cont;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index eb4c4a4ac7ac..7be342b495f5 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -663,7 +663,7 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
check_add_overflow(rules, extra, &rules_alloc))
return -EOVERFLOW;
- new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL);
+ new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT);
if (!new_mt)
return -ENOMEM;
@@ -936,7 +936,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
return;
}
- new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL);
+ new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT);
if (!new_lt)
return;
@@ -1212,7 +1212,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
scratch = kzalloc_node(struct_size(scratch, map,
bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL, cpu_to_node(i));
+ GFP_KERNEL_ACCOUNT, cpu_to_node(i));
if (!scratch) {
/* On failure, there's no need to undo previous
* allocations: this means that some scratch maps have
@@ -1427,7 +1427,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
struct nft_pipapo_match *new;
int i;
- new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
+ new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL_ACCOUNT);
if (!new)
return NULL;
@@ -1457,7 +1457,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
src->bsize * sizeof(*dst->lt) +
NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!new_lt)
goto out_lt;
@@ -1470,7 +1470,8 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (src->rules > 0) {
dst->mt = kvmalloc_array(src->rules_alloc,
- sizeof(*src->mt), GFP_KERNEL);
+ sizeof(*src->mt),
+ GFP_KERNEL_ACCOUNT);
if (!dst->mt)
goto out_mt;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f30163e2ca62..35d0409b0095 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -9,7 +9,8 @@
struct nft_socket {
enum nft_socket_keys key:8;
- u8 level;
+ u8 level; /* cgroupv2 level to extract */
+ u8 level_user; /* cgroupv2 level provided by userspace */
u8 len;
union {
u8 dreg;
@@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
memcpy(dest, &cgid, sizeof(u64));
return true;
}
+
+/* process context only, uses current->nsproxy. */
+static noinline int nft_socket_cgroup_subtree_level(void)
+{
+ struct cgroup *cgrp = cgroup_get_from_path("/");
+ int level;
+
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ level = cgrp->level;
+
+ cgroup_put(cgrp);
+
+ if (level > 255)
+ return -ERANGE;
+
+ if (WARN_ON_ONCE(level < 0))
+ return -EINVAL;
+
+ return level;
+}
#endif
static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
@@ -110,13 +133,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
*dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
break;
case NFT_SOCKET_WILDCARD:
if (!sk_fullsock(sk)) {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
nft_socket_wildcard(pkt, regs, sk, dest);
break;
@@ -124,7 +147,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFT_SOCKET_CGROUPV2:
if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
break;
#endif
@@ -133,6 +156,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
+out_put_sk:
if (sk != skb->sk)
sock_gen_put(sk);
}
@@ -173,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
case NFT_SOCKET_MARK:
len = sizeof(u32);
break;
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_SOCK_CGROUP_DATA
case NFT_SOCKET_CGROUPV2: {
unsigned int level;
+ int err;
if (!tb[NFTA_SOCKET_LEVEL])
return -EINVAL;
@@ -184,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
if (level > 255)
return -EOPNOTSUPP;
+ err = nft_socket_cgroup_subtree_level();
+ if (err < 0)
+ return err;
+
+ priv->level_user = level;
+
+ level += err;
+ /* Implies a giant cgroup tree */
+ if (WARN_ON_ONCE(level > 255))
+ return -EOPNOTSUPP;
+
priv->level = level;
len = sizeof(u64);
break;
@@ -208,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
return -1;
if (priv->key == NFT_SOCKET_CGROUPV2 &&
- nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
return -1;
return 0;
}
@@ -239,8 +275,7 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
}
static int nft_socket_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 1d737f89dfc1..5d3e51825985 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -248,8 +248,7 @@ static void nft_synproxy_eval(const struct nft_expr *expr,
}
static int nft_synproxy_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 71412adb73d4..50481280abd2 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -254,14 +254,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TPROXY_REG_ADDR]) {
- err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_ADDR],
&priv->sreg_addr, alen);
if (err < 0)
return err;
}
if (tb[NFTA_TPROXY_REG_PORT]) {
- err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_PORT],
&priv->sreg_port, sizeof(u16));
if (err < 0)
return err;
@@ -313,8 +313,7 @@ static int nft_tproxy_dump(struct sk_buff *skb,
}
static int nft_tproxy_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 60a76e6e348e..681301b46aa4 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -497,10 +497,7 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TUNNEL_KEY_TOS])
info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
- if (tb[NFTA_TUNNEL_KEY_TTL])
- info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]);
- else
- info.key.ttl = U8_MAX;
+ info.key.ttl = nla_get_u8_default(tb[NFTA_TUNNEL_KEY_TTL], U8_MAX);
if (tb[NFTA_TUNNEL_KEY_OPTS]) {
err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS],
@@ -509,13 +506,14 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
return err;
}
- md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL, GFP_KERNEL);
+ md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL,
+ GFP_KERNEL_ACCOUNT);
if (!md)
return -ENOMEM;
memcpy(&md->u.tun_info, &info, sizeof(info));
#ifdef CONFIG_DST_CACHE
- err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL);
+ err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL_ACCOUNT);
if (err < 0) {
metadata_dst_free(md);
return err;
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 1c866757db55..8a07b46cc8fb 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -229,8 +229,7 @@ static int nft_xfrm_get_dump(struct sk_buff *skb,
return 0;
}
-static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index da5d929c7c85..709840612f0d 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1269,7 +1269,7 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
/* and once again: */
list_for_each_entry(t, &xt_net->tables[af], list)
- if (strcmp(t->name, name) == 0)
+ if (strcmp(t->name, name) == 0 && owner == t->me)
return t;
module_put(owner);
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
index c8a639f56168..9d99f5a3d176 100644
--- a/net/netfilter/xt_CHECKSUM.c
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -63,24 +63,37 @@ static int checksum_tg_check(const struct xt_tgchk_param *par)
return 0;
}
-static struct xt_target checksum_tg_reg __read_mostly = {
- .name = "CHECKSUM",
- .family = NFPROTO_UNSPEC,
- .target = checksum_tg,
- .targetsize = sizeof(struct xt_CHECKSUM_info),
- .table = "mangle",
- .checkentry = checksum_tg_check,
- .me = THIS_MODULE,
+static struct xt_target checksum_tg_reg[] __read_mostly = {
+ {
+ .name = "CHECKSUM",
+ .family = NFPROTO_IPV4,
+ .target = checksum_tg,
+ .targetsize = sizeof(struct xt_CHECKSUM_info),
+ .table = "mangle",
+ .checkentry = checksum_tg_check,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "CHECKSUM",
+ .family = NFPROTO_IPV6,
+ .target = checksum_tg,
+ .targetsize = sizeof(struct xt_CHECKSUM_info),
+ .table = "mangle",
+ .checkentry = checksum_tg_check,
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init checksum_tg_init(void)
{
- return xt_register_target(&checksum_tg_reg);
+ return xt_register_targets(checksum_tg_reg, ARRAY_SIZE(checksum_tg_reg));
}
static void __exit checksum_tg_exit(void)
{
- xt_unregister_target(&checksum_tg_reg);
+ xt_unregister_targets(checksum_tg_reg, ARRAY_SIZE(checksum_tg_reg));
}
module_init(checksum_tg_init);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 0accac98dea7..0ae8d8a1216e 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -38,9 +38,9 @@ static struct xt_target classify_tg_reg[] __read_mostly = {
{
.name = "CLASSIFY",
.revision = 0,
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
- (1 << NF_INET_POST_ROUTING),
+ (1 << NF_INET_POST_ROUTING),
.target = classify_tg,
.targetsize = sizeof(struct xt_classify_target_info),
.me = THIS_MODULE,
@@ -54,6 +54,18 @@ static struct xt_target classify_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_classify_target_info),
.me = THIS_MODULE,
},
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "CLASSIFY",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_POST_ROUTING),
+ .target = classify_tg,
+ .targetsize = sizeof(struct xt_classify_target_info),
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init classify_tg_init(void)
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 76acecf3e757..1494b3ee30e1 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -114,25 +114,39 @@ static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
nf_ct_netns_put(par->net, par->family);
}
-static struct xt_target connsecmark_tg_reg __read_mostly = {
- .name = "CONNSECMARK",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = connsecmark_tg_check,
- .destroy = connsecmark_tg_destroy,
- .target = connsecmark_tg,
- .targetsize = sizeof(struct xt_connsecmark_target_info),
- .me = THIS_MODULE,
+static struct xt_target connsecmark_tg_reg[] __read_mostly = {
+ {
+ .name = "CONNSECMARK",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .checkentry = connsecmark_tg_check,
+ .destroy = connsecmark_tg_destroy,
+ .target = connsecmark_tg,
+ .targetsize = sizeof(struct xt_connsecmark_target_info),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "CONNSECMARK",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .checkentry = connsecmark_tg_check,
+ .destroy = connsecmark_tg_destroy,
+ .target = connsecmark_tg,
+ .targetsize = sizeof(struct xt_connsecmark_target_info),
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init connsecmark_tg_init(void)
{
- return xt_register_target(&connsecmark_tg_reg);
+ return xt_register_targets(connsecmark_tg_reg, ARRAY_SIZE(connsecmark_tg_reg));
}
static void __exit connsecmark_tg_exit(void)
{
- xt_unregister_target(&connsecmark_tg_reg);
+ xt_unregister_targets(connsecmark_tg_reg, ARRAY_SIZE(connsecmark_tg_reg));
}
module_init(connsecmark_tg_init);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 2be2f7a7b60f..3ba94c34297c 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -313,10 +313,30 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
xt_ct_tg_destroy(par, par->targinfo);
}
+static unsigned int
+notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ /* Previously seen (loopback)? Ignore. */
+ if (skb->_nfct != 0)
+ return XT_CONTINUE;
+
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+
+ return XT_CONTINUE;
+}
+
static struct xt_target xt_ct_tg_reg[] __read_mostly = {
{
+ .name = "NOTRACK",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .target = notrack_tg,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
.name = "CT",
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.targetsize = sizeof(struct xt_ct_target_info),
.usersize = offsetof(struct xt_ct_target_info, ct),
.checkentry = xt_ct_tg_check_v0,
@@ -327,7 +347,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
},
{
.name = "CT",
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.revision = 1,
.targetsize = sizeof(struct xt_ct_target_info_v1),
.usersize = offsetof(struct xt_ct_target_info, ct),
@@ -339,7 +359,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
},
{
.name = "CT",
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.revision = 2,
.targetsize = sizeof(struct xt_ct_target_info_v1),
.usersize = offsetof(struct xt_ct_target_info, ct),
@@ -349,49 +369,61 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
.table = "raw",
.me = THIS_MODULE,
},
-};
-
-static unsigned int
-notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- /* Previously seen (loopback)? Ignore. */
- if (skb->_nfct != 0)
- return XT_CONTINUE;
-
- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-
- return XT_CONTINUE;
-}
-
-static struct xt_target notrack_tg_reg __read_mostly = {
- .name = "NOTRACK",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .target = notrack_tg,
- .table = "raw",
- .me = THIS_MODULE,
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "NOTRACK",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .target = notrack_tg,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CT",
+ .family = NFPROTO_IPV6,
+ .targetsize = sizeof(struct xt_ct_target_info),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
+ .checkentry = xt_ct_tg_check_v0,
+ .destroy = xt_ct_tg_destroy_v0,
+ .target = xt_ct_target_v0,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CT",
+ .family = NFPROTO_IPV6,
+ .revision = 1,
+ .targetsize = sizeof(struct xt_ct_target_info_v1),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
+ .checkentry = xt_ct_tg_check_v1,
+ .destroy = xt_ct_tg_destroy_v1,
+ .target = xt_ct_target_v1,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CT",
+ .family = NFPROTO_IPV6,
+ .revision = 2,
+ .targetsize = sizeof(struct xt_ct_target_info_v1),
+ .usersize = offsetof(struct xt_ct_target_info, ct),
+ .checkentry = xt_ct_tg_check_v2,
+ .destroy = xt_ct_tg_destroy_v1,
+ .target = xt_ct_target_v1,
+ .table = "raw",
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init xt_ct_tg_init(void)
{
- int ret;
-
- ret = xt_register_target(&notrack_tg_reg);
- if (ret < 0)
- return ret;
-
- ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
- if (ret < 0) {
- xt_unregister_target(&notrack_tg_reg);
- return ret;
- }
- return 0;
+ return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
}
static void __exit xt_ct_tg_exit(void)
{
xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
- xt_unregister_target(&notrack_tg_reg);
}
module_init(xt_ct_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index db720efa811d..9f54819eb52c 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -107,14 +107,12 @@ static void idletimer_tg_expired(struct timer_list *t)
schedule_work(&timer->work);
}
-static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm,
- ktime_t now)
+static void idletimer_tg_alarmproc(struct alarm *alarm, ktime_t now)
{
struct idletimer_tg *timer = alarm->data;
pr_debug("alarm %s expired\n", timer->attr.attr.name);
schedule_work(&timer->work);
- return ALARMTIMER_NORESTART;
}
static int idletimer_check_sysfs_name(const char *name, unsigned int size)
@@ -409,21 +407,23 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
mutex_lock(&list_mutex);
- if (--info->timer->refcnt == 0) {
- pr_debug("deleting timer %s\n", info->label);
-
- list_del(&info->timer->entry);
- timer_shutdown_sync(&info->timer->timer);
- cancel_work_sync(&info->timer->work);
- sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
- kfree(info->timer->attr.attr.name);
- kfree(info->timer);
- } else {
+ if (--info->timer->refcnt > 0) {
pr_debug("decreased refcnt of timer %s to %u\n",
info->label, info->timer->refcnt);
+ mutex_unlock(&list_mutex);
+ return;
}
+ pr_debug("deleting timer %s\n", info->label);
+
+ list_del(&info->timer->entry);
mutex_unlock(&list_mutex);
+
+ timer_shutdown_sync(&info->timer->timer);
+ cancel_work_sync(&info->timer->work);
+ sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ kfree(info->timer->attr.attr.name);
+ kfree(info->timer);
}
static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par)
@@ -434,52 +434,75 @@ static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par)
mutex_lock(&list_mutex);
- if (--info->timer->refcnt == 0) {
- pr_debug("deleting timer %s\n", info->label);
-
- list_del(&info->timer->entry);
- if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
- alarm_cancel(&info->timer->alarm);
- } else {
- timer_shutdown_sync(&info->timer->timer);
- }
- cancel_work_sync(&info->timer->work);
- sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
- kfree(info->timer->attr.attr.name);
- kfree(info->timer);
- } else {
+ if (--info->timer->refcnt > 0) {
pr_debug("decreased refcnt of timer %s to %u\n",
info->label, info->timer->refcnt);
+ mutex_unlock(&list_mutex);
+ return;
}
+ pr_debug("deleting timer %s\n", info->label);
+
+ list_del(&info->timer->entry);
mutex_unlock(&list_mutex);
+
+ if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+ alarm_cancel(&info->timer->alarm);
+ } else {
+ timer_shutdown_sync(&info->timer->timer);
+ }
+ cancel_work_sync(&info->timer->work);
+ sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ kfree(info->timer->attr.attr.name);
+ kfree(info->timer);
}
static struct xt_target idletimer_tg[] __read_mostly = {
{
- .name = "IDLETIMER",
- .family = NFPROTO_UNSPEC,
- .target = idletimer_tg_target,
- .targetsize = sizeof(struct idletimer_tg_info),
- .usersize = offsetof(struct idletimer_tg_info, timer),
- .checkentry = idletimer_tg_checkentry,
- .destroy = idletimer_tg_destroy,
- .me = THIS_MODULE,
+ .name = "IDLETIMER",
+ .family = NFPROTO_IPV4,
+ .target = idletimer_tg_target,
+ .targetsize = sizeof(struct idletimer_tg_info),
+ .usersize = offsetof(struct idletimer_tg_info, timer),
+ .checkentry = idletimer_tg_checkentry,
+ .destroy = idletimer_tg_destroy,
+ .me = THIS_MODULE,
},
{
- .name = "IDLETIMER",
- .family = NFPROTO_UNSPEC,
- .revision = 1,
- .target = idletimer_tg_target_v1,
- .targetsize = sizeof(struct idletimer_tg_info_v1),
- .usersize = offsetof(struct idletimer_tg_info_v1, timer),
- .checkentry = idletimer_tg_checkentry_v1,
- .destroy = idletimer_tg_destroy_v1,
- .me = THIS_MODULE,
+ .name = "IDLETIMER",
+ .family = NFPROTO_IPV4,
+ .revision = 1,
+ .target = idletimer_tg_target_v1,
+ .targetsize = sizeof(struct idletimer_tg_info_v1),
+ .usersize = offsetof(struct idletimer_tg_info_v1, timer),
+ .checkentry = idletimer_tg_checkentry_v1,
+ .destroy = idletimer_tg_destroy_v1,
+ .me = THIS_MODULE,
},
-
-
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "IDLETIMER",
+ .family = NFPROTO_IPV6,
+ .target = idletimer_tg_target,
+ .targetsize = sizeof(struct idletimer_tg_info),
+ .usersize = offsetof(struct idletimer_tg_info, timer),
+ .checkentry = idletimer_tg_checkentry,
+ .destroy = idletimer_tg_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "IDLETIMER",
+ .family = NFPROTO_IPV6,
+ .revision = 1,
+ .target = idletimer_tg_target_v1,
+ .targetsize = sizeof(struct idletimer_tg_info_v1),
+ .usersize = offsetof(struct idletimer_tg_info_v1, timer),
+ .checkentry = idletimer_tg_checkentry_v1,
+ .destroy = idletimer_tg_destroy_v1,
+ .me = THIS_MODULE,
+ },
+#endif
};
static struct class *idletimer_tg_class;
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 36c9720ad8d6..8a80fd76fe45 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -96,7 +96,9 @@ static int led_tg_check(const struct xt_tgchk_param *par)
struct xt_led_info_internal *ledinternal;
int err;
- if (ledinfo->id[0] == '\0')
+ /* Bail out if empty string or not a string at all. */
+ if (ledinfo->id[0] == '\0' ||
+ !memchr(ledinfo->id, '\0', sizeof(ledinfo->id)))
return -EINVAL;
mutex_lock(&xt_led_mutex);
@@ -175,26 +177,41 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par)
kfree(ledinternal);
}
-static struct xt_target led_tg_reg __read_mostly = {
- .name = "LED",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .target = led_tg,
- .targetsize = sizeof(struct xt_led_info),
- .usersize = offsetof(struct xt_led_info, internal_data),
- .checkentry = led_tg_check,
- .destroy = led_tg_destroy,
- .me = THIS_MODULE,
+static struct xt_target led_tg_reg[] __read_mostly = {
+ {
+ .name = "LED",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .target = led_tg,
+ .targetsize = sizeof(struct xt_led_info),
+ .usersize = offsetof(struct xt_led_info, internal_data),
+ .checkentry = led_tg_check,
+ .destroy = led_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "LED",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .target = led_tg,
+ .targetsize = sizeof(struct xt_led_info),
+ .usersize = offsetof(struct xt_led_info, internal_data),
+ .checkentry = led_tg_check,
+ .destroy = led_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init led_tg_init(void)
{
- return xt_register_target(&led_tg_reg);
+ return xt_register_targets(led_tg_reg, ARRAY_SIZE(led_tg_reg));
}
static void __exit led_tg_exit(void)
{
- xt_unregister_target(&led_tg_reg);
+ xt_unregister_targets(led_tg_reg, ARRAY_SIZE(led_tg_reg));
}
module_init(led_tg_init);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index e660c3710a10..6dcf4bc7e30b 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -64,25 +64,39 @@ static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
nf_logger_put(par->family, NF_LOG_TYPE_ULOG);
}
-static struct xt_target nflog_tg_reg __read_mostly = {
- .name = "NFLOG",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = nflog_tg_check,
- .destroy = nflog_tg_destroy,
- .target = nflog_tg,
- .targetsize = sizeof(struct xt_nflog_info),
- .me = THIS_MODULE,
+static struct xt_target nflog_tg_reg[] __read_mostly = {
+ {
+ .name = "NFLOG",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .checkentry = nflog_tg_check,
+ .destroy = nflog_tg_destroy,
+ .target = nflog_tg,
+ .targetsize = sizeof(struct xt_nflog_info),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "NFLOG",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .checkentry = nflog_tg_check,
+ .destroy = nflog_tg_destroy,
+ .target = nflog_tg,
+ .targetsize = sizeof(struct xt_nflog_info),
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init nflog_tg_init(void)
{
- return xt_register_target(&nflog_tg_reg);
+ return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
}
static void __exit nflog_tg_exit(void)
{
- xt_unregister_target(&nflog_tg_reg);
+ xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
}
module_init(nflog_tg_init);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 80f6624e2355..4f49cfc27831 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -179,16 +179,31 @@ static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
xt_rateest_put(par->net, info->est);
}
-static struct xt_target xt_rateest_tg_reg __read_mostly = {
- .name = "RATEEST",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .target = xt_rateest_tg,
- .checkentry = xt_rateest_tg_checkentry,
- .destroy = xt_rateest_tg_destroy,
- .targetsize = sizeof(struct xt_rateest_target_info),
- .usersize = offsetof(struct xt_rateest_target_info, est),
- .me = THIS_MODULE,
+static struct xt_target xt_rateest_tg_reg[] __read_mostly = {
+ {
+ .name = "RATEEST",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .target = xt_rateest_tg,
+ .checkentry = xt_rateest_tg_checkentry,
+ .destroy = xt_rateest_tg_destroy,
+ .targetsize = sizeof(struct xt_rateest_target_info),
+ .usersize = offsetof(struct xt_rateest_target_info, est),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "RATEEST",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .target = xt_rateest_tg,
+ .checkentry = xt_rateest_tg_checkentry,
+ .destroy = xt_rateest_tg_destroy,
+ .targetsize = sizeof(struct xt_rateest_target_info),
+ .usersize = offsetof(struct xt_rateest_target_info, est),
+ .me = THIS_MODULE,
+ },
+#endif
};
static __net_init int xt_rateest_net_init(struct net *net)
@@ -214,12 +229,12 @@ static int __init xt_rateest_tg_init(void)
if (err)
return err;
- return xt_register_target(&xt_rateest_tg_reg);
+ return xt_register_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg));
}
static void __exit xt_rateest_tg_fini(void)
{
- xt_unregister_target(&xt_rateest_tg_reg);
+ xt_unregister_targets(xt_rateest_tg_reg, ARRAY_SIZE(xt_rateest_tg_reg));
unregister_pernet_subsys(&xt_rateest_net_ops);
}
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 498a0bf6f044..5bc5ea505eb9 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -157,7 +157,7 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
{
.name = "SECMARK",
.revision = 0,
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.checkentry = secmark_tg_check_v0,
.destroy = secmark_tg_destroy,
.target = secmark_tg_v0,
@@ -167,7 +167,7 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
{
.name = "SECMARK",
.revision = 1,
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.checkentry = secmark_tg_check_v1,
.destroy = secmark_tg_destroy,
.target = secmark_tg_v1,
@@ -175,6 +175,29 @@ static struct xt_target secmark_tg_reg[] __read_mostly = {
.usersize = offsetof(struct xt_secmark_target_info_v1, secid),
.me = THIS_MODULE,
},
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "SECMARK",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .checkentry = secmark_tg_check_v0,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v0,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SECMARK",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .checkentry = secmark_tg_check_v1,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v1,
+ .targetsize = sizeof(struct xt_secmark_target_info_v1),
+ .usersize = offsetof(struct xt_secmark_target_info_v1, secid),
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init secmark_tg_init(void)
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 5582dce98cae..a642ff09fc8e 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -29,25 +29,39 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static struct xt_target trace_tg_reg __read_mostly = {
- .name = "TRACE",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .table = "raw",
- .target = trace_tg,
- .checkentry = trace_tg_check,
- .destroy = trace_tg_destroy,
- .me = THIS_MODULE,
+static struct xt_target trace_tg_reg[] __read_mostly = {
+ {
+ .name = "TRACE",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .table = "raw",
+ .target = trace_tg,
+ .checkentry = trace_tg_check,
+ .destroy = trace_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "TRACE",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .table = "raw",
+ .target = trace_tg,
+ .checkentry = trace_tg_check,
+ .destroy = trace_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init trace_tg_init(void)
{
- return xt_register_target(&trace_tg_reg);
+ return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
}
static void __exit trace_tg_exit(void)
{
- xt_unregister_target(&trace_tg_reg);
+ xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
}
module_init(trace_tg_init);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e9b2181e8c42..a77088943107 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -208,13 +208,24 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = {
},
{
.name = "addrtype",
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.revision = 1,
.match = addrtype_mt_v1,
.checkentry = addrtype_mt_checkentry_v1,
.matchsize = sizeof(struct xt_addrtype_info_v1),
.me = THIS_MODULE
- }
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "addrtype",
+ .family = NFPROTO_IPV6,
+ .revision = 1,
+ .match = addrtype_mt_v1,
+ .checkentry = addrtype_mt_checkentry_v1,
+ .matchsize = sizeof(struct xt_addrtype_info_v1),
+ .me = THIS_MODULE
+ },
+#endif
};
static int __init addrtype_mt_init(void)
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index a047a545371e..908fd5f2c3c8 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -146,24 +146,37 @@ static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
}
-static struct xt_match xt_cluster_match __read_mostly = {
- .name = "cluster",
- .family = NFPROTO_UNSPEC,
- .match = xt_cluster_mt,
- .checkentry = xt_cluster_mt_checkentry,
- .matchsize = sizeof(struct xt_cluster_match_info),
- .destroy = xt_cluster_mt_destroy,
- .me = THIS_MODULE,
+static struct xt_match xt_cluster_match[] __read_mostly = {
+ {
+ .name = "cluster",
+ .family = NFPROTO_IPV4,
+ .match = xt_cluster_mt,
+ .checkentry = xt_cluster_mt_checkentry,
+ .matchsize = sizeof(struct xt_cluster_match_info),
+ .destroy = xt_cluster_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "cluster",
+ .family = NFPROTO_IPV6,
+ .match = xt_cluster_mt,
+ .checkentry = xt_cluster_mt_checkentry,
+ .matchsize = sizeof(struct xt_cluster_match_info),
+ .destroy = xt_cluster_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init xt_cluster_mt_init(void)
{
- return xt_register_match(&xt_cluster_match);
+ return xt_register_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match));
}
static void __exit xt_cluster_mt_fini(void)
{
- xt_unregister_match(&xt_cluster_match);
+ xt_unregister_matches(xt_cluster_match, ARRAY_SIZE(xt_cluster_match));
}
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 93cb018c3055..2aabdcea8707 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -111,9 +111,11 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0)
+ if (ret < 0) {
pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
par->family);
+ return ret;
+ }
/*
* This filter cannot function correctly unless connection tracking
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5d04ef80a61d..0189f8b6b0bd 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -86,6 +86,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_connlimit_info *info = par->matchinfo;
unsigned int keylen;
+ int ret;
keylen = sizeof(u32);
if (par->family == NFPROTO_IPV6)
@@ -93,8 +94,17 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
else
keylen += sizeof(struct in_addr);
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0) {
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+ }
+
/* init private data */
- info->data = nf_conncount_init(par->net, par->family, keylen);
+ info->data = nf_conncount_init(par->net, keylen);
+ if (IS_ERR(info->data))
+ nf_ct_netns_put(par->net, par->family);
return PTR_ERR_OR_ZERO(info->data);
}
@@ -103,29 +113,45 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
- nf_conncount_destroy(par->net, par->family, info->data);
+ nf_conncount_destroy(par->net, info->data);
+ nf_ct_netns_put(par->net, par->family);
}
-static struct xt_match connlimit_mt_reg __read_mostly = {
- .name = "connlimit",
- .revision = 1,
- .family = NFPROTO_UNSPEC,
- .checkentry = connlimit_mt_check,
- .match = connlimit_mt,
- .matchsize = sizeof(struct xt_connlimit_info),
- .usersize = offsetof(struct xt_connlimit_info, data),
- .destroy = connlimit_mt_destroy,
- .me = THIS_MODULE,
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
+ {
+ .name = "connlimit",
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .checkentry = connlimit_mt_check,
+ .match = connlimit_mt,
+ .matchsize = sizeof(struct xt_connlimit_info),
+ .usersize = offsetof(struct xt_connlimit_info, data),
+ .destroy = connlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "connlimit",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .checkentry = connlimit_mt_check,
+ .match = connlimit_mt,
+ .matchsize = sizeof(struct xt_connlimit_info),
+ .usersize = offsetof(struct xt_connlimit_info, data),
+ .destroy = connlimit_mt_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
};
static int __init connlimit_mt_init(void)
{
- return xt_register_match(&connlimit_mt_reg);
+ return xt_register_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
}
static void __exit connlimit_mt_exit(void)
{
- xt_unregister_match(&connlimit_mt_reg);
+ xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
}
module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index ad3c033db64e..4277084de2e7 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -151,7 +151,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
{
.name = "CONNMARK",
.revision = 1,
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.checkentry = connmark_tg_check,
.target = connmark_tg,
.targetsize = sizeof(struct xt_connmark_tginfo1),
@@ -161,13 +161,35 @@ static struct xt_target connmark_tg_reg[] __read_mostly = {
{
.name = "CONNMARK",
.revision = 2,
- .family = NFPROTO_UNSPEC,
+ .family = NFPROTO_IPV4,
.checkentry = connmark_tg_check,
.target = connmark_tg_v2,
.targetsize = sizeof(struct xt_connmark_tginfo2),
.destroy = connmark_tg_destroy,
.me = THIS_MODULE,
- }
+ },
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "CONNMARK",
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg,
+ .targetsize = sizeof(struct xt_connmark_tginfo1),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "CONNMARK",
+ .revision = 2,
+ .family = NFPROTO_IPV6,
+ .checkentry = connmark_tg_check,
+ .target = connmark_tg_v2,
+ .targetsize = sizeof(struct xt_connmark_tginfo2),
+ .destroy = connmark_tg_destroy,
+ .me = THIS_MODULE,
+ },
+#endif
};
static struct xt_match connmark_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 1ad74b5920b5..65b965ca40ea 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -39,13 +39,35 @@ mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ((skb->mark & info->mask) == info->mark) ^ info->invert;
}
-static struct xt_target mark_tg_reg __read_mostly = {
- .name = "MARK",
- .revision = 2,
- .family = NFPROTO_UNSPEC,
- .target = mark_tg,
- .targetsize = sizeof(struct xt_mark_tginfo2),
- .me = THIS_MODULE,
+static struct xt_target mark_tg_reg[] __read_mostly = {
+ {
+ .name = "MARK",
+ .revision = 2,
+ .family = NFPROTO_IPV4,
+ .target = mark_tg,
+ .targetsize = sizeof(struct xt_mark_tginfo2),
+ .me = THIS_MODULE,
+ },
+#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES)
+ {
+ .name = "MARK",
+ .revision = 2,
+ .family = NFPROTO_ARP,
+ .target = mark_tg,
+ .targetsize = sizeof(struct xt_mark_tginfo2),
+ .me = THIS_MODULE,
+ },
+#endif
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ {
+ .name = "MARK",
+ .revision = 2,
+ .family = NFPROTO_IPV6,
+ .target = mark_tg,
+ .targetsize = sizeof(struct xt_mark_tginfo2),
+ .me = THIS_MODULE,
+ },
+#endif
};
static struct xt_match mark_mt_reg __read_mostly = {
@@ -61,12 +83,12 @@ static int __init mark_mt_init(void)
{
int ret;
- ret = xt_register_target(&mark_tg_reg);
+ ret = xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
if (ret < 0)
return ret;
ret = xt_register_match(&mark_mt_reg);
if (ret < 0) {
- xt_unregister_target(&mark_tg_reg);
+ xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
return ret;
}
return 0;
@@ -75,7 +97,7 @@ static int __init mark_mt_init(void)
static void __exit mark_mt_exit(void)
{
xt_unregister_match(&mark_mt_reg);
- xt_unregister_target(&mark_tg_reg);
+ xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
}
module_init(mark_mt_init);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 689eaa2afbec..079fe72a6384 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -107,11 +107,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
switch (entry->def.type) {
case NETLBL_NLTYPE_UNLABELED:
- if (info->attrs[NLBL_MGMT_A_FAMILY])
- entry->family =
- nla_get_u16(info->attrs[NLBL_MGMT_A_FAMILY]);
- else
- entry->family = AF_UNSPEC;
+ entry->family =
+ nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY],
+ AF_UNSPEC);
break;
case NETLBL_NLTYPE_CIPSOV4:
if (!info->attrs[NLBL_MGMT_A_CV4DOI])
@@ -601,10 +599,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
struct netlbl_dom_map *entry;
u16 family;
- if (info->attrs[NLBL_MGMT_A_FAMILY])
- family = nla_get_u16(info->attrs[NLBL_MGMT_A_FAMILY]);
- else
- family = AF_INET;
+ family = nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY], AF_INET);
ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (ans_skb == NULL)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 9996883bf2b7..1bc2d0890a9f 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1538,7 +1538,7 @@ int __init netlbl_unlabel_defconf(void)
/* Only the kernel is allowed to call this function and the only time
* it is called is at bootup before the audit subsystem is reporting
* messages so don't worry to much about these values. */
- security_current_getsecid_subj(&audit_info.secid);
+ security_current_getlsmprop_subj(&audit_info.prop);
audit_info.loginuid = GLOBAL_ROOT_UID;
audit_info.sessionid = 0;
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 3ed4fea2a2de..81635a13987b 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -98,10 +98,9 @@ struct audit_buffer *netlbl_audit_start_common(int type,
from_kuid(&init_user_ns, audit_info->loginuid),
audit_info->sessionid);
- if (audit_info->secid != 0 &&
- security_secid_to_secctx(audit_info->secid,
- &secctx,
- &secctx_len) == 0) {
+ if (lsmprop_is_set(&audit_info->prop) &&
+ security_lsmprop_to_secctx(&audit_info->prop, &secctx,
+ &secctx_len) == 0) {
audit_log_format(audit_buf, " subj=%s", secctx);
security_release_secctx(secctx, secctx_len);
}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index d6c5b31eb4eb..d4c434956212 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -32,7 +32,7 @@
*/
static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info)
{
- security_current_getsecid_subj(&audit_info->secid);
+ security_current_getlsmprop_subj(&audit_info->prop);
audit_info->loginuid = audit_get_loginuid(current);
audit_info->sessionid = audit_get_sessionid(current);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0b7a89db3ab7..f4e7b5e4bb59 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
static void netlink_sock_destruct(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->cb_running) {
- if (nlk->cb.done)
- nlk->cb.done(&nlk->cb);
- module_put(nlk->cb.module);
- kfree_skb(nlk->cb.skb);
- }
-
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}
-static void netlink_sock_destruct_work(struct work_struct *work)
-{
- struct netlink_sock *nlk = container_of(work, struct netlink_sock,
- work);
-
- sk_free(&nlk->sk);
-}
-
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
@@ -731,12 +714,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
- if (nlk->cb_running && nlk->cb.done) {
- INIT_WORK(&nlk->work, netlink_sock_destruct_work);
- schedule_work(&nlk->work);
- return;
- }
-
sk_free(sk);
}
@@ -788,6 +765,14 @@ static int netlink_release(struct socket *sock)
NETLINK_URELEASE, &n);
}
+ /* Terminate any outstanding dump */
+ if (nlk->cb_running) {
+ if (nlk->cb.done)
+ nlk->cb.done(&nlk->cb);
+ module_put(nlk->cb.module);
+ kfree_skb(nlk->cb.skb);
+ }
+
module_put(nlk->module);
if (netlink_is_kernel(sk)) {
@@ -1180,11 +1165,16 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
return sock;
}
-struct sock *netlink_getsockbyfilp(struct file *filp)
+struct sock *netlink_getsockbyfd(int fd)
{
- struct inode *inode = file_inode(filp);
+ CLASS(fd, f)(fd);
+ struct inode *inode;
struct sock *sock;
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+
+ inode = file_inode(fd_file(f));
if (!S_ISSOCK(inode->i_mode))
return ERR_PTR(-ENOTSOCK);
@@ -2136,8 +2126,9 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
struct sock *sk;
struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+ struct hlist_node *tmp;
- sk_for_each_bound(sk, &tbl->mc_list)
+ sk_for_each_bound_safe(sk, tmp, &tbl->mc_list)
netlink_update_socket_mc(nlk_sk(sk), group, 0);
}
@@ -2190,9 +2181,14 @@ netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
return tlvlen;
}
+static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr)
+{
+ return !WARN_ON(addr < nlmsg_data(nlh) ||
+ addr - (const void *) nlh >= nlh->nlmsg_len);
+}
+
static void
-netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
- const struct nlmsghdr *nlh, int err,
+netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
if (extack->_msg)
@@ -2204,9 +2200,7 @@ netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
if (!err)
return;
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
+ if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr))
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
(u8 *)extack->bad_attr - (const u8 *)nlh));
if (extack->policy)
@@ -2215,9 +2209,7 @@ netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
if (extack->miss_type)
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
extack->miss_type));
- if (extack->miss_nest &&
- !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
- (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
+ if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest))
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
(u8 *)extack->miss_nest - (const u8 *)nlh));
}
@@ -2246,7 +2238,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
if (extack_len) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
if (skb_tailroom(skb) >= extack_len) {
- netlink_ack_tlv_fill(cb->skb, skb, cb->nlh,
+ netlink_ack_tlv_fill(skb, cb->nlh,
nlk->dump_done_errno, extack);
nlmsg_end(skb, nlh);
}
@@ -2278,7 +2270,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
/* NLMSG_GOODSIZE is small to avoid high order allocations being
- * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * required, but it makes sense to _attempt_ a 32KiB allocation
* to reduce number of system calls on dump operations, if user
* ever provided a big enough buffer.
*/
@@ -2300,7 +2292,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
/* Trim skb to allocated size. User is expected to provide buffer as
- * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
+ * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
* could fit within the allocated skb. skb is typically allocated
* with larger space than required (could be as much as near 2x the
@@ -2505,7 +2497,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
}
if (tlvlen)
- netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
+ netlink_ack_tlv_fill(skb, nlh, err, extack);
nlmsg_end(skb, rep);
@@ -2934,12 +2926,8 @@ static int __init netlink_proto_init(void)
for (i = 0; i < MAX_LINKS; i++) {
if (rhashtable_init(&nl_table[i].hash,
- &netlink_rhashtable_params) < 0) {
- while (--i > 0)
- rhashtable_destroy(&nl_table[i].hash);
- kfree(nl_table);
+ &netlink_rhashtable_params) < 0)
goto panic;
- }
}
netlink_add_usersock_entry();
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 9751e29d4bbb..778a3809361f 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -4,7 +4,6 @@
#include <linux/rhashtable.h>
#include <linux/atomic.h>
-#include <linux/workqueue.h>
#include <net/sock.h>
/* flags */
@@ -41,7 +40,6 @@ struct netlink_sock {
struct netlink_callback cb;
struct mutex nl_cb_mutex;
- struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
@@ -51,7 +49,6 @@ struct netlink_sock {
struct rhash_head node;
struct rcu_head rcu;
- struct work_struct work;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index feb54c63a116..104732d34543 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -997,7 +997,7 @@ static int genl_start(struct netlink_callback *cb)
info->info.attrs = attrs;
genl_info_net_set(&info->info, sock_net(cb->skb->sk));
info->info.extack = cb->extack;
- memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr));
+ memset(&info->info.ctx, 0, sizeof(info->info.ctx));
cb->data = info;
if (ops->start) {
@@ -1104,7 +1104,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
info.attrs = attrbuf;
info.extack = extack;
genl_info_net_set(&info, net);
- memset(&info.user_ptr, 0, sizeof(info.user_ptr));
+ memset(&info.ctx, 0, sizeof(info.ctx));
if (ops->pre_doit) {
err = ops->pre_doit(ops, skb, &info);
@@ -1501,15 +1501,11 @@ static int genl_ctrl_event(int event, const struct genl_family *family,
if (IS_ERR(msg))
return PTR_ERR(msg);
- if (!family->netnsok) {
+ if (!family->netnsok)
genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0,
0, GFP_KERNEL);
- } else {
- rcu_read_lock();
- genlmsg_multicast_allns(&genl_ctrl, msg, 0,
- 0, GFP_ATOMIC);
- rcu_read_unlock();
- }
+ else
+ genlmsg_multicast_allns(&genl_ctrl, msg, 0, 0);
return 0;
}
@@ -1929,23 +1925,23 @@ problem:
core_initcall(genl_init);
-static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
- gfp_t flags)
+static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group)
{
struct sk_buff *tmp;
struct net *net, *prev = NULL;
bool delivered = false;
int err;
+ rcu_read_lock();
for_each_net_rcu(net) {
if (prev) {
- tmp = skb_clone(skb, flags);
+ tmp = skb_clone(skb, GFP_ATOMIC);
if (!tmp) {
err = -ENOMEM;
goto error;
}
err = nlmsg_multicast(prev->genl_sock, tmp,
- portid, group, flags);
+ portid, group, GFP_ATOMIC);
if (!err)
delivered = true;
else if (err != -ESRCH)
@@ -1954,27 +1950,31 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
prev = net;
}
+ err = nlmsg_multicast(prev->genl_sock, skb, portid, group, GFP_ATOMIC);
+
+ rcu_read_unlock();
- err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
if (!err)
delivered = true;
else if (err != -ESRCH)
return err;
return delivered ? 0 : -ESRCH;
error:
+ rcu_read_unlock();
+
kfree_skb(skb);
return err;
}
int genlmsg_multicast_allns(const struct genl_family *family,
struct sk_buff *skb, u32 portid,
- unsigned int group, gfp_t flags)
+ unsigned int group)
{
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return -EINVAL;
group = family->mcgrp_offset + group;
- return genlmsg_mcast(skb, portid, group, flags);
+ return genlmsg_mcast(skb, portid, group);
}
EXPORT_SYMBOL(genlmsg_multicast_allns);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index bd2b17b219ae..2b5e246b8d9a 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -189,7 +189,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
}
nr_node->callsign = *nr;
- strcpy(nr_node->mnemonic, mnemonic);
+ strscpy(nr_node->mnemonic, mnemonic);
nr_node->which = 0;
nr_node->count = 1;
@@ -214,7 +214,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_node_lock(nr_node);
if (quality != 0)
- strcpy(nr_node->mnemonic, mnemonic);
+ strscpy(nr_node->mnemonic, mnemonic);
for (found = 0, i = 0; i < nr_node->count; i++) {
if (nr_node->routes[i].neighbour == nr_neigh) {
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index f456a5911e7d..1ec5955fe469 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -757,6 +757,14 @@ int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
}
EXPORT_SYMBOL(nci_core_conn_close);
+static void nci_set_target_ats(struct nfc_target *target, struct nci_dev *ndev)
+{
+ if (ndev->target_ats_len > 0) {
+ target->ats_len = ndev->target_ats_len;
+ memcpy(target->ats, ndev->target_ats, target->ats_len);
+ }
+}
+
static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
@@ -939,8 +947,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
}
- if (!rc)
+ if (!rc) {
ndev->target_active_prot = protocol;
+ if (protocol == NFC_PROTO_ISO14443)
+ nci_set_target_ats(target, ndev);
+ }
return rc;
}
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 994a0a1efb58..a818eff27e6b 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -402,7 +402,7 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
switch (ntf->activation_rf_tech_and_mode) {
case NCI_NFC_A_PASSIVE_POLL_MODE:
nfca_poll = &ntf->activation_params.nfca_poll_iso_dep;
- nfca_poll->rats_res_len = min_t(__u8, *data++, 20);
+ nfca_poll->rats_res_len = min_t(__u8, *data++, NFC_ATS_MAXSIZE);
pr_debug("rats_res_len %d\n", nfca_poll->rats_res_len);
if (nfca_poll->rats_res_len > 0) {
memcpy(nfca_poll->rats_res,
@@ -531,6 +531,28 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
return NCI_STATUS_OK;
}
+static int nci_store_ats_nfc_iso_dep(struct nci_dev *ndev,
+ const struct nci_rf_intf_activated_ntf *ntf)
+{
+ ndev->target_ats_len = 0;
+
+ if (ntf->activation_params_len <= 0)
+ return NCI_STATUS_OK;
+
+ if (ntf->activation_params.nfca_poll_iso_dep.rats_res_len > NFC_ATS_MAXSIZE) {
+ pr_debug("ATS too long\n");
+ return NCI_STATUS_RF_PROTOCOL_ERROR;
+ }
+
+ if (ntf->activation_params.nfca_poll_iso_dep.rats_res_len > 0) {
+ ndev->target_ats_len = ntf->activation_params.nfca_poll_iso_dep.rats_res_len;
+ memcpy(ndev->target_ats, ntf->activation_params.nfca_poll_iso_dep.rats_res,
+ ndev->target_ats_len);
+ }
+
+ return NCI_STATUS_OK;
+}
+
static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
const struct sk_buff *skb)
{
@@ -660,6 +682,14 @@ exit:
if (err != NCI_STATUS_OK)
pr_err("unable to store general bytes\n");
}
+
+ /* store ATS to be reported later in nci_activate_target */
+ if (ntf.rf_interface == NCI_RF_INTERFACE_ISO_DEP &&
+ ntf.activation_rf_tech_and_mode == NCI_NFC_A_PASSIVE_POLL_MODE) {
+ err = nci_store_ats_nfc_iso_dep(ndev, &ntf);
+ if (err != NCI_STATUS_OK)
+ pr_err("unable to store ATS\n");
+ }
}
if (!(ntf.activation_rf_tech_and_mode & NCI_RF_TECH_MODE_LISTEN_MASK)) {
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index dd2ce73a24fb..6a40b8d0350d 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -96,6 +96,11 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
goto nla_put_failure;
}
+ if (target->ats_len > 0 &&
+ nla_put(msg, NFC_ATTR_TARGET_ATS, target->ats_len,
+ target->ats))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 101f9a23792c..16e260014684 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -237,14 +237,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
+ int err;
+
if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
} else {
key->eth.vlan.tci = vlan->vlan_tci;
key->eth.vlan.tpid = vlan->vlan_tpid;
}
- return skb_vlan_push(skb, vlan->vlan_tpid,
- ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ err = skb_vlan_push(skb, vlan->vlan_tpid,
+ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ skb_reset_mac_len(skb);
+ return err;
}
/* 'src' is already properly masked. */
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 8eb1d644b741..3bb4810234aa 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
attr == OVS_KEY_ATTR_CT_MARK)
return true;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABELS) {
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- return ovs_net->xt_label;
- }
+ attr == OVS_KEY_ATTR_CT_LABELS)
+ return true;
return false;
}
@@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
+ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_conntrack_info ct_info;
const char *helper = NULL;
u16 family;
@@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
return -ENOMEM;
}
+ if (nf_connlabels_get(net, n_bits - 1)) {
+ nf_ct_tmpl_free(ct_info.ct);
+ OVS_NLERR(log, "Failed to set connlabel length");
+ return -EOPNOTSUPP;
+ }
+
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
@@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
if (ct_info->ct) {
if (ct_info->timeout[0])
nf_ct_destroy_timeout(ct_info->ct);
+ nf_connlabels_put(nf_ct_net(ct_info->ct));
nf_ct_tmpl_free(ct_info->ct);
}
}
@@ -1603,8 +1608,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
+ ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
if (IS_ERR(ovs_net->ct_limit_info->data)) {
err = PTR_ERR(ovs_net->ct_limit_info->data);
@@ -1621,7 +1625,7 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
int i;
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
+ nf_conncount_destroy(net, info->data);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
@@ -2002,17 +2006,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
int ovs_ct_init(struct net *net)
{
- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- if (nf_connlabels_get(net, n_bits - 1)) {
- ovs_net->xt_label = false;
- OVS_NLERR(true, "Failed to set connlabel length");
- } else {
- ovs_net->xt_label = true;
- }
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
return ovs_ct_limit_init(net, ovs_net);
#else
return 0;
@@ -2021,12 +2017,9 @@ int ovs_ct_init(struct net *net)
void ovs_ct_exit(struct net *net)
{
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
ovs_ct_limit_exit(net, ovs_net);
#endif
-
- if (ovs_net->xt_label)
- nf_connlabels_put(net);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 78d9961fcd44..225f6048867f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1828,8 +1828,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -2266,8 +2265,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
return -EOPNOTSUPP;
- port_no = a[OVS_VPORT_ATTR_PORT_NO]
- ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+ port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
if (port_no >= DP_MAX_PORTS)
return -EFBIG;
@@ -2304,8 +2302,8 @@ restart:
parms.dp = dp;
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
+ 0);
vport = new_vport(&parms);
err = PTR_ERR(vport);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9ca6231ea647..365b9bb7f546 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -160,9 +160,6 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
-
- /* Module reference for configuring conntrack. */
- bool xt_label;
};
/**
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c92bdc4dfe19..881ddd3696d5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1938,7 +1938,7 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
- return attr ? nla_get_u32(attr) : 0;
+ return nla_get_u32_default(attr, 0);
}
/**
@@ -2491,7 +2491,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
- return (void *)acts;
+ return ERR_CAST(acts);
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 4b33133cbdff..2412d7813d24 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -102,19 +102,20 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
+ netdev->lltx = true;
netdev->needs_free_netdev = true;
netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
- netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
+ netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+ netdev->hw_features = netdev->features;
eth_hw_addr_random(netdev);
}
@@ -148,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
- vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ vport->dev->netns_local = true;
rtnl_lock();
err = register_netdevice(vport->dev);
@@ -194,7 +195,6 @@ static int internal_dev_recv(struct sk_buff *skb)
skb_dst_drop(skb);
nf_reset_ct(skb);
- secpath_reset(skb);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4a364cdd445e..886c0dd47b66 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1846,21 +1846,22 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -EINVAL;
spin_lock(&po->bind_lock);
- if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
+ if (po->num &&
match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
- __dev_remove_pack(&po->prot_hook);
-
/* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
WRITE_ONCE(po->fanout, match);
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
- __fanout_link(sk, po);
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
+ __dev_remove_pack(&po->prot_hook);
+ __fanout_link(sk, po);
+ }
err = 0;
}
}
@@ -2118,7 +2119,7 @@ retry:
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);
skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
- skb_setup_tx_timestamp(skb, sockc.tsflags);
+ skb_setup_tx_timestamp(skb, &sockc);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2216,7 +2217,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
}
}
- snaplen = skb->len;
+ snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb);
res = run_filter(skb, sk, snaplen);
if (!res)
@@ -2336,7 +2337,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
}
- snaplen = skb->len;
+ snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb);
res = run_filter(skb, sk, snaplen);
if (!res)
@@ -2650,7 +2651,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->priority = READ_ONCE(po->sk.sk_priority);
skb->mark = READ_ONCE(po->sk.sk_mark);
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
@@ -3115,7 +3116,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
}
- skb_setup_tx_timestamp(skb, sockc.tsflags);
+ skb_setup_tx_timestamp(skb, &sockc);
if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
@@ -3421,17 +3422,17 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
if (sock->type == SOCK_PACKET)
sock->ops = &packet_ops_spkt;
+ po = pkt_sk(sk);
+ err = packet_alloc_pending(po);
+ if (err)
+ goto out_sk_free;
+
sock_init_data(sock, sk);
- po = pkt_sk(sk);
init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
- err = packet_alloc_pending(po);
- if (err)
- goto out2;
-
packet_cached_dev_reset(po);
sk->sk_destruct = packet_sock_destruct;
@@ -3463,7 +3464,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock_prot_inuse_add(net, &packet_proto, 1);
return 0;
-out2:
+out_sk_free:
sk_free(sk);
out:
return err;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 2b582da1e88c..a27efa4faa4e 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/sock.h>
#include <linux/if_phonet.h>
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index cde671d29d5d..5c36bae37b8f 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -22,7 +22,7 @@
#include <net/phonet/pn_dev.h>
struct phonet_routes {
- struct mutex lock;
+ spinlock_t lock;
struct net_device __rcu *table[64];
};
@@ -54,7 +54,7 @@ static struct phonet_device *__phonet_device_alloc(struct net_device *dev)
pnd->netdev = dev;
bitmap_zero(pnd->addrs, 64);
- BUG_ON(!mutex_is_locked(&pndevs->lock));
+ lockdep_assert_held(&pndevs->lock);
list_add_rcu(&pnd->list, &pndevs->list);
return pnd;
}
@@ -64,7 +64,8 @@ static struct phonet_device *__phonet_get(struct net_device *dev)
struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
struct phonet_device *pnd;
- BUG_ON(!mutex_is_locked(&pndevs->lock));
+ lockdep_assert_held(&pndevs->lock);
+
list_for_each_entry(pnd, &pndevs->list, list) {
if (pnd->netdev == dev)
return pnd;
@@ -91,17 +92,22 @@ static void phonet_device_destroy(struct net_device *dev)
ASSERT_RTNL();
- mutex_lock(&pndevs->lock);
+ spin_lock(&pndevs->lock);
+
pnd = __phonet_get(dev);
if (pnd)
list_del_rcu(&pnd->list);
- mutex_unlock(&pndevs->lock);
+
+ spin_unlock(&pndevs->lock);
if (pnd) {
+ struct net *net = dev_net(dev);
+ u32 ifindex = dev->ifindex;
u8 addr;
for_each_set_bit(addr, pnd->addrs, 64)
- phonet_address_notify(RTM_DELADDR, dev, addr);
+ phonet_address_notify(net, RTM_DELADDR, ifindex, addr);
+
kfree(pnd);
}
}
@@ -133,7 +139,8 @@ int phonet_address_add(struct net_device *dev, u8 addr)
struct phonet_device *pnd;
int err = 0;
- mutex_lock(&pndevs->lock);
+ spin_lock(&pndevs->lock);
+
/* Find or create Phonet-specific device data */
pnd = __phonet_get(dev);
if (pnd == NULL)
@@ -142,7 +149,9 @@ int phonet_address_add(struct net_device *dev, u8 addr)
err = -ENOMEM;
else if (test_and_set_bit(addr >> 2, pnd->addrs))
err = -EEXIST;
- mutex_unlock(&pndevs->lock);
+
+ spin_unlock(&pndevs->lock);
+
return err;
}
@@ -152,7 +161,8 @@ int phonet_address_del(struct net_device *dev, u8 addr)
struct phonet_device *pnd;
int err = 0;
- mutex_lock(&pndevs->lock);
+ spin_lock(&pndevs->lock);
+
pnd = __phonet_get(dev);
if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) {
err = -EADDRNOTAVAIL;
@@ -161,7 +171,8 @@ int phonet_address_del(struct net_device *dev, u8 addr)
list_del_rcu(&pnd->list);
else
pnd = NULL;
- mutex_unlock(&pndevs->lock);
+
+ spin_unlock(&pndevs->lock);
if (pnd)
kfree_rcu(pnd, rcu);
@@ -244,32 +255,39 @@ static int phonet_device_autoconf(struct net_device *dev)
ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device);
if (ret)
return ret;
- phonet_address_notify(RTM_NEWADDR, dev,
- req.ifr_phonet_autoconf.device);
+
+ phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex,
+ req.ifr_phonet_autoconf.device);
return 0;
}
static void phonet_route_autodel(struct net_device *dev)
{
- struct phonet_net *pnn = phonet_pernet(dev_net(dev));
- unsigned int i;
+ struct net *net = dev_net(dev);
DECLARE_BITMAP(deleted, 64);
+ u32 ifindex = dev->ifindex;
+ struct phonet_net *pnn;
+ unsigned int i;
+
+ pnn = phonet_pernet(net);
/* Remove left-over Phonet routes */
bitmap_zero(deleted, 64);
- mutex_lock(&pnn->routes.lock);
- for (i = 0; i < 64; i++)
+
+ spin_lock(&pnn->routes.lock);
+ for (i = 0; i < 64; i++) {
if (rcu_access_pointer(pnn->routes.table[i]) == dev) {
RCU_INIT_POINTER(pnn->routes.table[i], NULL);
set_bit(i, deleted);
}
- mutex_unlock(&pnn->routes.lock);
+ }
+ spin_unlock(&pnn->routes.lock);
if (bitmap_empty(deleted, 64))
return; /* short-circuit RCU */
synchronize_rcu();
for_each_set_bit(i, deleted, 64) {
- rtm_phonet_notify(RTM_DELROUTE, dev, i);
+ rtm_phonet_notify(net, RTM_DELROUTE, ifindex, i);
dev_put(dev);
}
}
@@ -309,8 +327,8 @@ static int __net_init phonet_init_net(struct net *net)
return -ENOMEM;
INIT_LIST_HEAD(&pnn->pndevs.list);
- mutex_init(&pnn->pndevs.lock);
- mutex_init(&pnn->routes.lock);
+ spin_lock_init(&pnn->pndevs.lock);
+ spin_lock_init(&pnn->routes.lock);
return 0;
}
@@ -360,13 +378,15 @@ int phonet_route_add(struct net_device *dev, u8 daddr)
int err = -EEXIST;
daddr = daddr >> 2;
- mutex_lock(&routes->lock);
+
+ spin_lock(&routes->lock);
if (routes->table[daddr] == NULL) {
rcu_assign_pointer(routes->table[daddr], dev);
dev_hold(dev);
err = 0;
}
- mutex_unlock(&routes->lock);
+ spin_unlock(&routes->lock);
+
return err;
}
@@ -376,17 +396,19 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
struct phonet_routes *routes = &pnn->routes;
daddr = daddr >> 2;
- mutex_lock(&routes->lock);
+
+ spin_lock(&routes->lock);
if (rcu_access_pointer(routes->table[daddr]) == dev)
RCU_INIT_POINTER(routes->table[daddr], NULL);
else
dev = NULL;
- mutex_unlock(&routes->lock);
+ spin_unlock(&routes->lock);
if (!dev)
return -ENOENT;
- synchronize_rcu();
- dev_put(dev);
+
+ /* Note : our caller must call synchronize_rcu() and dev_put(dev) */
+
return 0;
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 7008d402499d..b9043c92dc24 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -19,10 +19,10 @@
/* Device address handling */
-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
+static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
u32 portid, u32 seq, int event);
-void phonet_address_notify(int event, struct net_device *dev, u8 addr)
+void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -31,17 +31,18 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr)
nla_total_size(1), GFP_KERNEL);
if (skb == NULL)
goto errout;
- err = fill_addr(skb, dev, addr, 0, 0, event);
+
+ err = fill_addr(skb, ifindex, addr, 0, 0, event);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, dev_net(dev), 0,
- RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
+
+ rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
return;
errout:
- rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err);
}
static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
@@ -64,8 +65,6 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- ASSERT_RTNL();
-
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_phonet_policy, extack);
if (err < 0)
@@ -79,21 +78,29 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Phonet addresses only have 6 high-order bits */
return -EINVAL;
- dev = __dev_get_by_index(net, ifm->ifa_index);
- if (dev == NULL)
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifm->ifa_index);
+ if (!dev) {
+ rcu_read_unlock();
return -ENODEV;
+ }
if (nlh->nlmsg_type == RTM_NEWADDR)
err = phonet_address_add(dev, pnaddr);
else
err = phonet_address_del(dev, pnaddr);
+
+ rcu_read_unlock();
+
if (!err)
- phonet_address_notify(nlh->nlmsg_type, dev, pnaddr);
+ phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr);
+
return err;
}
-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
- u32 portid, u32 seq, int event)
+static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
+ u32 portid, u32 seq, int event)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
@@ -107,7 +114,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
ifm->ifa_prefixlen = 0;
ifm->ifa_flags = IFA_F_PERMANENT;
ifm->ifa_scope = RT_SCOPE_LINK;
- ifm->ifa_index = dev->ifindex;
+ ifm->ifa_index = ifindex;
if (nla_put_u8(skb, IFA_LOCAL, addr))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -120,14 +127,17 @@ nla_put_failure:
static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
+ int addr_idx = 0, addr_start_idx = cb->args[1];
+ int dev_idx = 0, dev_start_idx = cb->args[0];
struct phonet_device_list *pndevs;
struct phonet_device *pnd;
- int dev_idx = 0, dev_start_idx = cb->args[0];
- int addr_idx = 0, addr_start_idx = cb->args[1];
+ int err = 0;
pndevs = phonet_device_list(sock_net(skb->sk));
+
rcu_read_lock();
list_for_each_entry_rcu(pnd, &pndevs->list, list) {
+ DECLARE_BITMAP(addrs, 64);
u8 addr;
if (dev_idx > dev_start_idx)
@@ -136,29 +146,32 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
continue;
addr_idx = 0;
- for_each_set_bit(addr, pnd->addrs, 64) {
+ memcpy(addrs, pnd->addrs, sizeof(pnd->addrs));
+
+ for_each_set_bit(addr, addrs, 64) {
if (addr_idx++ < addr_start_idx)
continue;
- if (fill_addr(skb, pnd->netdev, addr << 2,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0)
+ err = fill_addr(skb, READ_ONCE(pnd->netdev->ifindex),
+ addr << 2, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR);
+ if (err < 0)
goto out;
}
}
-
out:
rcu_read_unlock();
+
cb->args[0] = dev_idx;
cb->args[1] = addr_idx;
- return skb->len;
+ return err;
}
/* Routes handling */
-static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
- u32 portid, u32 seq, int event)
+static int fill_route(struct sk_buff *skb, u32 ifindex, u8 dst,
+ u32 portid, u32 seq, int event)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
@@ -177,8 +190,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
- if (nla_put_u8(skb, RTA_DST, dst) ||
- nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex)))
+ if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, ifindex))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -188,7 +200,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
+void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -197,17 +209,18 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
if (skb == NULL)
goto errout;
- err = fill_route(skb, dev, dst, 0, 0, event);
+
+ err = fill_route(skb, ifindex, dst, 0, 0, event);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, dev_net(dev), 0,
- RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL);
+
+ rtnl_notify(skb, net, 0, RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL);
return;
errout:
- rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_PHONET_ROUTE, err);
}
static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
@@ -220,8 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[RTA_MAX+1];
+ bool sync_needed = false;
struct net_device *dev;
struct rtmsg *rtm;
+ u32 ifindex;
int err;
u8 dst;
@@ -231,8 +246,6 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- ASSERT_RTNL();
-
err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_phonet_policy, extack);
if (err < 0)
@@ -247,16 +260,33 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (dst & 3) /* Phonet addresses only have 6 high-order bits */
return -EINVAL;
- dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF]));
- if (dev == NULL)
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
return -ENODEV;
+ }
- if (nlh->nlmsg_type == RTM_NEWROUTE)
+ if (nlh->nlmsg_type == RTM_NEWROUTE) {
err = phonet_route_add(dev, dst);
- else
+ } else {
err = phonet_route_del(dev, dst);
+ if (!err)
+ sync_needed = true;
+ }
+
+ rcu_read_unlock();
+
+ if (sync_needed) {
+ synchronize_rcu();
+ dev_put(dev);
+ }
if (!err)
- rtm_phonet_notify(nlh->nlmsg_type, dev, dst);
+ rtm_phonet_notify(net, nlh->nlmsg_type, ifindex, dst);
+
return err;
}
@@ -273,7 +303,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
continue;
- err = fill_route(skb, dev, addr << 2,
+ err = fill_route(skb, READ_ONCE(dev->ifindex), addr << 2,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE);
if (err < 0)
@@ -285,23 +315,22 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return err;
}
+static const struct rtnl_msg_handler phonet_rtnl_msg_handlers[] __initdata_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_NEWADDR,
+ .doit = addr_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_DELADDR,
+ .doit = addr_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_GETADDR,
+ .dumpit = getaddr_dumpit, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_NEWROUTE,
+ .doit = route_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_DELROUTE,
+ .doit = route_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_GETROUTE,
+ .dumpit = route_dumpit, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+};
+
int __init phonet_netlink_register(void)
{
- int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR,
- addr_doit, NULL, 0);
- if (err)
- return err;
-
- /* Further rtnl_register_module() cannot fail */
- rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR,
- addr_doit, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETADDR,
- NULL, getaddr_dumpit, 0);
- rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE,
- route_doit, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE,
- route_doit, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE,
- NULL, route_dumpit, RTNL_FLAG_DUMP_UNLOCKED);
- return 0;
+ return rtnl_register_many(phonet_rtnl_msg_handlers);
}
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 41ece61eb57a..00c51cf693f3 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -884,7 +884,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
mutex_lock(&qrtr_node_lock);
list_for_each_entry(node, &qrtr_all_nodes, item) {
- skbn = skb_clone(skb, GFP_KERNEL);
+ skbn = pskb_copy(skb, GFP_KERNEL);
if (!skbn)
break;
skb_set_owner_w(skbn, skb->sk);
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 75cd696963b2..f007730aa2bb 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -26,3 +26,12 @@ config RDS_DEBUG
bool "RDS debugging messages"
depends on RDS
default n
+
+config GCOV_PROFILE_RDS
+ bool "Enable GCOV profiling on RDS"
+ depends on GCOV_KERNEL
+ help
+ Enable GCOV profiling on RDS for checking which functions/lines
+ are executed.
+
+ If unsure, say N.
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 8fdc118e2927..3af1ca1d965c 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -15,3 +15,8 @@ rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
tcp_send.o tcp_stats.o
ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG
+
+# for GCOV coverage profiling
+ifdef CONFIG_GCOV_PROFILE_RDS
+GCOV_PROFILE := y
+endif
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 2ba71102b1f1..8ef3178ed4d6 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -369,9 +369,6 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
void rds_ib_conn_free(void *arg);
int rds_ib_conn_path_connect(struct rds_conn_path *cp);
void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
-void rds_ib_state_change(struct sock *sk);
-int rds_ib_listen_init(void);
-void rds_ib_listen_stop(void);
__printf(2, 3)
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -402,7 +399,6 @@ void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
struct rds_ib_ack_state *state);
-void rds_ib_recv_tasklet_fn(unsigned long data);
void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 8f070ee7e742..d1cfceeff133 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -40,10 +40,6 @@
#include "rds.h"
struct workqueue_struct *rds_ib_mr_wq;
-struct rds_ib_dereg_odp_mr {
- struct work_struct work;
- struct ib_mr *mr;
-};
static void rds_ib_odp_mr_worker(struct work_struct *work);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 7a5367628c05..7d3e82e4c2fc 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -539,18 +539,14 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
#endif
bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
- bool blocked, unsigned long reason)
+ bool blocked,
+ enum rfkill_hard_block_reasons reason)
{
unsigned long flags;
bool ret, prev;
BUG_ON(!rfkill);
- if (WARN(reason & ~(RFKILL_HARD_BLOCK_SIGNAL |
- RFKILL_HARD_BLOCK_NOT_OWNER),
- "hw_state reason not supported: 0x%lx", reason))
- return rfkill_blocked(rfkill);
-
spin_lock_irqsave(&rfkill->lock, flags);
prev = !!(rfkill->hard_block_reasons & reason);
if (blocked) {
@@ -1398,7 +1394,6 @@ static const struct file_operations rfkill_fops = {
.release = rfkill_fop_release,
.unlocked_ioctl = rfkill_fop_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .llseek = no_llseek,
};
#define RFKILL_NAME "rfkill"
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 84529886c2e6..9fa019e0dcad 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -3,6 +3,7 @@
* Copyright (c) 2011, NVIDIA Corporation.
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -31,8 +32,12 @@ static int rfkill_gpio_set_power(void *data, bool blocked)
{
struct rfkill_gpio_data *rfkill = data;
- if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
- clk_enable(rfkill->clk);
+ if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) {
+ int ret = clk_enable(rfkill->clk);
+
+ if (ret)
+ return ret;
+ }
gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked);
gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked);
@@ -72,6 +77,20 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
return devm_acpi_dev_add_driver_gpios(dev, acpi_rfkill_default_gpios);
}
+/* List of DMI matches for devices on which rfkill-gpio should not load,
+ * to avoid firmware bugs.
+ */
+static const struct dmi_system_id rfkill_gpio_deny_table[] = {
+ {
+ /* Lenovo Yoga Tab 3 Pro YT3-X90, bogus "BCM4752" device in DSDT */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"),
+ },
+ },
+ { }
+};
+
static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill;
@@ -81,6 +100,9 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
const char *type_name;
int ret;
+ if (dmi_check_system(rfkill_gpio_deny_table))
+ return -ENODEV;
+
rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
if (!rfkill)
return -ENOMEM;
@@ -181,7 +203,7 @@ MODULE_DEVICE_TABLE(of, rfkill_of_match);
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
- .remove_new = rfkill_gpio_remove,
+ .remove = rfkill_gpio_remove,
.driver = {
.name = "rfkill_gpio",
.acpi_match_table = ACPI_PTR(rfkill_acpi_match),
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index f4844683e120..9d8bd0b37e41 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -707,9 +707,10 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
ret = -EISCONN;
if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
- ret = copy_from_sockptr(&min_sec_level, optval,
- sizeof(unsigned int));
- if (ret < 0)
+ ret = copy_safe_from_sockptr(&min_sec_level,
+ sizeof(min_sec_level),
+ optval, optlen);
+ if (ret)
goto error;
ret = -EINVAL;
if (min_sec_level > RXRPC_SECURITY_MAX)
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 08de24658f4f..d0fd37bdcfe9 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -856,7 +856,6 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
struct rxrpc_connection *conn,
struct sockaddr_rxrpc *peer_srx,
struct sk_buff *skb);
-void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
/*
@@ -969,7 +968,6 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
-void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
@@ -1058,7 +1056,7 @@ bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
int rxrpc_io_thread(void *data);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
{
- wake_up_process(local->io_thread);
+ wake_up_process(READ_ONCE(local->io_thread));
}
static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why)
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index d25bf1cf3670..bb11e8289d6d 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -516,6 +516,7 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local)
spin_lock(&local->client_call_lock);
list_move_tail(&call->wait_link, &bundle->waiting_calls);
+ rxrpc_see_call(call, rxrpc_call_see_waiting_call);
spin_unlock(&local->client_call_lock);
if (rxrpc_bundle_has_space(bundle))
@@ -586,7 +587,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
_debug("call is waiting");
ASSERTCMP(call->call_id, ==, 0);
ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
+ /* May still be on ->new_client_calls. */
+ spin_lock(&local->client_call_lock);
list_del_init(&call->wait_link);
+ spin_unlock(&local->client_call_lock);
return;
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 1539d315afe7..694c4df7a1a3 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
*/
rxrpc_purge_queue(&conn->rx_queue);
- if (conn->tx_data_alloc.va)
- __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
- conn->tx_data_alloc.pagecnt_bias);
+ page_frag_cache_drain(&conn->tx_data_alloc);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
}
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 0300baa9afcd..07c74c77d802 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -27,11 +27,17 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
{
struct sk_buff_head *rx_queue;
struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
+ struct task_struct *io_thread;
if (unlikely(!local)) {
kfree_skb(skb);
return 0;
}
+ io_thread = READ_ONCE(local->io_thread);
+ if (!io_thread) {
+ kfree_skb(skb);
+ return 0;
+ }
if (skb->tstamp == 0)
skb->tstamp = ktime_get_real();
@@ -47,7 +53,7 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
#endif
skb_queue_tail(rx_queue, skb);
- rxrpc_wake_up_io_thread(local);
+ wake_up_process(io_thread);
return 0;
}
@@ -565,7 +571,7 @@ int rxrpc_io_thread(void *data)
__set_current_state(TASK_RUNNING);
rxrpc_see_local(local, rxrpc_local_stop);
rxrpc_destroy_local(local);
- local->io_thread = NULL;
+ WRITE_ONCE(local->io_thread, NULL);
rxrpc_see_local(local, rxrpc_local_stopped);
return 0;
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 504453c688d7..2792d2304605 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -232,7 +232,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
}
wait_for_completion(&local->io_thread_ready);
- local->io_thread = io_thread;
+ WRITE_ONCE(local->io_thread, io_thread);
_leave(" = 0");
return 0;
@@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
- if (local->tx_alloc.va)
- __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
- local->tx_alloc.pagecnt_bias);
+ page_frag_cache_drain(&local->tx_alloc);
}
/*
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 894b8fa68e5e..6abb8eec1b2b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -29,6 +29,7 @@ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
call->send_abort_why = why;
call->send_abort_err = error;
call->send_abort_seq = 0;
+ trace_rxrpc_abort_call(call, abort_code);
/* Request abort locklessly vs rxrpc_input_call_event(). */
smp_store_release(&call->send_abort, abort_code);
rxrpc_poke_call(call, rxrpc_call_poke_abort);
@@ -303,6 +304,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
reload:
+ txb = call->tx_pending;
+ call->tx_pending = NULL;
+ if (txb)
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more);
+
ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto maybe_error;
@@ -329,11 +335,6 @@ reload:
goto maybe_error;
}
- txb = call->tx_pending;
- call->tx_pending = NULL;
- if (txb)
- rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more);
-
do {
if (!txb) {
size_t remain;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2714c4ed928e..839790043256 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -504,6 +504,50 @@ nla_put_failure:
return -1;
}
+static int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+ int err = -EINVAL;
+ u32 flags;
+
+ if (tcf_action_dump_terse(skb, a, false))
+ goto nla_put_failure;
+
+ if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
+ nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
+ a->hw_stats, TCA_ACT_HW_STATS_ANY))
+ goto nla_put_failure;
+
+ if (a->used_hw_stats_valid &&
+ nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS,
+ a->used_hw_stats, TCA_ACT_HW_STATS_ANY))
+ goto nla_put_failure;
+
+ flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK;
+ if (flags &&
+ nla_put_bitfield32(skb, TCA_ACT_FLAGS,
+ flags, flags))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count))
+ goto nla_put_failure;
+
+ nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ err = tcf_action_dump_old(skb, a, bind, ref);
+ if (err > 0) {
+ nla_nest_end(skb, nest);
+ return err;
+ }
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -1190,51 +1234,6 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return a->ops->dump(skb, a, bind, ref);
}
-int
-tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
-{
- int err = -EINVAL;
- unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
- u32 flags;
-
- if (tcf_action_dump_terse(skb, a, false))
- goto nla_put_failure;
-
- if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
- nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
- a->hw_stats, TCA_ACT_HW_STATS_ANY))
- goto nla_put_failure;
-
- if (a->used_hw_stats_valid &&
- nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS,
- a->used_hw_stats, TCA_ACT_HW_STATS_ANY))
- goto nla_put_failure;
-
- flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK;
- if (flags &&
- nla_put_bitfield32(skb, TCA_ACT_FLAGS,
- flags, flags))
- goto nla_put_failure;
-
- if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count))
- goto nla_put_failure;
-
- nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- err = tcf_action_dump_old(skb, a, bind, ref);
- if (err > 0) {
- nla_nest_end(skb, nest);
- return err;
- }
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-EXPORT_SYMBOL(tcf_action_dump_1);
-
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
int bind, int ref, bool terse)
{
@@ -1498,8 +1497,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
bool skip_sw = tc_skip_sw(fl_flags);
bool skip_hw = tc_skip_hw(fl_flags);
- if (tc_act_bind(act->tcfa_flags))
+ if (tc_act_bind(act->tcfa_flags)) {
+ /* Action is created by classifier and is not
+ * standalone. Check that the user did not set
+ * any action flags different than the
+ * classifier flags, and inherit the flags from
+ * the classifier for the compatibility case
+ * where no flags were specified at all.
+ */
+ if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) ||
+ (tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) {
+ NL_SET_ERR_MSG(extack,
+ "Mismatch between action and filter offload flags");
+ err = -EINVAL;
+ goto err;
+ }
+ if (skip_sw)
+ act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW;
+ if (skip_hw)
+ act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW;
continue;
+ }
+
+ /* Action is standalone */
if (skip_sw != tc_act_skip_sw(act->tcfa_flags) ||
skip_hw != tc_act_skip_hw(act->tcfa_flags)) {
NL_SET_ERR_MSG(extack,
@@ -2243,13 +2263,16 @@ out_module_put:
return skb->len;
}
+static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWACTION, .doit = tc_ctl_action},
+ {.msgtype = RTM_DELACTION, .doit = tc_ctl_action},
+ {.msgtype = RTM_GETACTION, .doit = tc_ctl_action,
+ .dumpit = tc_dump_action},
+};
+
static int __init tc_action_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
- 0);
-
+ rtnl_register_many(tc_action_rtnl_msg_handlers);
return 0;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 3ba8e7e739b5..c02f39efc6ef 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -44,8 +44,6 @@ static DEFINE_MUTEX(zones_mutex);
struct zones_ht_key {
struct net *net;
u16 zone;
- /* Note : pad[] must be the last field. */
- u8 pad[];
};
struct tcf_ct_flow_table {
@@ -62,7 +60,7 @@ struct tcf_ct_flow_table {
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
.key_offset = offsetof(struct tcf_ct_flow_table, key),
- .key_len = offsetof(struct zones_ht_key, pad),
+ .key_len = offsetofend(struct zones_ht_key, zone),
.automatic_shrinking = true,
};
@@ -1185,9 +1183,8 @@ static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
range->min_addr.ip =
nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);
- range->max_addr.ip = max_attr ?
- nla_get_in_addr(max_attr) :
- range->min_addr.ip;
+ range->max_addr.ip =
+ nla_get_in_addr_default(max_attr, range->min_addr.ip);
} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];
@@ -1316,8 +1313,9 @@ static int tcf_ct_fill_params(struct net *net,
err = -EINVAL;
goto err;
}
- family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET;
- proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP;
+ family = nla_get_u8_default(tb[TCA_CT_HELPER_FAMILY], AF_INET);
+ proto = nla_get_u8_default(tb[TCA_CT_HELPER_PROTO],
+ IPPROTO_TCP);
err = nf_ct_add_helper(tmpl, name, family, proto,
p->ct_action & TCA_CT_ACT_NAT, &p->helper);
if (err) {
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 5dd41a012110..5b1241ddc758 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -197,8 +197,9 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
"dscp mask must be 6 contiguous bits");
return -EINVAL;
}
- dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
- nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
+ dscpstatemask =
+ nla_get_u32_default(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK],
+ 0);
/* mask & statemask must not overlap */
if (dscpmask & dscpstatemask) {
NL_SET_ERR_MSG_ATTR(extack,
@@ -243,8 +244,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
}
cp_new->net = net;
- cp_new->zone = tb[TCA_CTINFO_ZONE] ?
- nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
+ cp_new->zone = nla_get_u16_default(tb[TCA_CTINFO_ZONE], 0);
if (dscpmask) {
cp_new->dscpmask = dscpmask;
cp_new->dscpmaskshift = dscpmaskshift;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 1dd74125398a..91c0ec729823 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -190,15 +190,10 @@ static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
entry->interval = interval;
- if (tb[TCA_GATE_ENTRY_IPV])
- entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]);
- else
- entry->ipv = -1;
+ entry->ipv = nla_get_s32_default(tb[TCA_GATE_ENTRY_IPV], -1);
- if (tb[TCA_GATE_ENTRY_MAX_OCTETS])
- entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]);
- else
- entry->maxoctets = -1;
+ entry->maxoctets = nla_get_s32_default(tb[TCA_GATE_ENTRY_MAX_OCTETS],
+ -1);
return 0;
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 44a37a71ae92..9f86f4e666d3 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -288,16 +288,14 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
}
p->tcfm_action = parm->m_action;
- p->tcfm_label = tb[TCA_MPLS_LABEL] ? nla_get_u32(tb[TCA_MPLS_LABEL]) :
- ACT_MPLS_LABEL_NOT_SET;
- p->tcfm_tc = tb[TCA_MPLS_TC] ? nla_get_u8(tb[TCA_MPLS_TC]) :
- ACT_MPLS_TC_NOT_SET;
- p->tcfm_ttl = tb[TCA_MPLS_TTL] ? nla_get_u8(tb[TCA_MPLS_TTL]) :
- mpls_ttl;
- p->tcfm_bos = tb[TCA_MPLS_BOS] ? nla_get_u8(tb[TCA_MPLS_BOS]) :
- ACT_MPLS_BOS_NOT_SET;
- p->tcfm_proto = tb[TCA_MPLS_PROTO] ? nla_get_be16(tb[TCA_MPLS_PROTO]) :
- htons(ETH_P_MPLS_UC);
+ p->tcfm_label = nla_get_u32_default(tb[TCA_MPLS_LABEL],
+ ACT_MPLS_LABEL_NOT_SET);
+ p->tcfm_tc = nla_get_u8_default(tb[TCA_MPLS_TC], ACT_MPLS_TC_NOT_SET);
+ p->tcfm_ttl = nla_get_u8_default(tb[TCA_MPLS_TTL], mpls_ttl);
+ p->tcfm_bos = nla_get_u8_default(tb[TCA_MPLS_BOS],
+ ACT_MPLS_BOS_NOT_SET);
+ p->tcfm_proto = nla_get_be16_default(tb[TCA_MPLS_PROTO],
+ htons(ETH_P_MPLS_UC));
spin_lock_bh(&m->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8555125ed34d..a214ed681142 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -167,8 +167,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (R_tab) {
new->rate_present = true;
- rate64 = tb[TCA_POLICE_RATE64] ?
- nla_get_u64(tb[TCA_POLICE_RATE64]) : 0;
+ rate64 = nla_get_u64_default(tb[TCA_POLICE_RATE64], 0);
psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
qdisc_put_rtab(R_tab);
} else {
@@ -176,8 +175,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (P_tab) {
new->peak_present = true;
- prate64 = tb[TCA_POLICE_PEAKRATE64] ?
- nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0;
+ prate64 = nla_get_u64_default(tb[TCA_POLICE_PEAKRATE64], 0);
psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
qdisc_put_rtab(P_tab);
} else {
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 22f4b1e8ade9..383bf18b6862 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -96,6 +96,7 @@ out:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
+ skb_reset_mac_len(skb);
return action;
drop:
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 17d97bbe890f..7578e27260c9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1518,6 +1518,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
return 0;
err_dev_insert:
+ tcf_block_offload_unbind(block, q, ei);
err_block_offload_bind:
tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
@@ -1932,7 +1933,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
u32 protocol, u32 prio,
- bool prio_allocate);
+ bool prio_allocate,
+ struct netlink_ext_ack *extack);
/* Try to insert new proto.
* If proto with specified priority already exists, free new proto
@@ -1956,8 +1958,7 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
return ERR_PTR(-EAGAIN);
}
- tp = tcf_chain_tp_find(chain, &chain_info,
- protocol, prio, false);
+ tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, NULL);
if (!tp)
err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
mutex_unlock(&chain->filter_chain_lock);
@@ -2017,7 +2018,8 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
u32 protocol, u32 prio,
- bool prio_allocate)
+ bool prio_allocate,
+ struct netlink_ext_ack *extack)
{
struct tcf_proto **pprev;
struct tcf_proto *tp;
@@ -2028,9 +2030,14 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
pprev = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
- if (prio_allocate ||
- (tp->protocol != protocol && protocol))
+ if (prio_allocate) {
+ NL_SET_ERR_MSG(extack, "Lowest ID from auto-alloc range already in use");
+ return ERR_PTR(-ENOSPC);
+ }
+ if (tp->protocol != protocol && protocol) {
+ NL_SET_ERR_MSG(extack, "Protocol mismatch for filter with specified priority");
return ERR_PTR(-EINVAL);
+ }
} else {
tp = NULL;
}
@@ -2296,7 +2303,7 @@ replay:
}
block->classid = parent;
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -2311,9 +2318,8 @@ replay:
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
- prio, prio_allocate);
+ prio, prio_allocate, extack);
if (IS_ERR(tp)) {
- NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
goto errout_locked;
}
@@ -2508,7 +2514,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -2538,10 +2544,13 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
- prio, false);
- if (!tp || IS_ERR(tp)) {
+ prio, false, extack);
+ if (!tp) {
+ err = -ENOENT;
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
- err = tp ? PTR_ERR(tp) : -ENOENT;
+ goto errout_locked;
+ } else if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
goto errout_locked;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
@@ -2663,7 +2672,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -2678,11 +2687,14 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
- prio, false);
+ prio, false, extack);
mutex_unlock(&chain->filter_chain_lock);
- if (!tp || IS_ERR(tp)) {
+ if (!tp) {
+ err = -ENOENT;
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
- err = tp ? PTR_ERR(tp) : -ENOENT;
+ goto errout;
+ } else if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
goto errout;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
@@ -3103,7 +3115,7 @@ replay:
if (IS_ERR(block))
return PTR_ERR(block);
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -4055,6 +4067,19 @@ static struct pernet_operations tcf_net_ops = {
.size = sizeof(struct tcf_net),
};
+static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter,
+ .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain},
+ {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain},
+ {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain,
+ .dumpit = tc_dump_chain},
+};
+
static int __init tc_filter_init(void)
{
int err;
@@ -4068,17 +4093,7 @@ static int __init tc_filter_init(void)
goto err_register_pernet_subsys;
xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1);
-
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
- tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
- tc_dump_chain, 0);
+ rtnl_register_many(tc_filter_rtnl_msg_handlers);
return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e280c27cb9f9..1008ec8a464c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1369,7 +1369,6 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
int err;
md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len];
- memset(md, 0xff, sizeof(*md));
md->version = 1;
if (!depth)
@@ -1398,9 +1397,9 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
return -EINVAL;
}
+ memset(&md->u.index, 0xff, sizeof(md->u.index));
if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
- memset(&md->u, 0x00, sizeof(md->u));
md->u.index = nla_get_be32(nla);
}
} else if (md->version == 2) {
@@ -1409,10 +1408,12 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
return -EINVAL;
}
+ md->u.md2.dir = 1;
if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) {
nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR];
md->u.md2.dir = nla_get_u8(nla);
}
+ set_hwid(&md->u.md2, 0xff);
if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) {
nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID];
set_hwid(&md->u.md2, nla_get_u8(nla));
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 9412d88a99bc..d3a03c57545b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -92,6 +92,16 @@ struct tc_u_common {
long knodes;
};
+static u32 handle2id(u32 h)
+{
+ return ((h & 0x80000000) ? ((h >> 20) & 0x7FF) : h);
+}
+
+static u32 id2handle(u32 id)
+{
+ return (id | 0x800U) << 20;
+}
+
static inline unsigned int u32_hash_fold(__be32 key,
const struct tc_u32_sel *sel,
u8 fshift)
@@ -310,7 +320,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
if (id < 0)
return 0;
- return (id | 0x800U) << 20;
+ return id2handle(id);
}
static struct hlist_head *tc_u_common_hash;
@@ -360,7 +370,7 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
refcount_set(&root_ht->refcnt, 1);
- root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : id2handle(0);
root_ht->prio = tp->prio;
root_ht->is_root = true;
idr_init(&root_ht->handle_idr);
@@ -612,7 +622,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
if (phn == ht) {
u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
- idr_remove(&tp_c->handle_idr, ht->handle);
+ idr_remove(&tp_c->handle_idr, handle2id(ht->handle));
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -989,7 +999,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
err = u32_replace_hw_hnode(tp, ht, userflags, extack);
if (err) {
- idr_remove(&tp_c->handle_idr, handle);
+ idr_remove(&tp_c->handle_idr, handle2id(handle));
kfree(ht);
return err;
}
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index c90ad7ea26b4..64b637f18bc7 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -10,7 +10,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/tc_ematch/tc_em_cmp.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/pkt_cls.h>
static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 74afc210527d..300430b8c4d2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -593,7 +593,6 @@ out:
pkt_len = 1;
qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
-EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
@@ -792,7 +791,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
- if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
+ if (parentid == TC_H_ROOT)
break;
if (sch->flags & TCQ_F_NOPARENT)
@@ -1201,6 +1200,12 @@ skip:
return -EINVAL;
}
+ if (new &&
+ !(parent->flags & TCQ_F_MQROOT) &&
+ rcu_access_pointer(new->stab)) {
+ NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
+ return -EINVAL;
+ }
err = cops->graft(parent, cl, new, &old, extack);
if (err)
return err;
@@ -2415,6 +2420,17 @@ static struct pernet_operations psched_net_ops = {
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif
+static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
+ {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
+ {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
+ .dumpit = tc_dump_qdisc},
+ {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
+ {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
+ {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
+ .dumpit = tc_dump_tclass},
+};
+
static int __init pktsched_init(void)
{
int err;
@@ -2433,14 +2449,7 @@ static int __init pktsched_init(void)
register_qdisc(&mq_qdisc_ops);
register_qdisc(&noqueue_qdisc_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
- 0);
- rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
- 0);
+ rtnl_register_many(psched_rtnl_msg_handlers);
tc_wrapper_init();
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 9602dafe32e6..8d8b2db4653c 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -361,8 +361,24 @@ static const u8 besteffort[] = {
static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
static const u8 bulk_order[] = {1, 0, 2, 3};
+/* There is a big difference in timing between the accurate values placed in the
+ * cache and the approximations given by a single Newton step for small count
+ * values, particularly when stepping from count 1 to 2 or vice versa. Hence,
+ * these values are calculated using eight Newton steps, using the
+ * implementation below. Above 16, a single Newton step gives sufficient
+ * accuracy in either direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give the
+ * value that *should* have been produced at count 4.
+ */
+
#define REC_INV_SQRT_CACHE (16)
-static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
+ ~0, ~0, 3037000500, 2479700525,
+ 2147483647, 1920767767, 1753413056, 1623345051,
+ 1518500250, 1431655765, 1358187914, 1294981364,
+ 1239850263, 1191209601, 1147878294, 1108955788
+};
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
@@ -388,47 +404,14 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
static void cobalt_invsqrt(struct cobalt_vars *vars)
{
if (vars->count < REC_INV_SQRT_CACHE)
- vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+ vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
else
cobalt_newton_step(vars);
}
-/* There is a big difference in timing between the accurate values placed in
- * the cache and the approximations given by a single Newton step for small
- * count values, particularly when stepping from count 1 to 2 or vice versa.
- * Above 16, a single Newton step gives sufficient accuracy in either
- * direction, given the precision stored.
- *
- * The magnitude of the error when stepping up to count 2 is such as to give
- * the value that *should* have been produced at count 4.
- */
-
-static void cobalt_cache_init(void)
-{
- struct cobalt_vars v;
-
- memset(&v, 0, sizeof(v));
- v.rec_inv_sqrt = ~0U;
- cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
-
- for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
-
- cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
- }
-}
-
static void cobalt_vars_init(struct cobalt_vars *vars)
{
memset(vars, 0, sizeof(*vars));
-
- if (!cobalt_rec_inv_sqrt_cache[0]) {
- cobalt_cache_init();
- cobalt_rec_inv_sqrt_cache[0] = ~0;
- }
}
/* CoDel control_law is t + interval/sqrt(count)
@@ -786,12 +769,15 @@ skip_hash:
* queue, accept the collision, update the host tags.
*/
q->way_collisions++;
- if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
- q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
- }
allocate_src = cake_dsrc(flow_mode);
allocate_dst = cake_ddst(flow_mode);
+
+ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+ if (allocate_src)
+ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+ if (allocate_dst)
+ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+ }
found:
/* reserve queue for future packets in same flow */
reduced_hash = outer_hash + k;
@@ -1539,7 +1525,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
b->backlogs[idx] -= len;
b->tin_backlog -= len;
sch->qstats.backlog -= len;
- qdisc_tree_reduce_backlog(sch, 1, len);
flow->dropped++;
b->tin_dropped++;
@@ -1550,6 +1535,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
__qdisc_drop(skb, to_free);
sch->q.qlen--;
+ qdisc_tree_reduce_backlog(sch, 1, len);
cake_heapify(q, 0);
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 939425da1895..8c9a0400c862 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -310,7 +310,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
{
struct ethtool_link_ksettings ecmd;
int speed = SPEED_10;
- int port_rate;
+ s64 port_rate;
int err;
err = __ethtool_get_link_ksettings(dev, &ecmd);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 91072010923d..59e7bdf5063e 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -123,10 +123,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
if (idx == q->tail)
choke_zap_tail_holes(q);
+ --sch->q.qlen;
qdisc_qstats_backlog_dec(sch, skb);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch, to_free);
- --sch->q.qlen;
}
struct choke_skb_cb {
@@ -356,7 +356,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
tb[TCA_CHOKE_STAB] == NULL)
return -EINVAL;
- max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+ max_P = nla_get_u32_default(tb[TCA_CHOKE_MAX_P], 0);
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
stab = nla_data(tb[TCA_CHOKE_STAB]);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 19a49af5a9e5..a5e87f9ea986 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -111,6 +111,7 @@ struct fq_perband_flows {
struct fq_sched_data {
/* Read mostly cache line */
+ u64 offload_horizon;
u32 quantum;
u32 initial_quantum;
u32 flow_refill_delay;
@@ -299,7 +300,7 @@ static void fq_gc(struct fq_sched_data *q,
}
/* Fast path can be used if :
- * 1) Packet tstamp is in the past.
+ * 1) Packet tstamp is in the past, or within the pacing offload horizon.
* 2) FQ qlen == 0 OR
* (no flow is currently eligible for transmit,
* AND fast path queue has less than 8 packets)
@@ -314,7 +315,7 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb,
const struct fq_sched_data *q = qdisc_priv(sch);
const struct sock *sk;
- if (fq_skb_cb(skb)->time_to_send > now)
+ if (fq_skb_cb(skb)->time_to_send > now + q->offload_horizon)
return false;
if (sch->q.qlen != 0) {
@@ -331,6 +332,12 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb,
*/
if (q->internal.qlen >= 8)
return false;
+
+ /* Ordering invariants fall apart if some delayed flows
+ * are ready but we haven't serviced them, yet.
+ */
+ if (q->time_next_delayed_flow <= now + q->offload_horizon)
+ return false;
}
sk = skb->sk;
@@ -361,8 +368,9 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb,
* 3) We do not want to rate limit them (eg SYNFLOOD attack),
* especially if the listener set SO_MAX_PACING_RATE
* 4) We pretend they are orphaned
+ * TCP can also associate TIME_WAIT sockets with RST or ACK packets.
*/
- if (!sk || sk_listener(sk)) {
+ if (!sk || sk_listener_or_tw(sk)) {
unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
/* By forcing low order bit to 1, we make sure to not
@@ -595,15 +603,18 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
unsigned long sample;
struct rb_node *p;
- if (q->time_next_delayed_flow > now)
+ if (q->time_next_delayed_flow > now + q->offload_horizon)
return;
/* Update unthrottle latency EWMA.
* This is cheap and can help diagnosing timer/latency problems.
*/
sample = (unsigned long)(now - q->time_next_delayed_flow);
- q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
- q->unthrottle_latency_ns += sample >> 3;
+ if ((long)sample > 0) {
+ q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+ q->unthrottle_latency_ns += sample >> 3;
+ }
+ now += q->offload_horizon;
q->time_next_delayed_flow = ~0ULL;
while ((p = rb_first(&q->delayed)) != NULL) {
@@ -687,7 +698,7 @@ begin:
u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send,
f->time_next_packet);
- if (now < time_next_packet) {
+ if (now + q->offload_horizon < time_next_packet) {
head->first = f->next;
f->time_next_packet = time_next_packet;
fq_flow_set_throttled(q, f);
@@ -925,6 +936,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
[TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)),
[TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)),
+ [TCA_FQ_OFFLOAD_HORIZON] = { .type = NLA_U32 },
};
/* compress a u8 array with all elems <= 3 to an array of 2-bit fields */
@@ -1100,6 +1112,17 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->horizon_drop,
nla_get_u8(tb[TCA_FQ_HORIZON_DROP]));
+ if (tb[TCA_FQ_OFFLOAD_HORIZON]) {
+ u64 offload_horizon = (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_OFFLOAD_HORIZON]);
+
+ if (offload_horizon <= qdisc_dev(sch)->max_pacing_offload_horizon) {
+ WRITE_ONCE(q->offload_horizon, offload_horizon);
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "invalid offload_horizon");
+ err = -EINVAL;
+ }
+ }
if (!err) {
sch_tree_unlock(sch);
@@ -1183,6 +1206,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
.bands = FQ_BANDS,
};
struct nlattr *opts;
+ u64 offload_horizon;
u64 ce_threshold;
s32 weights[3];
u64 horizon;
@@ -1199,6 +1223,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
horizon = READ_ONCE(q->horizon);
do_div(horizon, NSEC_PER_USEC);
+ offload_horizon = READ_ONCE(q->offload_horizon);
+ do_div(offload_horizon, NSEC_PER_USEC);
+
if (nla_put_u32(skb, TCA_FQ_PLIMIT,
READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT,
@@ -1224,6 +1251,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_TIMER_SLACK,
READ_ONCE(q->timer_slack)) ||
nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
+ nla_put_u32(skb, TCA_FQ_OFFLOAD_HORIZON, (u32)offload_horizon) ||
nla_put_u8(skb, TCA_FQ_HORIZON_DROP,
READ_ONCE(q->horizon_drop)))
goto nla_put_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2af24547a82c..38ec18f73de4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -512,9 +512,15 @@ static void dev_watchdog(struct timer_list *t)
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
- trans_start = READ_ONCE(txq->trans_start);
if (!netif_xmit_stopped(txq))
continue;
+
+ /* Paired with WRITE_ONCE() + smp_mb...() in
+ * netdev_tx_sent_queue() and netif_tx_stop_queue().
+ */
+ smp_mb();
+ trans_start = READ_ONCE(txq->trans_start);
+
if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
timedout_ms = jiffies_to_msecs(jiffies - trans_start);
atomic_long_inc(&txq->trans_timeout);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 79ba9dc70254..7d2151c62c4a 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -668,7 +668,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
}
- max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+ max_P = nla_get_u32_default(tb[TCA_GRED_MAX_P], 0);
ctl = nla_data(tb[TCA_GRED_PARMS]);
stab = nla_data(tb[TCA_GRED_STAB]);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ff3de37874e4..c31bc5489bdd 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1810,8 +1810,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
NULL));
- rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
- ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+ rate64 = nla_get_u64_default(tb[TCA_HTB_RATE64], 0);
+ ceil64 = nla_get_u64_default(tb[TCA_HTB_CEIL64], 0);
if (!cl) { /* new class */
struct net_device *dev = qdisc_dev(sch);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 0f8d581438c3..71ec9986ed37 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
+#include <linux/prandom.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>
@@ -78,6 +79,8 @@ struct netem_sched_data {
struct sk_buff *t_head;
struct sk_buff *t_tail;
+ u32 t_len;
+
/* optional qdisc for classful handling (NULL at netem init) */
struct Qdisc *qdisc;
@@ -382,6 +385,7 @@ static void tfifo_reset(struct Qdisc *sch)
rtnl_kfree_skbs(q->t_head, q->t_tail);
q->t_head = NULL;
q->t_tail = NULL;
+ q->t_len = 0;
}
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
@@ -411,6 +415,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
rb_link_node(&nskb->rbnode, parent, p);
rb_insert_color(&nskb->rbnode, &q->t_root);
}
+ q->t_len++;
sch->q.qlen++;
}
@@ -517,7 +522,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1<<get_random_u32_below(8);
}
- if (unlikely(sch->q.qlen >= sch->limit)) {
+ if (unlikely(q->t_len >= sch->limit)) {
/* re-link segs, so that qdisc_drop_all() frees them all */
skb->next = segs;
qdisc_drop_all(skb, sch, to_free);
@@ -701,8 +706,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
tfifo_dequeue:
skb = __qdisc_dequeue_head(&sch->q);
if (skb) {
- qdisc_qstats_backlog_dec(sch, skb);
deliver:
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
return skb;
}
@@ -718,8 +723,7 @@ deliver:
if (time_to_send <= now && q->slot.slot_next <= now) {
netem_erase_head(q, skb);
- sch->q.qlen--;
- qdisc_qstats_backlog_dec(sch, skb);
+ q->t_len--;
skb->next = NULL;
skb->prev = NULL;
/* skb->dev shares skb->rbnode area,
@@ -742,21 +746,25 @@ deliver:
err = qdisc_enqueue(skb, q->qdisc, &to_free);
kfree_skb_list(to_free);
- if (err != NET_XMIT_SUCCESS &&
- net_xmit_drop_count(err)) {
- qdisc_qstats_drop(sch);
- qdisc_tree_reduce_backlog(sch, 1,
- pkt_len);
+ if (err != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(err))
+ qdisc_qstats_drop(sch);
+ qdisc_tree_reduce_backlog(sch, 1, pkt_len);
+ sch->qstats.backlog -= pkt_len;
+ sch->q.qlen--;
}
goto tfifo_dequeue;
}
+ sch->q.qlen--;
goto deliver;
}
if (q->qdisc) {
skb = q->qdisc->ops->dequeue(q->qdisc);
- if (skb)
+ if (skb) {
+ sch->q.qlen--;
goto deliver;
+ }
}
qdisc_watchdog_schedule_ns(&q->watchdog,
@@ -766,8 +774,10 @@ deliver:
if (q->qdisc) {
skb = q->qdisc->ops->dequeue(q->qdisc);
- if (skb)
+ if (skb) {
+ sch->q.qlen--;
goto deliver;
+ }
}
return NULL;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d584c0c25899..6a07cdbdb9e1 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -421,10 +421,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (err < 0)
return err;
- if (tb[TCA_QFQ_WEIGHT])
- weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
- else
- weight = 1;
+ weight = nla_get_u32_default(tb[TCA_QFQ_WEIGHT], 1);
if (tb[TCA_QFQ_LMAX]) {
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index b5f096588fae..6029bc29b51e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -248,7 +248,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
- max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+ max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);
ctl = nla_data(tb[TCA_RED_PARMS]);
stab = nla_data(tb[TCA_RED_STAB]);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3b9245a3c767..a4b8296a2fa1 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -77,12 +77,6 @@
#define SFQ_EMPTY_SLOT 0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024
-/* We use 16 bits to store allot, and want to handle packets up to 64K
- * Scale allot by 8 (1<<3) so that no overflow occurs.
- */
-#define SFQ_ALLOT_SHIFT 3
-#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
-
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;
@@ -104,7 +98,7 @@ struct sfq_slot {
sfq_index next; /* next slot in sfq RR chain */
struct sfq_head dep; /* anchor in dep[] chains */
unsigned short hash; /* hash value (index in ht[]) */
- short allot; /* credit for this slot */
+ int allot; /* credit for this slot */
unsigned int backlog;
struct red_vars vars;
@@ -120,7 +114,6 @@ struct sfq_sched_data {
siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
- unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */
@@ -456,7 +449,7 @@ enqueue:
*/
q->tail = slot;
/* We could use a bigger initial quantum for new flows */
- slot->allot = q->scaled_quantum;
+ slot->allot = q->quantum;
}
if (++sch->q.qlen <= q->limit)
return NET_XMIT_SUCCESS;
@@ -493,7 +486,7 @@ next_slot:
slot = &q->slots[a];
if (slot->allot <= 0) {
q->tail = slot;
- slot->allot += q->scaled_quantum;
+ slot->allot += q->quantum;
goto next_slot;
}
skb = slot_dequeue_head(slot);
@@ -512,7 +505,7 @@ next_slot:
}
q->tail->next = next_a;
} else {
- slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
+ slot->allot -= qdisc_pkt_len(skb);
}
return skb;
}
@@ -595,7 +588,7 @@ drop:
q->tail->next = x;
}
q->tail = slot;
- slot->allot = q->scaled_quantum;
+ slot->allot = q->quantum;
}
}
sch->q.qlen -= dropped;
@@ -628,7 +621,8 @@ static void sfq_perturbation(struct timer_list *t)
rcu_read_unlock();
}
-static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
+static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
@@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
- /* slot->allot is a short, make sure quantum is not too big. */
- if (ctl->quantum) {
- unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
-
- if (scaled <= 0 || scaled > SHRT_MAX)
- return -EINVAL;
+ if ((int)ctl->quantum < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
+ return -EINVAL;
}
-
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
return -ENOMEM;
}
sch_tree_lock(sch);
- if (ctl->quantum) {
+ if (ctl->quantum)
q->quantum = ctl->quantum;
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
- }
WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
@@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
q->maxflows = SFQ_DEFAULT_FLOWS;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) {
- int err = sfq_change(sch, opt);
+ int err = sfq_change(sch, opt, extack);
if (err)
return err;
}
@@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx != SFQ_EMPTY_SLOT) {
const struct sfq_slot *slot = &q->slots[idx];
- xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
+ xstats.allot = slot->allot;
qs.qlen = slot->qlen;
qs.backlog = slot->backlog;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index cc2df9f8c14a..a68e17891b0b 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1828,7 +1828,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
* zero; (2) the 'flags' of a "running" taprio instance cannot be
* changed.
*/
- taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;
+ taprio_flags = nla_get_u32_default(tb[TCA_TAPRIO_ATTR_FLAGS], 0);
/* txtime-assist and full offload are mutually exclusive */
if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
@@ -1952,7 +1952,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- rcu_assign_pointer(q->admin_sched, new_admin);
+ /* Not going to race against advance_sched(), but still */
+ admin = rcu_replace_pointer(q->admin_sched, new_admin,
+ lockdep_rtnl_is_held());
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
} else {
@@ -1963,7 +1965,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
taprio_start_sched(sch, start, new_admin);
- rcu_assign_pointer(q->admin_sched, new_admin);
+ admin = rcu_replace_pointer(q->admin_sched, new_admin,
+ lockdep_rtnl_is_held());
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
@@ -2371,9 +2374,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_mqprio_qopt opt = { 0 };
struct nlattr *nest, *sched_nest;
- oper = rtnl_dereference(q->oper_sched);
- admin = rtnl_dereference(q->admin_sched);
-
mqprio_qopt_reconstruct(dev, &opt);
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -2394,18 +2394,23 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
+ rcu_read_lock();
+
+ oper = rtnl_dereference(q->oper_sched);
+ admin = rtnl_dereference(q->admin_sched);
+
if (oper && taprio_dump_tc_entries(skb, q, oper))
- goto options_error;
+ goto options_error_rcu;
if (oper && dump_schedule(skb, oper))
- goto options_error;
+ goto options_error_rcu;
if (!admin)
goto done;
sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
if (!sched_nest)
- goto options_error;
+ goto options_error_rcu;
if (dump_schedule(skb, admin))
goto admin_error;
@@ -2413,11 +2418,15 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_nest_end(skb, sched_nest);
done:
+ rcu_read_unlock();
return nla_nest_end(skb, nest);
admin_error:
nla_nest_cancel(skb, sched_nest);
+options_error_rcu:
+ rcu_read_unlock();
+
options_error:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index f1d09183ae63..dc26b22d53c7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -208,7 +208,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
- unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
+ unsigned int len = 0, prev_len = qdisc_pkt_len(skb), seg_len;
int ret, nb;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -219,21 +219,27 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
nb = 0;
skb_list_walk_safe(segs, segs, nskb) {
skb_mark_not_on_list(segs);
- qdisc_skb_cb(segs)->pkt_len = segs->len;
- len += segs->len;
+ seg_len = segs->len;
+ qdisc_skb_cb(segs)->pkt_len = seg_len;
ret = qdisc_enqueue(segs, q->qdisc, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
} else {
nb++;
+ len += seg_len;
}
}
sch->q.qlen += nb;
- if (nb > 1)
+ sch->qstats.backlog += len;
+ if (nb > 0) {
qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
- consume_skb(skb);
- return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+ consume_skb(skb);
+ return NET_XMIT_SUCCESS;
+ }
+
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
}
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f7b809c0d142..a9ed2ccab1bd 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -103,10 +103,10 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
ipv6_addr_equal(&addr->a.v6.sin6_addr,
&ifa->addr) &&
addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
- sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
break;
}
}
@@ -683,7 +683,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
struct sock *sk = &sp->inet.sk;
struct net *net = sock_net(sk);
struct net_device *dev = NULL;
- int type;
+ int type, res, bound_dev_if;
type = ipv6_addr_type(in6);
if (IPV6_ADDR_ANY == type)
@@ -697,14 +697,21 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (!(type & IPV6_ADDR_UNICAST))
return 0;
- if (sk->sk_bound_dev_if) {
- dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ rcu_read_lock();
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if) {
+ res = 0;
+ dev = dev_get_by_index_rcu(net, bound_dev_if);
if (!dev)
- return 0;
+ goto out;
}
- return ipv6_can_nonlocal_bind(net, &sp->inet) ||
- ipv6_chk_addr(net, in6, dev, 0);
+ res = ipv6_can_nonlocal_bind(net, &sp->inet) ||
+ ipv6_chk_addr(net, in6, dev, 0);
+
+out:
+ rcu_read_unlock();
+ return res;
}
/* This function checks if the address is a valid address to be used for
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5a7436a13b74..8b9a1b96695e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -44,6 +44,7 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
#include <net/udp_tunnel.h>
+#include <net/inet_dscp.h>
#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
@@ -435,7 +436,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
@@ -737,6 +738,20 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm
*/
spin_lock_bh(&net->sctp.addr_wq_lock);
+
+ /* Avoid searching the queue or modifying it if there are no consumers,
+ * as it can lead to performance degradation if addresses are modified
+ * en-masse.
+ *
+ * If the queue already contains some events, update it anyway to avoid
+ * ugly races between new sessions and new address events.
+ */
+ if (list_empty(&net->sctp.auto_asconf_splist) &&
+ list_empty(&net->sctp.addr_waitq)) {
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
+ return;
+ }
+
/* Offsets existing events in addr_wq */
addrw = sctp_addr_wq_lookup(net, addr);
if (addrw) {
@@ -807,10 +822,10 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
if (addr->a.sa.sa_family == AF_INET &&
addr->a.v4.sin_addr.s_addr ==
ifa->ifa_local) {
- sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
break;
}
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7d315a18612b..a0524ba8d787 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3751,7 +3751,7 @@ enum sctp_disposition sctp_sf_ootb(struct net *net,
}
ch = (struct sctp_chunkhdr *)ch_end;
- } while (ch_end < skb_tail_pointer(skb));
+ } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb));
if (ootb_shut_ack)
return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 32f76f1298da..36ee34f483d7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8531,6 +8531,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
struct sctp_endpoint *ep = sp->ep;
struct crypto_shash *tfm = NULL;
char alg[32];
+ int err;
/* Allocate HMAC for generating cookie. */
if (!sp->hmac && sp->sctp_hmac_alg) {
@@ -8557,17 +8558,26 @@ static int sctp_listen_start(struct sock *sk, int backlog)
*/
inet_sk_set_state(sk, SCTP_SS_LISTENING);
if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk))
- return -EAGAIN;
+ if (sctp_autobind(sk)) {
+ err = -EAGAIN;
+ goto err;
+ }
} else {
if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
- inet_sk_set_state(sk, SCTP_SS_CLOSED);
- return -EADDRINUSE;
+ err = -EADDRINUSE;
+ goto err;
}
}
WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
- return sctp_hash_endpoint(ep);
+ err = sctp_hash_endpoint(ep);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ inet_sk_set_state(sk, SCTP_SS_CLOSED);
+ return err;
}
/*
diff --git a/net/shaper/Makefile b/net/shaper/Makefile
new file mode 100644
index 000000000000..54af7169a331
--- /dev/null
+++ b/net/shaper/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the net shaper infrastructure.
+#
+# Copyright (c) 2024, Red Hat, Inc.
+#
+
+obj-y += shaper.o shaper_nl_gen.o
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
new file mode 100644
index 000000000000..15463062fe7b
--- /dev/null
+++ b/net/shaper/shaper.c
@@ -0,0 +1,1438 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/skbuff.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+#include <net/net_shaper.h>
+
+#include "shaper_nl_gen.h"
+
+#include "../core/dev.h"
+
+#define NET_SHAPER_SCOPE_SHIFT 26
+#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0)
+#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT)
+
+#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK
+
+struct net_shaper_hierarchy {
+ struct xarray shapers;
+};
+
+struct net_shaper_nl_ctx {
+ struct net_shaper_binding binding;
+ netdevice_tracker dev_tracker;
+ unsigned long start_index;
+};
+
+static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx)
+{
+ return &((struct net_shaper_nl_ctx *)ctx)->binding;
+}
+
+static void net_shaper_lock(struct net_shaper_binding *binding)
+{
+ switch (binding->type) {
+ case NET_SHAPER_BINDING_TYPE_NETDEV:
+ mutex_lock(&binding->netdev->lock);
+ break;
+ }
+}
+
+static void net_shaper_unlock(struct net_shaper_binding *binding)
+{
+ switch (binding->type) {
+ case NET_SHAPER_BINDING_TYPE_NETDEV:
+ mutex_unlock(&binding->netdev->lock);
+ break;
+ }
+}
+
+static struct net_shaper_hierarchy *
+net_shaper_hierarchy(struct net_shaper_binding *binding)
+{
+ /* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */
+ if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV)
+ return READ_ONCE(binding->netdev->net_shaper_hierarchy);
+
+ /* No other type supported yet. */
+ return NULL;
+}
+
+static const struct net_shaper_ops *
+net_shaper_ops(struct net_shaper_binding *binding)
+{
+ if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV)
+ return binding->netdev->netdev_ops->net_shaper_ops;
+
+ /* No other type supported yet. */
+ return NULL;
+}
+
+/* Count the number of [multi] attributes of the given type. */
+static int net_shaper_list_len(struct genl_info *info, int type)
+{
+ struct nlattr *attr;
+ int rem, cnt = 0;
+
+ nla_for_each_attr_type(attr, type, genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem)
+ cnt++;
+ return cnt;
+}
+
+static int net_shaper_handle_size(void)
+{
+ return nla_total_size(nla_total_size(sizeof(u32)) +
+ nla_total_size(sizeof(u32)));
+}
+
+static int net_shaper_fill_binding(struct sk_buff *msg,
+ const struct net_shaper_binding *binding,
+ u32 type)
+{
+ /* Should never happen, as currently only NETDEV is supported. */
+ if (WARN_ON_ONCE(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV))
+ return -EINVAL;
+
+ if (nla_put_u32(msg, type, binding->netdev->ifindex))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int net_shaper_fill_handle(struct sk_buff *msg,
+ const struct net_shaper_handle *handle,
+ u32 type)
+{
+ struct nlattr *handle_attr;
+
+ if (handle->scope == NET_SHAPER_SCOPE_UNSPEC)
+ return 0;
+
+ handle_attr = nla_nest_start(msg, type);
+ if (!handle_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope) ||
+ (handle->scope >= NET_SHAPER_SCOPE_QUEUE &&
+ nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id)))
+ goto handle_nest_cancel;
+
+ nla_nest_end(msg, handle_attr);
+ return 0;
+
+handle_nest_cancel:
+ nla_nest_cancel(msg, handle_attr);
+ return -EMSGSIZE;
+}
+
+static int
+net_shaper_fill_one(struct sk_buff *msg,
+ const struct net_shaper_binding *binding,
+ const struct net_shaper *shaper,
+ const struct genl_info *info)
+{
+ void *hdr;
+
+ hdr = genlmsg_iput(msg, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) ||
+ net_shaper_fill_handle(msg, &shaper->parent,
+ NET_SHAPER_A_PARENT) ||
+ net_shaper_fill_handle(msg, &shaper->handle,
+ NET_SHAPER_A_HANDLE) ||
+ ((shaper->bw_min || shaper->bw_max || shaper->burst) &&
+ nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) ||
+ (shaper->bw_min &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) ||
+ (shaper->bw_max &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) ||
+ (shaper->burst &&
+ nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) ||
+ (shaper->priority &&
+ nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) ||
+ (shaper->weight &&
+ nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight)))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/* Initialize the context fetching the relevant device and
+ * acquiring a reference to it.
+ */
+static int net_shaper_ctx_setup(const struct genl_info *info, int type,
+ struct net_shaper_nl_ctx *ctx)
+{
+ struct net *ns = genl_info_net(info);
+ struct net_device *dev;
+ int ifindex;
+
+ if (GENL_REQ_ATTR_CHECK(info, type))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[type]);
+ dev = netdev_get_by_index(ns, ifindex, &ctx->dev_tracker, GFP_KERNEL);
+ if (!dev) {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[type]);
+ return -ENOENT;
+ }
+
+ if (!dev->netdev_ops->net_shaper_ops) {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[type]);
+ netdev_put(dev, &ctx->dev_tracker);
+ return -EOPNOTSUPP;
+ }
+
+ ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV;
+ ctx->binding.netdev = dev;
+ return 0;
+}
+
+static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx)
+{
+ if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV)
+ netdev_put(ctx->binding.netdev, &ctx->dev_tracker);
+}
+
+static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle)
+{
+ return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) |
+ FIELD_PREP(NET_SHAPER_ID_MASK, handle->id);
+}
+
+static void net_shaper_index_to_handle(u32 index,
+ struct net_shaper_handle *handle)
+{
+ handle->scope = FIELD_GET(NET_SHAPER_SCOPE_MASK, index);
+ handle->id = FIELD_GET(NET_SHAPER_ID_MASK, index);
+}
+
+static void net_shaper_default_parent(const struct net_shaper_handle *handle,
+ struct net_shaper_handle *parent)
+{
+ switch (handle->scope) {
+ case NET_SHAPER_SCOPE_UNSPEC:
+ case NET_SHAPER_SCOPE_NETDEV:
+ case __NET_SHAPER_SCOPE_MAX:
+ parent->scope = NET_SHAPER_SCOPE_UNSPEC;
+ break;
+
+ case NET_SHAPER_SCOPE_QUEUE:
+ case NET_SHAPER_SCOPE_NODE:
+ parent->scope = NET_SHAPER_SCOPE_NETDEV;
+ break;
+ }
+ parent->id = 0;
+}
+
+/*
+ * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as
+ * it's cleared by xa_store().
+ */
+#define NET_SHAPER_NOT_VALID XA_MARK_1
+
+static struct net_shaper *
+net_shaper_lookup(struct net_shaper_binding *binding,
+ const struct net_shaper_handle *handle)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ u32 index = net_shaper_handle_to_index(handle);
+
+ if (!hierarchy || xa_get_mark(&hierarchy->shapers, index,
+ NET_SHAPER_NOT_VALID))
+ return NULL;
+
+ return xa_load(&hierarchy->shapers, index);
+}
+
+/* Allocate on demand the per device shaper's hierarchy container.
+ * Called under the net shaper lock
+ */
+static struct net_shaper_hierarchy *
+net_shaper_hierarchy_setup(struct net_shaper_binding *binding)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+
+ if (hierarchy)
+ return hierarchy;
+
+ hierarchy = kmalloc(sizeof(*hierarchy), GFP_KERNEL);
+ if (!hierarchy)
+ return NULL;
+
+ /* The flag is required for ID allocation */
+ xa_init_flags(&hierarchy->shapers, XA_FLAGS_ALLOC);
+
+ switch (binding->type) {
+ case NET_SHAPER_BINDING_TYPE_NETDEV:
+ /* Pairs with READ_ONCE in net_shaper_hierarchy. */
+ WRITE_ONCE(binding->netdev->net_shaper_hierarchy, hierarchy);
+ break;
+ }
+ return hierarchy;
+}
+
+/* Prepare the hierarchy container to actually insert the given shaper, doing
+ * in advance the needed allocations.
+ */
+static int net_shaper_pre_insert(struct net_shaper_binding *binding,
+ struct net_shaper_handle *handle,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *prev, *cur;
+ bool id_allocated = false;
+ int ret, index;
+
+ if (!hierarchy)
+ return -ENOMEM;
+
+ index = net_shaper_handle_to_index(handle);
+ cur = xa_load(&hierarchy->shapers, index);
+ if (cur)
+ return 0;
+
+ /* Allocated a new id, if needed. */
+ if (handle->scope == NET_SHAPER_SCOPE_NODE &&
+ handle->id == NET_SHAPER_ID_UNSPEC) {
+ u32 min, max;
+
+ handle->id = NET_SHAPER_ID_MASK - 1;
+ max = net_shaper_handle_to_index(handle);
+ handle->id = 0;
+ min = net_shaper_handle_to_index(handle);
+
+ ret = xa_alloc(&hierarchy->shapers, &index, NULL,
+ XA_LIMIT(min, max), GFP_KERNEL);
+ if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "Can't allocate new id for NODE shaper");
+ return ret;
+ }
+
+ net_shaper_index_to_handle(index, handle);
+ id_allocated = true;
+ }
+
+ cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+ if (!cur) {
+ ret = -ENOMEM;
+ goto free_id;
+ }
+
+ /* Mark 'tentative' shaper inside the hierarchy container.
+ * xa_set_mark is a no-op if the previous store fails.
+ */
+ xa_lock(&hierarchy->shapers);
+ prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL);
+ __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID);
+ xa_unlock(&hierarchy->shapers);
+ if (xa_err(prev)) {
+ NL_SET_ERR_MSG(extack, "Can't insert shaper into device store");
+ kfree_rcu(cur, rcu);
+ ret = xa_err(prev);
+ goto free_id;
+ }
+ return 0;
+
+free_id:
+ if (id_allocated)
+ xa_erase(&hierarchy->shapers, index);
+ return ret;
+}
+
+/* Commit the tentative insert with the actual values.
+ * Must be called only after a successful net_shaper_pre_insert().
+ */
+static void net_shaper_commit(struct net_shaper_binding *binding,
+ int nr_shapers, const struct net_shaper *shapers)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur;
+ int index;
+ int i;
+
+ xa_lock(&hierarchy->shapers);
+ for (i = 0; i < nr_shapers; ++i) {
+ index = net_shaper_handle_to_index(&shapers[i].handle);
+
+ cur = xa_load(&hierarchy->shapers, index);
+ if (WARN_ON_ONCE(!cur))
+ continue;
+
+ /* Successful update: drop the tentative mark
+ * and update the hierarchy container.
+ */
+ __xa_clear_mark(&hierarchy->shapers, index,
+ NET_SHAPER_NOT_VALID);
+ *cur = shapers[i];
+ }
+ xa_unlock(&hierarchy->shapers);
+}
+
+/* Rollback all the tentative inserts from the hierarchy. */
+static void net_shaper_rollback(struct net_shaper_binding *binding)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur;
+ unsigned long index;
+
+ if (!hierarchy)
+ return;
+
+ xa_lock(&hierarchy->shapers);
+ xa_for_each_marked(&hierarchy->shapers, index, cur,
+ NET_SHAPER_NOT_VALID) {
+ __xa_erase(&hierarchy->shapers, index);
+ kfree(cur);
+ }
+ xa_unlock(&hierarchy->shapers);
+}
+
+static int net_shaper_parse_handle(const struct nlattr *attr,
+ const struct genl_info *info,
+ struct net_shaper_handle *handle)
+{
+ struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1];
+ struct nlattr *id_attr;
+ u32 id = 0;
+ int ret;
+
+ ret = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr,
+ net_shaper_handle_nl_policy, info->extack);
+ if (ret < 0)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, attr, tb,
+ NET_SHAPER_A_HANDLE_SCOPE))
+ return -EINVAL;
+
+ handle->scope = nla_get_u32(tb[NET_SHAPER_A_HANDLE_SCOPE]);
+
+ /* The default id for NODE scope shapers is an invalid one
+ * to help the 'group' operation discriminate between new
+ * NODE shaper creation (ID_UNSPEC) and reuse of existing
+ * shaper (any other value).
+ */
+ id_attr = tb[NET_SHAPER_A_HANDLE_ID];
+ if (id_attr)
+ id = nla_get_u32(id_attr);
+ else if (handle->scope == NET_SHAPER_SCOPE_NODE)
+ id = NET_SHAPER_ID_UNSPEC;
+
+ handle->id = id;
+ return 0;
+}
+
+static int net_shaper_validate_caps(struct net_shaper_binding *binding,
+ struct nlattr **tb,
+ const struct genl_info *info,
+ struct net_shaper *shaper)
+{
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ struct nlattr *bad = NULL;
+ unsigned long caps = 0;
+
+ ops->capabilities(binding, shaper->handle.scope, &caps);
+
+ if (tb[NET_SHAPER_A_PRIORITY] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_PRIORITY)))
+ bad = tb[NET_SHAPER_A_PRIORITY];
+ if (tb[NET_SHAPER_A_WEIGHT] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_WEIGHT)))
+ bad = tb[NET_SHAPER_A_WEIGHT];
+ if (tb[NET_SHAPER_A_BW_MIN] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MIN)))
+ bad = tb[NET_SHAPER_A_BW_MIN];
+ if (tb[NET_SHAPER_A_BW_MAX] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX)))
+ bad = tb[NET_SHAPER_A_BW_MAX];
+ if (tb[NET_SHAPER_A_BURST] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BURST)))
+ bad = tb[NET_SHAPER_A_BURST];
+
+ if (!caps)
+ bad = tb[NET_SHAPER_A_HANDLE];
+
+ if (bad) {
+ NL_SET_BAD_ATTR(info->extack, bad);
+ return -EOPNOTSUPP;
+ }
+
+ if (shaper->handle.scope == NET_SHAPER_SCOPE_QUEUE &&
+ binding->type == NET_SHAPER_BINDING_TYPE_NETDEV &&
+ shaper->handle.id >= binding->netdev->real_num_tx_queues) {
+ NL_SET_ERR_MSG_FMT(info->extack,
+ "Not existing queue id %d max %d",
+ shaper->handle.id,
+ binding->netdev->real_num_tx_queues);
+ return -ENOENT;
+ }
+
+ /* The metric is really used only if there is *any* rate-related
+ * setting, either in current attributes set or in pre-existing
+ * values.
+ */
+ if (shaper->burst || shaper->bw_min || shaper->bw_max) {
+ u32 metric_cap = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS +
+ shaper->metric;
+
+ /* The metric test can fail even when the user did not
+ * specify the METRIC attribute. Pointing to rate related
+ * attribute will be confusing, as the attribute itself
+ * could be indeed supported, with a different metric.
+ * Be more specific.
+ */
+ if (!(caps & BIT(metric_cap))) {
+ NL_SET_ERR_MSG_FMT(info->extack, "Bad metric %d",
+ shaper->metric);
+ return -EOPNOTSUPP;
+ }
+ }
+ return 0;
+}
+
+static int net_shaper_parse_info(struct net_shaper_binding *binding,
+ struct nlattr **tb,
+ const struct genl_info *info,
+ struct net_shaper *shaper,
+ bool *exists)
+{
+ struct net_shaper *old;
+ int ret;
+
+ /* The shaper handle is the only mandatory attribute. */
+ if (NL_REQ_ATTR_CHECK(info->extack, NULL, tb, NET_SHAPER_A_HANDLE))
+ return -EINVAL;
+
+ ret = net_shaper_parse_handle(tb[NET_SHAPER_A_HANDLE], info,
+ &shaper->handle);
+ if (ret)
+ return ret;
+
+ if (shaper->handle.scope == NET_SHAPER_SCOPE_UNSPEC) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]);
+ return -EINVAL;
+ }
+
+ /* Fetch existing hierarchy, if any, so that user provide info will
+ * incrementally update the existing shaper configuration.
+ */
+ old = net_shaper_lookup(binding, &shaper->handle);
+ if (old)
+ *shaper = *old;
+ *exists = !!old;
+
+ if (tb[NET_SHAPER_A_METRIC])
+ shaper->metric = nla_get_u32(tb[NET_SHAPER_A_METRIC]);
+
+ if (tb[NET_SHAPER_A_BW_MIN])
+ shaper->bw_min = nla_get_uint(tb[NET_SHAPER_A_BW_MIN]);
+
+ if (tb[NET_SHAPER_A_BW_MAX])
+ shaper->bw_max = nla_get_uint(tb[NET_SHAPER_A_BW_MAX]);
+
+ if (tb[NET_SHAPER_A_BURST])
+ shaper->burst = nla_get_uint(tb[NET_SHAPER_A_BURST]);
+
+ if (tb[NET_SHAPER_A_PRIORITY])
+ shaper->priority = nla_get_u32(tb[NET_SHAPER_A_PRIORITY]);
+
+ if (tb[NET_SHAPER_A_WEIGHT])
+ shaper->weight = nla_get_u32(tb[NET_SHAPER_A_WEIGHT]);
+
+ ret = net_shaper_validate_caps(binding, tb, info, shaper);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int net_shaper_validate_nesting(struct net_shaper_binding *binding,
+ const struct net_shaper *shaper,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ unsigned long caps = 0;
+
+ ops->capabilities(binding, shaper->handle.scope, &caps);
+ if (!(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_NESTING))) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Nesting not supported for scope %d",
+ shaper->handle.scope);
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/* Fetch the existing leaf and update it with the user-provided
+ * attributes.
+ */
+static int net_shaper_parse_leaf(struct net_shaper_binding *binding,
+ const struct nlattr *attr,
+ const struct genl_info *info,
+ const struct net_shaper *node,
+ struct net_shaper *shaper)
+{
+ struct nlattr *tb[NET_SHAPER_A_WEIGHT + 1];
+ bool exists;
+ int ret;
+
+ ret = nla_parse_nested(tb, NET_SHAPER_A_WEIGHT, attr,
+ net_shaper_leaf_info_nl_policy, info->extack);
+ if (ret < 0)
+ return ret;
+
+ ret = net_shaper_parse_info(binding, tb, info, shaper, &exists);
+ if (ret < 0)
+ return ret;
+
+ if (shaper->handle.scope != NET_SHAPER_SCOPE_QUEUE) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]);
+ return -EINVAL;
+ }
+
+ if (node->handle.scope == NET_SHAPER_SCOPE_NODE) {
+ ret = net_shaper_validate_nesting(binding, shaper,
+ info->extack);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!exists)
+ net_shaper_default_parent(&shaper->handle, &shaper->parent);
+ return 0;
+}
+
+/* Alike net_parse_shaper_info(), but additionally allow the user specifying
+ * the shaper's parent handle.
+ */
+static int net_shaper_parse_node(struct net_shaper_binding *binding,
+ struct nlattr **tb,
+ const struct genl_info *info,
+ struct net_shaper *shaper)
+{
+ bool exists;
+ int ret;
+
+ ret = net_shaper_parse_info(binding, tb, info, shaper, &exists);
+ if (ret)
+ return ret;
+
+ if (shaper->handle.scope != NET_SHAPER_SCOPE_NODE &&
+ shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]);
+ return -EINVAL;
+ }
+
+ if (tb[NET_SHAPER_A_PARENT]) {
+ ret = net_shaper_parse_handle(tb[NET_SHAPER_A_PARENT], info,
+ &shaper->parent);
+ if (ret)
+ return ret;
+
+ if (shaper->parent.scope != NET_SHAPER_SCOPE_NODE &&
+ shaper->parent.scope != NET_SHAPER_SCOPE_NETDEV) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_PARENT]);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int net_shaper_generic_pre(struct genl_info *info, int type)
+{
+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)info->ctx;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(info->ctx));
+
+ return net_shaper_ctx_setup(info, type, ctx);
+}
+
+int net_shaper_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return net_shaper_generic_pre(info, NET_SHAPER_A_IFINDEX);
+}
+
+static void net_shaper_generic_post(struct genl_info *info)
+{
+ net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)info->ctx);
+}
+
+void net_shaper_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ net_shaper_generic_post(info);
+}
+
+int net_shaper_nl_pre_dumpit(struct netlink_callback *cb)
+{
+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+ const struct genl_info *info = genl_info_dump(cb);
+
+ return net_shaper_ctx_setup(info, NET_SHAPER_A_IFINDEX, ctx);
+}
+
+int net_shaper_nl_post_dumpit(struct netlink_callback *cb)
+{
+ net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)cb->ctx);
+ return 0;
+}
+
+int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return net_shaper_generic_pre(info, NET_SHAPER_A_CAPS_IFINDEX);
+}
+
+void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ net_shaper_generic_post(info);
+}
+
+int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb)
+{
+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+
+ return net_shaper_ctx_setup(genl_info_dump(cb),
+ NET_SHAPER_A_CAPS_IFINDEX, ctx);
+}
+
+int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb)
+{
+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+
+ net_shaper_ctx_cleanup(ctx);
+ return 0;
+}
+
+int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper_binding *binding;
+ struct net_shaper_handle handle;
+ struct net_shaper *shaper;
+ struct sk_buff *msg;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE))
+ return -EINVAL;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+ ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info,
+ &handle);
+ if (ret < 0)
+ return ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ shaper = net_shaper_lookup(binding, &handle);
+ if (!shaper) {
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[NET_SHAPER_A_HANDLE]);
+ rcu_read_unlock();
+ ret = -ENOENT;
+ goto free_msg;
+ }
+
+ ret = net_shaper_fill_one(msg, binding, shaper, info);
+ rcu_read_unlock();
+ if (ret)
+ goto free_msg;
+
+ ret = genlmsg_reply(msg, info);
+ if (ret)
+ goto free_msg;
+
+ return 0;
+
+free_msg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+int net_shaper_nl_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ struct net_shaper *shaper;
+ int ret = 0;
+
+ /* Don't error out dumps performed before any set operation. */
+ binding = net_shaper_binding_from_ctx(ctx);
+ hierarchy = net_shaper_hierarchy(binding);
+ if (!hierarchy)
+ return 0;
+
+ rcu_read_lock();
+ for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index,
+ U32_MAX, XA_PRESENT)); ctx->start_index++) {
+ ret = net_shaper_fill_one(skb, binding, shaper, info);
+ if (ret)
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ const struct net_shaper_ops *ops;
+ struct net_shaper_handle handle;
+ struct net_shaper shaper = {};
+ bool exists;
+ int ret;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+
+ net_shaper_lock(binding);
+ ret = net_shaper_parse_info(binding, info->attrs, info, &shaper,
+ &exists);
+ if (ret)
+ goto unlock;
+
+ if (!exists)
+ net_shaper_default_parent(&shaper.handle, &shaper.parent);
+
+ hierarchy = net_shaper_hierarchy_setup(binding);
+ if (!hierarchy) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ /* The 'set' operation can't create node-scope shapers. */
+ handle = shaper.handle;
+ if (handle.scope == NET_SHAPER_SCOPE_NODE &&
+ !net_shaper_lookup(binding, &handle)) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ ret = net_shaper_pre_insert(binding, &handle, info->extack);
+ if (ret)
+ goto unlock;
+
+ ops = net_shaper_ops(binding);
+ ret = ops->set(binding, &shaper, info->extack);
+ if (ret) {
+ net_shaper_rollback(binding);
+ goto unlock;
+ }
+
+ net_shaper_commit(binding, 1, &shaper);
+
+unlock:
+ net_shaper_unlock(binding);
+ return ret;
+}
+
+static int __net_shaper_delete(struct net_shaper_binding *binding,
+ struct net_shaper *shaper,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper_handle parent_handle, handle = shaper->handle;
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ int ret;
+
+again:
+ parent_handle = shaper->parent;
+
+ ret = ops->delete(binding, &handle, extack);
+ if (ret < 0)
+ return ret;
+
+ xa_erase(&hierarchy->shapers, net_shaper_handle_to_index(&handle));
+ kfree_rcu(shaper, rcu);
+
+ /* Eventually delete the parent, if it is left over with no leaves. */
+ if (parent_handle.scope == NET_SHAPER_SCOPE_NODE) {
+ shaper = net_shaper_lookup(binding, &parent_handle);
+ if (shaper && !--shaper->leaves) {
+ handle = parent_handle;
+ goto again;
+ }
+ }
+ return 0;
+}
+
+static int net_shaper_handle_cmp(const struct net_shaper_handle *a,
+ const struct net_shaper_handle *b)
+{
+ /* Must avoid holes in struct net_shaper_handle. */
+ BUILD_BUG_ON(sizeof(*a) != 8);
+
+ return memcmp(a, b, sizeof(*a));
+}
+
+static int net_shaper_parent_from_leaves(int leaves_count,
+ const struct net_shaper *leaves,
+ struct net_shaper *node,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_handle parent = leaves[0].parent;
+ int i;
+
+ for (i = 1; i < leaves_count; ++i) {
+ if (net_shaper_handle_cmp(&leaves[i].parent, &parent)) {
+ NL_SET_ERR_MSG_FMT(extack, "All the leaves shapers must have the same old parent");
+ return -EINVAL;
+ }
+ }
+
+ node->parent = parent;
+ return 0;
+}
+
+static int __net_shaper_group(struct net_shaper_binding *binding,
+ bool update_node, int leaves_count,
+ struct net_shaper *leaves,
+ struct net_shaper *node,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ struct net_shaper_handle leaf_handle;
+ struct net_shaper *parent = NULL;
+ bool new_node = false;
+ int i, ret;
+
+ if (node->handle.scope == NET_SHAPER_SCOPE_NODE) {
+ new_node = node->handle.id == NET_SHAPER_ID_UNSPEC;
+
+ if (!new_node && !net_shaper_lookup(binding, &node->handle)) {
+ /* The related attribute is not available when
+ * reaching here from the delete() op.
+ */
+ NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists",
+ node->handle.scope, node->handle.id);
+ return -ENOENT;
+ }
+
+ /* When unspecified, the node parent scope is inherited from
+ * the leaves.
+ */
+ if (node->parent.scope == NET_SHAPER_SCOPE_UNSPEC) {
+ ret = net_shaper_parent_from_leaves(leaves_count,
+ leaves, node,
+ extack);
+ if (ret)
+ return ret;
+ }
+
+ } else {
+ net_shaper_default_parent(&node->handle, &node->parent);
+ }
+
+ if (node->parent.scope == NET_SHAPER_SCOPE_NODE) {
+ parent = net_shaper_lookup(binding, &node->parent);
+ if (!parent) {
+ NL_SET_ERR_MSG_FMT(extack, "Node parent shaper %d:%d does not exists",
+ node->parent.scope, node->parent.id);
+ return -ENOENT;
+ }
+
+ ret = net_shaper_validate_nesting(binding, node, extack);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (update_node) {
+ /* For newly created node scope shaper, the following will
+ * update the handle, due to id allocation.
+ */
+ ret = net_shaper_pre_insert(binding, &node->handle, extack);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < leaves_count; ++i) {
+ leaf_handle = leaves[i].handle;
+
+ ret = net_shaper_pre_insert(binding, &leaf_handle, extack);
+ if (ret)
+ goto rollback;
+
+ if (!net_shaper_handle_cmp(&leaves[i].parent, &node->handle))
+ continue;
+
+ /* The leaves shapers will be nested to the node, update the
+ * linking accordingly.
+ */
+ leaves[i].parent = node->handle;
+ node->leaves++;
+ }
+
+ ret = ops->group(binding, leaves_count, leaves, node, extack);
+ if (ret < 0)
+ goto rollback;
+
+ /* The node's parent gains a new leaf only when the node itself
+ * is created by this group operation
+ */
+ if (new_node && parent)
+ parent->leaves++;
+ if (update_node)
+ net_shaper_commit(binding, 1, node);
+ net_shaper_commit(binding, leaves_count, leaves);
+ return 0;
+
+rollback:
+ net_shaper_rollback(binding);
+ return ret;
+}
+
+static int net_shaper_pre_del_node(struct net_shaper_binding *binding,
+ const struct net_shaper *shaper,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur, *leaves, node = {};
+ int ret, leaves_count = 0;
+ unsigned long index;
+ bool update_node;
+
+ if (!shaper->leaves)
+ return 0;
+
+ /* Fetch the new node information. */
+ node.handle = shaper->parent;
+ cur = net_shaper_lookup(binding, &node.handle);
+ if (cur) {
+ node = *cur;
+ } else {
+ /* A scope NODE shaper can be nested only to the NETDEV scope
+ * shaper without creating the latter, this check may fail only
+ * if the data is in inconsistent status.
+ */
+ if (WARN_ON_ONCE(node.handle.scope != NET_SHAPER_SCOPE_NETDEV))
+ return -EINVAL;
+ }
+
+ leaves = kcalloc(shaper->leaves, sizeof(struct net_shaper),
+ GFP_KERNEL);
+ if (!leaves)
+ return -ENOMEM;
+
+ /* Build the leaves arrays. */
+ xa_for_each(&hierarchy->shapers, index, cur) {
+ if (net_shaper_handle_cmp(&cur->parent, &shaper->handle))
+ continue;
+
+ if (WARN_ON_ONCE(leaves_count == shaper->leaves)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ leaves[leaves_count++] = *cur;
+ }
+
+ /* When re-linking to the netdev shaper, avoid the eventual, implicit,
+ * creation of the new node, would be surprising since the user is
+ * doing a delete operation.
+ */
+ update_node = node.handle.scope != NET_SHAPER_SCOPE_NETDEV;
+ ret = __net_shaper_group(binding, update_node, leaves_count,
+ leaves, &node, extack);
+
+free:
+ kfree(leaves);
+ return ret;
+}
+
+int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ struct net_shaper_handle handle;
+ struct net_shaper *shaper;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE))
+ return -EINVAL;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+
+ net_shaper_lock(binding);
+ ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info,
+ &handle);
+ if (ret)
+ goto unlock;
+
+ hierarchy = net_shaper_hierarchy(binding);
+ if (!hierarchy) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ shaper = net_shaper_lookup(binding, &handle);
+ if (!shaper) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ if (handle.scope == NET_SHAPER_SCOPE_NODE) {
+ ret = net_shaper_pre_del_node(binding, shaper, info->extack);
+ if (ret)
+ goto unlock;
+ }
+
+ ret = __net_shaper_delete(binding, shaper, info->extack);
+
+unlock:
+ net_shaper_unlock(binding);
+ return ret;
+}
+
+static int net_shaper_group_send_reply(struct net_shaper_binding *binding,
+ const struct net_shaper_handle *handle,
+ struct genl_info *info,
+ struct sk_buff *msg)
+{
+ void *hdr;
+
+ hdr = genlmsg_iput(msg, info);
+ if (!hdr)
+ goto free_msg;
+
+ if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) ||
+ net_shaper_fill_handle(msg, handle, NET_SHAPER_A_HANDLE))
+ goto free_msg;
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+free_msg:
+ /* Should never happen as msg is pre-allocated with enough space. */
+ WARN_ONCE(true, "calculated message payload length (%d)",
+ net_shaper_handle_size());
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper **old_nodes, *leaves, node = {};
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ int i, ret, rem, leaves_count;
+ int old_nodes_count = 0;
+ struct sk_buff *msg;
+ struct nlattr *attr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES))
+ return -EINVAL;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+
+ /* The group operation is optional. */
+ if (!net_shaper_ops(binding)->group)
+ return -EOPNOTSUPP;
+
+ net_shaper_lock(binding);
+ leaves_count = net_shaper_list_len(info, NET_SHAPER_A_LEAVES);
+ if (!leaves_count) {
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[NET_SHAPER_A_LEAVES]);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ leaves = kcalloc(leaves_count, sizeof(struct net_shaper) +
+ sizeof(struct net_shaper *), GFP_KERNEL);
+ if (!leaves) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ old_nodes = (void *)&leaves[leaves_count];
+
+ ret = net_shaper_parse_node(binding, info->attrs, info, &node);
+ if (ret)
+ goto free_leaves;
+
+ i = 0;
+ nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ if (WARN_ON_ONCE(i >= leaves_count))
+ goto free_leaves;
+
+ ret = net_shaper_parse_leaf(binding, attr, info,
+ &node, &leaves[i]);
+ if (ret)
+ goto free_leaves;
+ i++;
+ }
+
+ /* Prepare the msg reply in advance, to avoid device operation
+ * rollback on allocation failure.
+ */
+ msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL);
+ if (!msg)
+ goto free_leaves;
+
+ hierarchy = net_shaper_hierarchy_setup(binding);
+ if (!hierarchy) {
+ ret = -ENOMEM;
+ goto free_msg;
+ }
+
+ /* Record the node shapers that this group() operation can make
+ * childless for later cleanup.
+ */
+ for (i = 0; i < leaves_count; i++) {
+ if (leaves[i].parent.scope == NET_SHAPER_SCOPE_NODE &&
+ net_shaper_handle_cmp(&leaves[i].parent, &node.handle)) {
+ struct net_shaper *tmp;
+
+ tmp = net_shaper_lookup(binding, &leaves[i].parent);
+ if (!tmp)
+ continue;
+
+ old_nodes[old_nodes_count++] = tmp;
+ }
+ }
+
+ ret = __net_shaper_group(binding, true, leaves_count, leaves, &node,
+ info->extack);
+ if (ret)
+ goto free_msg;
+
+ /* Check if we need to delete any node left alone by the new leaves
+ * linkage.
+ */
+ for (i = 0; i < old_nodes_count; ++i) {
+ struct net_shaper *tmp = old_nodes[i];
+
+ if (--tmp->leaves > 0)
+ continue;
+
+ /* Errors here are not fatal: the grouping operation is
+ * completed, and user-space can still explicitly clean-up
+ * left-over nodes.
+ */
+ __net_shaper_delete(binding, tmp, info->extack);
+ }
+
+ ret = net_shaper_group_send_reply(binding, &node.handle, info, msg);
+ if (ret)
+ GENL_SET_ERR_MSG_FMT(info, "Can't send reply");
+
+free_leaves:
+ kfree(leaves);
+
+unlock:
+ net_shaper_unlock(binding);
+ return ret;
+
+free_msg:
+ kfree_skb(msg);
+ goto free_leaves;
+}
+
+static int
+net_shaper_cap_fill_one(struct sk_buff *msg,
+ struct net_shaper_binding *binding,
+ enum net_shaper_scope scope, unsigned long flags,
+ const struct genl_info *info)
+{
+ unsigned long cur;
+ void *hdr;
+
+ hdr = genlmsg_iput(msg, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_CAPS_IFINDEX) ||
+ nla_put_u32(msg, NET_SHAPER_A_CAPS_SCOPE, scope))
+ goto nla_put_failure;
+
+ for (cur = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS;
+ cur <= NET_SHAPER_A_CAPS_MAX; ++cur) {
+ if (flags & BIT(cur) && nla_put_flag(msg, cur))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper_binding *binding;
+ const struct net_shaper_ops *ops;
+ enum net_shaper_scope scope;
+ unsigned long flags = 0;
+ struct sk_buff *msg;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_CAPS_SCOPE))
+ return -EINVAL;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+ scope = nla_get_u32(info->attrs[NET_SHAPER_A_CAPS_SCOPE]);
+ ops = net_shaper_ops(binding);
+ ops->capabilities(binding, scope, &flags);
+ if (!flags)
+ return -EOPNOTSUPP;
+
+ msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = net_shaper_cap_fill_one(msg, binding, scope, flags, info);
+ if (ret)
+ goto free_msg;
+
+ ret = genlmsg_reply(msg, info);
+ if (ret)
+ goto free_msg;
+ return 0;
+
+free_msg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net_shaper_binding *binding;
+ const struct net_shaper_ops *ops;
+ enum net_shaper_scope scope;
+ int ret;
+
+ binding = net_shaper_binding_from_ctx(cb->ctx);
+ ops = net_shaper_ops(binding);
+ for (scope = 0; scope <= NET_SHAPER_SCOPE_MAX; ++scope) {
+ unsigned long flags = 0;
+
+ ops->capabilities(binding, scope, &flags);
+ if (!flags)
+ continue;
+
+ ret = net_shaper_cap_fill_one(skb, binding, scope, flags,
+ info);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void net_shaper_flush(struct net_shaper_binding *binding)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur;
+ unsigned long index;
+
+ if (!hierarchy)
+ return;
+
+ net_shaper_lock(binding);
+ xa_lock(&hierarchy->shapers);
+ xa_for_each(&hierarchy->shapers, index, cur) {
+ __xa_erase(&hierarchy->shapers, index);
+ kfree(cur);
+ }
+ xa_unlock(&hierarchy->shapers);
+ net_shaper_unlock(binding);
+
+ kfree(hierarchy);
+}
+
+void net_shaper_flush_netdev(struct net_device *dev)
+{
+ struct net_shaper_binding binding = {
+ .type = NET_SHAPER_BINDING_TYPE_NETDEV,
+ .netdev = dev,
+ };
+
+ net_shaper_flush(&binding);
+}
+
+void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq)
+{
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding binding;
+ int i;
+
+ binding.type = NET_SHAPER_BINDING_TYPE_NETDEV;
+ binding.netdev = dev;
+ hierarchy = net_shaper_hierarchy(&binding);
+ if (!hierarchy)
+ return;
+
+ /* Only drivers implementing shapers support ensure
+ * the lock is acquired in advance.
+ */
+ lockdep_assert_held(&dev->lock);
+
+ /* Take action only when decreasing the tx queue number. */
+ for (i = txq; i < dev->real_num_tx_queues; ++i) {
+ struct net_shaper_handle handle, parent_handle;
+ struct net_shaper *shaper;
+ u32 index;
+
+ handle.scope = NET_SHAPER_SCOPE_QUEUE;
+ handle.id = i;
+ shaper = net_shaper_lookup(&binding, &handle);
+ if (!shaper)
+ continue;
+
+ /* Don't touch the H/W for the queue shaper, the drivers already
+ * deleted the queue and related resources.
+ */
+ parent_handle = shaper->parent;
+ index = net_shaper_handle_to_index(&handle);
+ xa_erase(&hierarchy->shapers, index);
+ kfree_rcu(shaper, rcu);
+
+ /* The recursion on parent does the full job. */
+ if (parent_handle.scope != NET_SHAPER_SCOPE_NODE)
+ continue;
+
+ shaper = net_shaper_lookup(&binding, &parent_handle);
+ if (shaper && !--shaper->leaves)
+ __net_shaper_delete(&binding, shaper, NULL);
+ }
+}
+
+static int __init shaper_init(void)
+{
+ return genl_register_family(&net_shaper_nl_family);
+}
+
+subsys_initcall(shaper_init);
diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c
new file mode 100644
index 000000000000..204c8ae8c7b1
--- /dev/null
+++ b/net/shaper/shaper_nl_gen.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/net_shaper.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "shaper_nl_gen.h"
+
+#include <uapi/linux/net_shaper.h>
+
+/* Common nested types */
+const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = {
+ [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3),
+ [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, },
+};
+
+const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = {
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, },
+ [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, },
+};
+
+/* NET_SHAPER_CMD_GET - do */
+static const struct nla_policy net_shaper_get_do_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+};
+
+/* NET_SHAPER_CMD_GET - dump */
+static const struct nla_policy net_shaper_get_dump_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+};
+
+/* NET_SHAPER_CMD_SET - do */
+static const struct nla_policy net_shaper_set_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BURST] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, },
+ [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, },
+};
+
+/* NET_SHAPER_CMD_DELETE - do */
+static const struct nla_policy net_shaper_delete_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+};
+
+/* NET_SHAPER_CMD_GROUP - do */
+static const struct nla_policy net_shaper_group_nl_policy[NET_SHAPER_A_LEAVES + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_PARENT] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BURST] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, },
+ [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, },
+ [NET_SHAPER_A_LEAVES] = NLA_POLICY_NESTED(net_shaper_leaf_info_nl_policy),
+};
+
+/* NET_SHAPER_CMD_CAP_GET - do */
+static const struct nla_policy net_shaper_cap_get_do_nl_policy[NET_SHAPER_A_CAPS_SCOPE + 1] = {
+ [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_CAPS_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3),
+};
+
+/* NET_SHAPER_CMD_CAP_GET - dump */
+static const struct nla_policy net_shaper_cap_get_dump_nl_policy[NET_SHAPER_A_CAPS_IFINDEX + 1] = {
+ [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, },
+};
+
+/* Ops table for net_shaper */
+static const struct genl_split_ops net_shaper_nl_ops[] = {
+ {
+ .cmd = NET_SHAPER_CMD_GET,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_get_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_get_do_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_GET,
+ .start = net_shaper_nl_pre_dumpit,
+ .dumpit = net_shaper_nl_get_dumpit,
+ .done = net_shaper_nl_post_dumpit,
+ .policy = net_shaper_get_dump_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_SET,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_set_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_set_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_DELETE,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_delete_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_delete_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_GROUP,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_group_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_group_nl_policy,
+ .maxattr = NET_SHAPER_A_LEAVES,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_CAP_GET,
+ .pre_doit = net_shaper_nl_cap_pre_doit,
+ .doit = net_shaper_nl_cap_get_doit,
+ .post_doit = net_shaper_nl_cap_post_doit,
+ .policy = net_shaper_cap_get_do_nl_policy,
+ .maxattr = NET_SHAPER_A_CAPS_SCOPE,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_CAP_GET,
+ .start = net_shaper_nl_cap_pre_dumpit,
+ .dumpit = net_shaper_nl_cap_get_dumpit,
+ .done = net_shaper_nl_cap_post_dumpit,
+ .policy = net_shaper_cap_get_dump_nl_policy,
+ .maxattr = NET_SHAPER_A_CAPS_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
+
+struct genl_family net_shaper_nl_family __ro_after_init = {
+ .name = NET_SHAPER_FAMILY_NAME,
+ .version = NET_SHAPER_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = net_shaper_nl_ops,
+ .n_split_ops = ARRAY_SIZE(net_shaper_nl_ops),
+};
diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h
new file mode 100644
index 000000000000..cb7f9026fc23
--- /dev/null
+++ b/net/shaper/shaper_nl_gen.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/net_shaper.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_NET_SHAPER_GEN_H
+#define _LINUX_NET_SHAPER_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/net_shaper.h>
+
+/* Common nested types */
+extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1];
+extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1];
+
+int net_shaper_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+void
+net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+void
+net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_pre_dumpit(struct netlink_callback *cb);
+int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb);
+int net_shaper_nl_post_dumpit(struct netlink_callback *cb);
+int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb);
+
+int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+extern struct genl_family net_shaper_nl_family;
+
+#endif /* _LINUX_NET_SHAPER_GEN_H */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8e3093938cd2..9e6c69d18581 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -383,6 +383,7 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol)
smc->limit_smc_hs = net->smc.limit_smc_hs;
smc->use_fallback = false; /* assume rdma capability first */
smc->fallback_rsn = 0;
+ smc_close_init(smc);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
@@ -1299,7 +1300,6 @@ static int smc_connect_rdma(struct smc_sock *smc,
goto connect_abort;
}
- smc_close_init(smc);
smc_rx_init(smc);
if (ini->first_contact_local) {
@@ -1435,7 +1435,6 @@ static int smc_connect_ism(struct smc_sock *smc,
goto connect_abort;
}
}
- smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -1466,10 +1465,6 @@ connect_abort:
static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc)
{
- if (aclc->hdr.typev1 != SMC_TYPE_R &&
- aclc->hdr.typev1 != SMC_TYPE_D)
- return SMC_CLC_DECL_MODEUNSUPP;
-
if (aclc->hdr.version >= SMC_V2) {
if ((aclc->hdr.typev1 == SMC_TYPE_R &&
!smcr_indicated(ini->smc_type_v2)) ||
@@ -1523,10 +1518,6 @@ static int __smc_connect(struct smc_sock *smc)
ini->smcd_version &= ~SMC_V1;
ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N;
- if (!ini->smcd_version) {
- rc = SMC_CLC_DECL_GETVLANERR;
- goto fallback;
- }
}
rc = smc_find_proposal_devices(smc, ini);
@@ -1909,6 +1900,7 @@ static void smc_listen_out(struct smc_sock *new_smc)
if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
atomic_dec(&lsmc->queued_smc_hs);
+ release_sock(newsmcsk); /* lock in smc_listen_work() */
if (lsmc->sk.sk_state == SMC_LISTEN) {
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -2430,6 +2422,7 @@ static void smc_listen_work(struct work_struct *work)
u8 accept_version;
int rc = 0;
+ lock_sock(&new_smc->sk); /* release in smc_listen_out() */
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
return smc_listen_out_err(new_smc);
@@ -2487,7 +2480,6 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
mutex_lock(&smc_server_lgr_pending);
- smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
@@ -3367,8 +3359,10 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
else
rc = smc_create_clcsk(net, sk, family);
- if (rc)
+ if (rc) {
sk_common_release(sk);
+ sock->sk = NULL;
+ }
out:
return rc;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 34b781e463c4..78ae10d06ed2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -278,12 +278,15 @@ struct smc_connection {
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
- u8 freed : 1; /* normal termiation */
+ u8 freed : 1; /* normal termination */
u8 out_of_sync : 1; /* out of sync with peer */
};
struct smc_sock { /* smc sock container */
struct sock sk;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6_pinfo *pinet6;
+#endif
struct socket *clcsock; /* internal tcp socket */
void (*clcsk_state_change)(struct sock *sk);
/* original stat_change fct. */
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 467effb50cd6..5fd6f5b8ef03 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -145,6 +145,8 @@ struct smc_clc_v2_extension {
);
u8 user_eids[][SMC_MAX_EID_LEN];
};
+static_assert(offsetof(struct smc_clc_v2_extension, user_eids) == sizeof(struct smc_clc_v2_extension_fixed),
+ "struct member likely outside of struct_group_tagged()");
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
@@ -154,7 +156,7 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
} __aligned(4);
struct smc_clc_msg_smcd { /* SMC-D GID information */
- struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
+ struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requester */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
u8 vendor_oui[3]; /* vendor organizationally unique identifier */
u8 vendor_exp_options[5];
@@ -169,6 +171,8 @@ struct smc_clc_smcd_v2_extension {
);
struct smc_clc_smcd_gid_chid gidchid[];
};
+static_assert(offsetof(struct smc_clc_smcd_v2_extension, gidchid) == sizeof(struct smc_clc_smcd_v2_extension_fixed),
+ "struct member likely outside of struct_group_tagged()");
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_hdr hdr;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3b95828d9976..500952c2e67b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -221,6 +221,35 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
}
+static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
+ bool is_rmb,
+ struct list_head *buf_list,
+ struct smc_buf_desc *buf_desc)
+{
+ list_add(&buf_desc->list, buf_list);
+ if (is_rmb) {
+ lgr->alloc_rmbs += buf_desc->len;
+ lgr->alloc_rmbs +=
+ lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
+ } else {
+ lgr->alloc_sndbufs += buf_desc->len;
+ }
+}
+
+static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
+ bool is_rmb,
+ struct smc_buf_desc *buf_desc)
+{
+ list_del(&buf_desc->list);
+ if (is_rmb) {
+ lgr->alloc_rmbs -= buf_desc->len;
+ lgr->alloc_rmbs -=
+ lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
+ } else {
+ lgr->alloc_sndbufs -= buf_desc->len;
+ }
+}
+
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
@@ -363,6 +392,10 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
+ goto errattr;
if (lgr->smc_version > SMC_V1) {
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
if (!v2_attrs)
@@ -541,6 +574,10 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
+ goto errattr;
memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
@@ -1138,7 +1175,7 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
down_write(lock);
- list_del(&buf_desc->list);
+ smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
@@ -1166,22 +1203,30 @@ static void smcd_buf_detach(struct smc_connection *conn)
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ bool is_smcd = lgr->is_smcd;
+ int bufsize;
+
if (conn->sndbuf_desc) {
- if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
+ bufsize = conn->sndbuf_desc->len;
+ if (!is_smcd && conn->sndbuf_desc->is_vm) {
smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
} else {
- memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
+ memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->sndbuf_desc->used, 0);
}
+ SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
}
if (conn->rmb_desc) {
- if (!lgr->is_smcd) {
+ bufsize = conn->rmb_desc->len;
+ if (!is_smcd) {
smcr_buf_unuse(conn->rmb_desc, true, lgr);
} else {
- memzero_explicit(conn->rmb_desc->cpu_addr,
- conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
+ bufsize += sizeof(struct smcd_cdc_msg);
+ memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->rmb_desc->used, 0);
}
+ SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
}
}
@@ -1377,7 +1422,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
buf_list = &lgr->sndbufs[i];
list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) {
- list_del(&buf_desc->list);
+ smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
smc_buf_free(lgr, is_rmb, buf_desc);
}
}
@@ -2250,7 +2295,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
- bool is_rmb, int bufsize)
+ int bufsize)
{
struct smc_buf_desc *buf_desc;
@@ -2276,7 +2321,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
}
if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
goto out;
- fallthrough; // try virtually continguous buf
+ fallthrough; // try virtually contiguous buf
case SMCR_VIRT_CONT_BUFS:
buf_desc->order = get_order(bufsize);
buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
@@ -2390,7 +2435,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
- SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
break; /* found reusable slot */
}
@@ -2398,7 +2443,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_smcd)
buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
else
- buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
+ buf_desc = smcr_new_buf_create(lgr, bufsize);
if (PTR_ERR(buf_desc) == -ENOMEM)
break;
@@ -2411,10 +2456,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
}
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
- SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
buf_desc->used = 1;
down_write(lock);
- list_add(&buf_desc->list, buf_list);
+ smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
up_write(lock);
break; /* found */
}
@@ -2496,7 +2541,8 @@ create_rmb:
rc = __smc_buf_create(smc, is_smcd, true);
if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock);
- list_del(&smc->conn.sndbuf_desc->list);
+ smc_lgr_buf_list_del(smc->conn.lgr, false,
+ smc->conn.sndbuf_desc);
up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index d93cf51dbd7c..69b54ecd6503 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -30,7 +30,7 @@
*/
#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for
* SMC-R v2.1 and later negotiation, vendors or
- * distrubutions may modify it to a value between
+ * distributions may modify it to a value between
* 16-255 as needed.
*/
@@ -181,7 +181,7 @@ struct smc_link {
*/
#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for
* SMC-R v2.1 and later negotiation, vendors or
- * distrubutions may modify it to a value between
+ * distributions may modify it to a value between
* 1-2 as needed.
*/
@@ -281,6 +281,8 @@ struct smc_link_group {
struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
struct rw_semaphore rmbs_lock; /* protects rx buffers */
+ u64 alloc_sndbufs; /* stats of tx buffers */
+ u64 alloc_rmbs; /* stats of rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9297dc20bfe2..9c563cdbea90 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -899,9 +899,7 @@ static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port)
struct ib_device *ibdev = smcibdev->ibdev;
struct net_device *ndev;
- if (!ibdev->ops.get_netdev)
- return;
- ndev = ibdev->ops.get_netdev(ibdev, port + 1);
+ ndev = ib_device_get_netdev(ibdev, port + 1);
if (ndev) {
smcibdev->ndev_ifidx[port] = ndev->ifindex;
dev_put(ndev);
@@ -921,9 +919,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
port_cnt = smcibdev->ibdev->phys_port_cnt;
for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) {
libdev = smcibdev->ibdev;
- if (!libdev->ops.get_netdev)
- continue;
- lndev = libdev->ops.get_netdev(libdev, i + 1);
+ lndev = ib_device_get_netdev(libdev, i + 1);
dev_put(lndev);
if (lndev != ndev)
continue;
diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c
index bece346dd8e9..a944e7dcb8b9 100644
--- a/net/smc/smc_inet.c
+++ b/net/smc/smc_inet.c
@@ -60,6 +60,11 @@ static struct inet_protosw smc_inet_protosw = {
};
#if IS_ENABLED(CONFIG_IPV6)
+struct smc6_sock {
+ struct smc_sock smc;
+ struct ipv6_pinfo inet6;
+};
+
static struct proto smc_inet6_prot = {
.name = "INET6_SMC",
.owner = THIS_MODULE,
@@ -67,9 +72,10 @@ static struct proto smc_inet6_prot = {
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
.release_cb = smc_release_cb,
- .obj_size = sizeof(struct smc_sock),
+ .obj_size = sizeof(struct smc6_sock),
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
+ .ipv6_pinfo_offset = offsetof(struct smc6_sock, inet6),
};
static const struct proto_ops smc_inet6_stream_ops = {
@@ -102,12 +108,23 @@ static struct inet_protosw smc_inet6_protosw = {
};
#endif /* CONFIG_IPV6 */
+static unsigned int smc_sync_mss(struct sock *sk, u32 pmtu)
+{
+ /* No need pass it through to clcsock, mss can always be set by
+ * sock_create_kern or smc_setsockopt.
+ */
+ return 0;
+}
+
static int smc_inet_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
/* init common smc sock */
smc_sk_init(net, sk, IPPROTO_SMC);
+
+ inet_csk(sk)->icsk_sync_mss = smc_sync_mss;
+
/* create clcsock */
return smc_create_clcsk(net, sk, sk->sk_family);
}
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index 6dd4292dae56..04dc6808d2e1 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -15,7 +15,6 @@
#define _SMC_LOOPBACK_H
#include <linux/device.h>
-#include <linux/err.h>
#include <net/smc.h>
#if IS_ENABLED(CONFIG_SMC_LO)
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2adb92b8c469..716808f374a8 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -753,7 +753,7 @@ static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid)
write_lock(&sn->pnetids_ndev.lock);
list_for_each_entry(pi, &sn->pnetids_ndev.list, list) {
- if (smc_pnet_match(pnetid, pe->pnetid)) {
+ if (smc_pnet_match(pnetid, pi->pnetid)) {
refcount_inc(&pi->refcnt);
kfree(pe);
goto unlock;
@@ -887,9 +887,6 @@ int smc_pnet_net_init(struct net *net)
smc_pnet_create_pnetids_list(net);
- /* disable handshake limitation by default */
- net->smc.limit_smc_hs = 0;
-
return 0;
}
@@ -1057,9 +1054,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
for (i = 1; i <= SMC_MAX_PORTS; i++) {
if (!rdma_is_port_valid(ibdev->ibdev, i))
continue;
- if (!ibdev->ibdev->ops.get_netdev)
- continue;
- ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
+ ndev = ib_device_get_netdev(ibdev->ibdev, i);
if (!ndev)
continue;
dev_put(ndev);
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index ca14c0f3a07d..e71b17d1e21c 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -218,6 +218,12 @@ static int smc_nl_fill_stats_tech_data(struct sk_buff *skb,
smc_tech->tx_bytes,
SMC_NLA_STATS_PAD))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_STATS_T_RX_RMB_USAGE,
+ smc_tech->rx_rmbuse))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_STATS_T_TX_RMB_USAGE,
+ smc_tech->tx_rmbuse))
+ goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT,
smc_tech->rx_cnt,
SMC_NLA_STATS_PAD))
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index e19177ce4092..571f9d9e7814 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -79,6 +79,8 @@ struct smc_stats_tech {
u64 tx_bytes;
u64 rx_cnt;
u64 tx_cnt;
+ u64 rx_rmbuse;
+ u64 tx_rmbuse;
};
struct smc_stats {
@@ -135,38 +137,46 @@ do { \
} \
while (0)
-#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len) \
+#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _is_add, _len) \
do { \
+ typeof(_smc_stats) stats = (_smc_stats); \
+ typeof(_is_add) is_a = (_is_add); \
typeof(_len) _l = (_len); \
typeof(_tech) t = (_tech); \
int _pos; \
int m = SMC_BUF_MAX - 1; \
if (_l <= 0) \
break; \
- _pos = fls((_l - 1) >> 13); \
- _pos = (_pos <= m) ? _pos : m; \
- this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \
+ if (is_a) { \
+ _pos = fls((_l - 1) >> 13); \
+ _pos = (_pos <= m) ? _pos : m; \
+ this_cpu_inc((*stats).smc[t].k ## _rmbsize.buf[_pos]); \
+ this_cpu_add((*stats).smc[t].k ## _rmbuse, _l); \
+ } else { \
+ this_cpu_sub((*stats).smc[t].k ## _rmbuse, _l); \
+ } \
} \
while (0)
#define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \
this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt)
-#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _len) \
+#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _is_add, _len) \
do { \
struct net *_net = sock_net(&(_smc)->sk); \
struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
+ typeof(_is_add) is_add = (_is_add); \
typeof(_is_smcd) is_d = (_is_smcd); \
typeof(_is_rx) is_r = (_is_rx); \
typeof(_len) l = (_len); \
if ((is_d) && (is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, is_add, l); \
if ((is_d) && !(is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, is_add, l); \
if (!(is_d) && (is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, is_add, l); \
if (!(is_d) && !(is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, is_add, l); \
} \
while (0)
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 13f2bc092db1..2fab6456f765 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -90,6 +90,15 @@ static struct ctl_table smc_table[] = {
.extra1 = &conns_per_lgr_min,
.extra2 = &conns_per_lgr_max,
},
+ {
+ .procname = "limit_smc_hs",
+ .data = &init_net.smc.limit_smc_hs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
int __net_init smc_sysctl_net_init(struct net *net)
@@ -121,6 +130,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
+ /* disable handshake limitation by default */
+ net->smc.limit_smc_hs = 0;
return 0;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 0021065a600a..994c0cd4fddb 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -648,8 +648,10 @@ void smc_wr_free_link(struct smc_link *lnk)
smc_wr_tx_wait_no_pending_sends(lnk);
percpu_ref_kill(&lnk->wr_reg_refs);
wait_for_completion(&lnk->reg_ref_comp);
+ percpu_ref_exit(&lnk->wr_reg_refs);
percpu_ref_kill(&lnk->wr_tx_refs);
wait_for_completion(&lnk->tx_ref_comp);
+ percpu_ref_exit(&lnk->wr_tx_refs);
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -912,11 +914,13 @@ int smc_wr_create_link(struct smc_link *lnk)
init_waitqueue_head(&lnk->wr_reg_wait);
rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL);
if (rc)
- goto dma_unmap;
+ goto cancel_ref;
init_completion(&lnk->reg_ref_comp);
init_waitqueue_head(&lnk->wr_rx_empty_wait);
return rc;
+cancel_ref:
+ percpu_ref_exit(&lnk->wr_tx_refs);
dma_unmap:
if (lnk->wr_rx_v2_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr,
diff --git a/net/socket.c b/net/socket.c
index fcbdd5bc47ac..9a117248f18f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -153,7 +153,6 @@ static void sock_show_fdinfo(struct seq_file *m, struct file *f)
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
.poll = sock_poll,
@@ -510,7 +509,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file)
{
- if (file->f_op == &socket_file_ops)
+ if (likely(file->f_op == &socket_file_ops))
return file->private_data; /* set in sock_alloc_file */
return NULL;
@@ -550,24 +549,6 @@ struct socket *sockfd_lookup(int fd, int *err)
}
EXPORT_SYMBOL(sockfd_lookup);
-static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
-{
- struct fd f = fdget(fd);
- struct socket *sock;
-
- *err = -EBADF;
- if (f.file) {
- sock = sock_from_file(f.file);
- if (likely(sock)) {
- *fput_needed = f.flags & FDPUT_FPUT;
- return sock;
- }
- *err = -ENOTSOCK;
- fdput(f);
- }
- return NULL;
-}
-
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
@@ -688,7 +669,7 @@ void sock_release(struct socket *sock)
}
EXPORT_SYMBOL(sock_release);
-void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
+void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
{
u8 flags = *tx_flags;
@@ -946,11 +927,17 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
memset(&tss, 0, sizeof(tss));
tsflags = READ_ONCE(sk->sk_tsflags);
- if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+ skb_is_err_queue(skb) ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
+ skb_is_err_queue(skb) ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -1569,8 +1556,15 @@ int __sock_create(struct net *net, int family, int type, int protocol,
rcu_read_unlock();
err = pf->create(net, sock, protocol, kern);
- if (err < 0)
+ if (err < 0) {
+ /* ->create should release the allocated sock->sk object on error
+ * and make sure sock->sk is set to NULL to avoid use-after-free
+ */
+ DEBUG_NET_WARN_ONCE(sock->sk,
+ "%ps must clear sock->sk on failure, family: %d, type: %d, protocol: %d\n",
+ pf->create, family, type, protocol);
goto out_module_put;
+ }
/*
* Now to bump the refcnt of the [loadable] module that owns this
@@ -1848,16 +1842,20 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
{
struct socket *sock;
struct sockaddr_storage address;
- int err, fput_needed;
-
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock) {
- err = move_addr_to_kernel(umyaddr, addrlen, &address);
- if (!err)
- err = __sys_bind_socket(sock, &address, addrlen);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ CLASS(fd, f)(fd);
+ int err;
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ err = move_addr_to_kernel(umyaddr, addrlen, &address);
+ if (unlikely(err))
+ return err;
+
+ return __sys_bind_socket(sock, &address, addrlen);
}
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
@@ -1886,15 +1884,16 @@ int __sys_listen_socket(struct socket *sock, int backlog)
int __sys_listen(int fd, int backlog)
{
+ CLASS(fd, f)(fd);
struct socket *sock;
- int err, fput_needed;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock) {
- err = __sys_listen_socket(sock, backlog);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ return __sys_listen_socket(sock, backlog);
}
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
@@ -2004,17 +2003,12 @@ static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_s
int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
- int ret = -EBADF;
- struct fd f;
+ CLASS(fd, f)(fd);
- f = fdget(fd);
- if (f.file) {
- ret = __sys_accept4_file(f.file, upeer_sockaddr,
+ if (fd_empty(f))
+ return -EBADF;
+ return __sys_accept4_file(fd_file(f), upeer_sockaddr,
upeer_addrlen, flags);
- fdput(f);
- }
-
- return ret;
}
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
@@ -2066,20 +2060,18 @@ out:
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
- int ret = -EBADF;
- struct fd f;
+ struct sockaddr_storage address;
+ CLASS(fd, f)(fd);
+ int ret;
- f = fdget(fd);
- if (f.file) {
- struct sockaddr_storage address;
+ if (fd_empty(f))
+ return -EBADF;
- ret = move_addr_to_kernel(uservaddr, addrlen, &address);
- if (!ret)
- ret = __sys_connect_file(f.file, &address, addrlen, 0);
- fdput(f);
- }
+ ret = move_addr_to_kernel(uservaddr, addrlen, &address);
+ if (ret)
+ return ret;
- return ret;
+ return __sys_connect_file(fd_file(f), &address, addrlen, 0);
}
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
@@ -2098,26 +2090,25 @@ int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int err, fput_needed;
+ CLASS(fd, f)(fd);
+ int err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
err = security_socket_getsockname(sock);
if (err)
- goto out_put;
+ return err;
err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
if (err < 0)
- goto out_put;
- /* "err" is actually length in this case */
- err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
+ return err;
-out_put:
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ /* "err" is actually length in this case */
+ return move_addr_to_user(&address, err, usockaddr, usockaddr_len);
}
SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
@@ -2136,26 +2127,25 @@ int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int err, fput_needed;
+ CLASS(fd, f)(fd);
+ int err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock != NULL) {
- const struct proto_ops *ops = READ_ONCE(sock->ops);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- err = security_socket_getpeername(sock);
- if (err) {
- fput_light(sock->file, fput_needed);
- return err;
- }
+ err = security_socket_getpeername(sock);
+ if (err)
+ return err;
- err = ops->getname(sock, (struct sockaddr *)&address, 1);
- if (err >= 0)
- /* "err" is actually length in this case */
- err = move_addr_to_user(&address, err, usockaddr,
- usockaddr_len);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 1);
+ if (err < 0)
+ return err;
+
+ /* "err" is actually length in this case */
+ return move_addr_to_user(&address, err, usockaddr, usockaddr_len);
}
SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
@@ -2176,14 +2166,17 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
struct sockaddr_storage address;
int err;
struct msghdr msg;
- int fput_needed;
err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
if (unlikely(err))
return err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+
+ CLASS(fd, f)(fd);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
msg.msg_name = NULL;
msg.msg_control = NULL;
@@ -2193,7 +2186,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (addr) {
err = move_addr_to_kernel(addr, addr_len, &address);
if (err < 0)
- goto out_put;
+ return err;
msg.msg_name = (struct sockaddr *)&address;
msg.msg_namelen = addr_len;
}
@@ -2201,12 +2194,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = __sock_sendmsg(sock, &msg);
-
-out_put:
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ return __sock_sendmsg(sock, &msg);
}
SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
@@ -2241,14 +2229,18 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
};
struct socket *sock;
int err, err2;
- int fput_needed;
err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter);
if (unlikely(err))
return err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+
+ CLASS(fd, f)(fd);
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
@@ -2260,9 +2252,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
if (err2 < 0)
err = err2;
}
-
- fput_light(sock->file, fput_needed);
-out:
return err;
}
@@ -2337,17 +2326,16 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
{
sockptr_t optval = USER_SOCKPTR(user_optval);
bool compat = in_compat_syscall();
- int err, fput_needed;
struct socket *sock;
+ CLASS(fd, f)(fd);
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
-
- err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- fput_light(sock->file, fput_needed);
- return err;
+ return do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
}
SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
@@ -2362,7 +2350,7 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int do_sock_getsockopt(struct socket *sock, bool compat, int level,
int optname, sockptr_t optval, sockptr_t optlen)
{
- int max_optlen __maybe_unused;
+ int max_optlen __maybe_unused = 0;
const struct proto_ops *ops;
int err;
@@ -2371,7 +2359,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
return err;
if (!compat)
- max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+ copy_from_sockptr(&max_optlen, optlen, sizeof(int));
ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET) {
@@ -2403,20 +2391,17 @@ EXPORT_SYMBOL(do_sock_getsockopt);
int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen)
{
- int err, fput_needed;
struct socket *sock;
- bool compat;
+ CLASS(fd, f)(fd);
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- compat = in_compat_syscall();
- err = do_sock_getsockopt(sock, compat, level, optname,
+ return do_sock_getsockopt(sock, in_compat_syscall(), level, optname,
USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
-
- fput_light(sock->file, fput_needed);
- return err;
}
SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
@@ -2442,15 +2427,16 @@ int __sys_shutdown_sock(struct socket *sock, int how)
int __sys_shutdown(int fd, int how)
{
- int err, fput_needed;
struct socket *sock;
+ CLASS(fd, f)(fd);
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock != NULL) {
- err = __sys_shutdown_sock(sock, how);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ return __sys_shutdown_sock(sock, how);
}
SYSCALL_DEFINE2(shutdown, int, fd, int, how)
@@ -2666,22 +2652,21 @@ long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
{
- int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock;
if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
return -EINVAL;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+ CLASS(fd, f)(fd);
- err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
}
SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
@@ -2696,7 +2681,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int
int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
unsigned int flags, bool forbid_cmsg_compat)
{
- int fput_needed, err, datagrams;
+ int err, datagrams;
struct socket *sock;
struct mmsghdr __user *entry;
struct compat_mmsghdr __user *compat_entry;
@@ -2712,9 +2697,13 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
datagrams = 0;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ CLASS(fd, f)(fd);
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
used_address.name_len = UINT_MAX;
entry = mmsg;
@@ -2751,8 +2740,6 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
cond_resched();
}
- fput_light(sock->file, fput_needed);
-
/* We only return an error if no datagrams were able to be sent */
if (datagrams != 0)
return datagrams;
@@ -2874,22 +2861,21 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
{
- int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock;
if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
return -EINVAL;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+ CLASS(fd, f)(fd);
- err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
}
SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
@@ -2906,7 +2892,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags,
struct timespec64 *timeout)
{
- int fput_needed, err, datagrams;
+ int err = 0, datagrams;
struct socket *sock;
struct mmsghdr __user *entry;
struct compat_mmsghdr __user *compat_entry;
@@ -2921,16 +2907,18 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
datagrams = 0;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ CLASS(fd, f)(fd);
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
if (likely(!(flags & MSG_ERRQUEUE))) {
err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
- }
+ if (err)
+ return err;
}
entry = mmsg;
@@ -2987,12 +2975,10 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
}
if (err == 0)
- goto out_put;
+ return datagrams;
- if (datagrams == 0) {
- datagrams = err;
- goto out_put;
- }
+ if (datagrams == 0)
+ return err;
/*
* We may return less entries than requested (vlen) if the
@@ -3007,9 +2993,6 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
*/
WRITE_ONCE(sock->sk->sk_err, -err);
}
-out_put:
- fput_light(sock->file, fput_needed);
-
return datagrams;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
index 85625e3f3814..a5bff02cd7ba 100644
--- a/net/sunrpc/auth_gss/gss_krb5_test.c
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -17,7 +17,7 @@
#include "gss_krb5_internal.h"
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
struct gss_krb5_test_param {
const char *desc;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 95ff74706104..059f6ef1ad18 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -731,11 +731,10 @@ static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
static void cache_revisit_request(struct cache_head *item)
{
struct cache_deferred_req *dreq;
- struct list_head pending;
struct hlist_node *tmp;
int hash = DFR_HASH(item);
+ LIST_HEAD(pending);
- INIT_LIST_HEAD(&pending);
spin_lock(&cache_defer_lock);
hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash)
@@ -756,10 +755,8 @@ static void cache_revisit_request(struct cache_head *item)
void cache_clean_deferred(void *owner)
{
struct cache_deferred_req *dreq, *tmp;
- struct list_head pending;
+ LIST_HEAD(pending);
-
- INIT_LIST_HEAD(&pending);
spin_lock(&cache_defer_lock);
list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
@@ -1085,9 +1082,8 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
{
struct cache_queue *cq, *tmp;
struct cache_request *cr;
- struct list_head dequeued;
+ LIST_HEAD(dequeued);
- INIT_LIST_HEAD(&dequeued);
spin_lock(&queue_lock);
list_for_each_entry_safe(cq, tmp, &detail->queue, list)
if (!cq->reader) {
@@ -1431,7 +1427,9 @@ static int c_show(struct seq_file *m, void *p)
seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n",
convert_to_wallclock(cp->expiry_time),
kref_read(&cp->ref), cp->flags);
- cache_get(cp);
+ if (!cache_get_rcu(cp))
+ return 0;
+
if (cache_check(cd, cp, NULL))
/* cache_check does a cache_put on failure */
seq_puts(m, "# ");
@@ -1596,7 +1594,6 @@ static int cache_release_procfs(struct inode *inode, struct file *filp)
}
static const struct proc_ops cache_channel_proc_ops = {
- .proc_lseek = no_llseek,
.proc_read = cache_read_procfs,
.proc_write = cache_write_procfs,
.proc_poll = cache_poll_procfs,
@@ -1662,7 +1659,6 @@ static const struct proc_ops cache_flush_proc_ops = {
.proc_read = read_flush_procfs,
.proc_write = write_flush_procfs,
.proc_release = release_flush_procfs,
- .proc_lseek = no_llseek,
};
static void remove_cache_proc_entries(struct cache_detail *cd)
@@ -1815,7 +1811,6 @@ static int cache_release_pipefs(struct inode *inode, struct file *filp)
const struct file_operations cache_file_operations_pipefs = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = cache_read_pipefs,
.write = cache_write_pipefs,
.poll = cache_poll_pipefs,
@@ -1881,7 +1876,6 @@ const struct file_operations cache_flush_operations_pipefs = {
.read = read_flush_pipefs,
.write = write_flush_pipefs,
.release = release_flush_pipefs,
- .llseek = no_llseek,
};
int sunrpc_cache_register_pipefs(struct dentry *parent,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 09f29a95f2bc..0090162ee8c3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -48,13 +48,8 @@
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
-/*
- * All RPC clients are linked into this list
- */
-
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
-
static void call_start(struct rpc_task *task);
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
@@ -546,7 +541,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.connect_timeout = args->connect_timeout,
.reconnect_timeout = args->reconnect_timeout,
};
- char servername[48];
+ char servername[RPC_MAXNETNAMELEN];
struct rpc_clnt *clnt;
int i;
@@ -1893,12 +1888,6 @@ call_allocate(struct rpc_task *task)
if (req->rq_buffer)
return;
- if (proc->p_proc != 0) {
- BUG_ON(proc->p_arglen == 0);
- if (proc->p_decode != NULL)
- BUG_ON(proc->p_replen == 0);
- }
-
/*
* Calculate the size (in quads) of the RPC call
* and reply headers, and convert both values
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 910a5d850d04..7ce3721c06ca 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -385,7 +385,6 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static const struct file_operations rpc_pipe_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = rpc_pipe_read,
.write = rpc_pipe_write,
.poll = rpc_pipe_poll,
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index d4a362c9e4b3..e3c6e3b63f0b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -36,7 +36,11 @@ static inline int sock_is_loopback(struct sock *sk)
return loopback;
}
+struct svc_serv;
+struct svc_rqst;
int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void);
void auth_domain_cleanup(void);
+void svc_sock_update_bufs(struct svc_serv *serv);
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp);
#endif /* _NET_SUNRPC_SUNRPC_H */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 88a59cfa5583..79879b7d39cb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,6 +32,7 @@
#include <trace/events/sunrpc.h>
#include "fail.h"
+#include "sunrpc.h"
#define RPCDBG_FACILITY RPCDBG_SVCDSP
@@ -417,7 +418,7 @@ struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
-int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
+static int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
{
int err;
@@ -429,7 +430,6 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
svc_unregister(serv, net);
return 0;
}
-EXPORT_SYMBOL_GPL(svc_rpcb_setup);
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net)
{
@@ -440,10 +440,11 @@ EXPORT_SYMBOL_GPL(svc_rpcb_cleanup);
static int svc_uses_rpcbind(struct svc_serv *serv)
{
- struct svc_program *progp;
- unsigned int i;
+ unsigned int p, i;
+
+ for (p = 0; p < serv->sv_nprogs; p++) {
+ struct svc_program *progp = &serv->sv_programs[p];
- for (progp = serv->sv_program; progp; progp = progp->pg_next) {
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
@@ -480,7 +481,7 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service
*/
static struct svc_serv *
-__svc_create(struct svc_program *prog, struct svc_stat *stats,
+__svc_create(struct svc_program *prog, int nprogs, struct svc_stat *stats,
unsigned int bufsize, int npools, int (*threadfn)(void *data))
{
struct svc_serv *serv;
@@ -491,7 +492,8 @@ __svc_create(struct svc_program *prog, struct svc_stat *stats,
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
serv->sv_name = prog->pg_name;
- serv->sv_program = prog;
+ serv->sv_programs = prog;
+ serv->sv_nprogs = nprogs;
serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
@@ -499,17 +501,18 @@ __svc_create(struct svc_program *prog, struct svc_stat *stats,
serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
serv->sv_threadfn = threadfn;
xdrsize = 0;
- while (prog) {
- prog->pg_lovers = prog->pg_nvers-1;
- for (vers=0; vers<prog->pg_nvers ; vers++)
- if (prog->pg_vers[vers]) {
- prog->pg_hivers = vers;
- if (prog->pg_lovers > vers)
- prog->pg_lovers = vers;
- if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
- xdrsize = prog->pg_vers[vers]->vs_xdrsize;
+ for (i = 0; i < nprogs; i++) {
+ struct svc_program *progp = &prog[i];
+
+ progp->pg_lovers = progp->pg_nvers-1;
+ for (vers = 0; vers < progp->pg_nvers ; vers++)
+ if (progp->pg_vers[vers]) {
+ progp->pg_hivers = vers;
+ if (progp->pg_lovers > vers)
+ progp->pg_lovers = vers;
+ if (progp->pg_vers[vers]->vs_xdrsize > xdrsize)
+ xdrsize = progp->pg_vers[vers]->vs_xdrsize;
}
- prog = prog->pg_next;
}
serv->sv_xdrsize = xdrsize;
INIT_LIST_HEAD(&serv->sv_tempsocks);
@@ -558,13 +561,14 @@ __svc_create(struct svc_program *prog, struct svc_stat *stats,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data))
{
- return __svc_create(prog, NULL, bufsize, 1, threadfn);
+ return __svc_create(prog, 1, NULL, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
/**
* svc_create_pooled - Create an RPC service with pooled threads
- * @prog: the RPC program the new service will handle
+ * @prog: Array of RPC programs the new service will handle
+ * @nprogs: Number of programs in the array
* @stats: the stats struct if desired
* @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog
@@ -572,6 +576,7 @@ EXPORT_SYMBOL_GPL(svc_create);
* Returns an instantiated struct svc_serv object or NULL.
*/
struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ unsigned int nprogs,
struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data))
@@ -579,7 +584,7 @@ struct svc_serv *svc_create_pooled(struct svc_program *prog,
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, stats, bufsize, npools, threadfn);
+ serv = __svc_create(prog, nprogs, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
serv->sv_is_pooled = true;
@@ -602,16 +607,16 @@ svc_destroy(struct svc_serv **servp)
*servp = NULL;
- dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
+ dprintk("svc: svc_destroy(%s)\n", serv->sv_programs->pg_name);
timer_shutdown_sync(&serv->sv_temptimer);
/*
* Remaining transports at this point are not expected.
*/
WARN_ONCE(!list_empty(&serv->sv_permsocks),
- "SVC: permsocks remain for %s\n", serv->sv_program->pg_name);
+ "SVC: permsocks remain for %s\n", serv->sv_programs->pg_name);
WARN_ONCE(!list_empty(&serv->sv_tempsocks),
- "SVC: tempsocks remain for %s\n", serv->sv_program->pg_name);
+ "SVC: tempsocks remain for %s\n", serv->sv_programs->pg_name);
cache_clean_deferred(serv);
@@ -664,8 +669,21 @@ svc_release_buffer(struct svc_rqst *rqstp)
put_page(rqstp->rq_pages[i]);
}
-struct svc_rqst *
-svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
+static void
+svc_rqst_free(struct svc_rqst *rqstp)
+{
+ folio_batch_release(&rqstp->rq_fbatch);
+ svc_release_buffer(rqstp);
+ if (rqstp->rq_scratch_page)
+ put_page(rqstp->rq_scratch_page);
+ kfree(rqstp->rq_resp);
+ kfree(rqstp->rq_argp);
+ kfree(rqstp->rq_auth_data);
+ kfree_rcu(rqstp, rq_rcu_head);
+}
+
+static struct svc_rqst *
+svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
{
struct svc_rqst *rqstp;
@@ -693,27 +711,10 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
goto out_enomem;
- return rqstp;
-out_enomem:
- svc_rqst_free(rqstp);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(svc_rqst_alloc);
-
-static struct svc_rqst *
-svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
-{
- struct svc_rqst *rqstp;
+ rqstp->rq_err = -EAGAIN; /* No error yet */
- rqstp = svc_rqst_alloc(serv, pool, node);
- if (!rqstp)
- return ERR_PTR(-ENOMEM);
-
- spin_lock_bh(&serv->sv_lock);
serv->sv_nrthreads += 1;
- spin_unlock_bh(&serv->sv_lock);
-
- atomic_inc(&pool->sp_nrthreads);
+ pool->sp_nrthreads += 1;
/* Protected by whatever lock the service uses when calling
* svc_set_num_threads()
@@ -721,6 +722,10 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
return rqstp;
+
+out_enomem:
+ svc_rqst_free(rqstp);
+ return NULL;
}
/**
@@ -768,31 +773,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
struct svc_pool *pool;
unsigned int i;
-retry:
pool = target_pool;
- if (pool != NULL) {
- if (atomic_inc_not_zero(&pool->sp_nrthreads))
- goto found_pool;
- return NULL;
- } else {
+ if (!pool) {
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
- if (atomic_inc_not_zero(&pool->sp_nrthreads))
- goto found_pool;
+ if (pool->sp_nrthreads)
+ break;
}
- return NULL;
}
-found_pool:
- set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
- set_bit(SP_NEED_VICTIM, &pool->sp_flags);
- if (!atomic_dec_and_test(&pool->sp_nrthreads))
+ if (pool && pool->sp_nrthreads) {
+ set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+ set_bit(SP_NEED_VICTIM, &pool->sp_flags);
return pool;
- /* Nothing left in this pool any more */
- clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
- clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
- goto retry;
+ }
+ return NULL;
}
static int
@@ -803,6 +799,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
struct svc_pool *chosen_pool;
unsigned int state = serv->sv_nrthreads-1;
int node;
+ int err;
do {
nrservs--;
@@ -810,8 +807,8 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
node = svc_pool_map_get_node(chosen_pool->sp_id);
rqstp = svc_prepare_thread(serv, chosen_pool, node);
- if (IS_ERR(rqstp))
- return PTR_ERR(rqstp);
+ if (!rqstp)
+ return -ENOMEM;
task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
@@ -825,6 +822,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
svc_sock_update_bufs(serv);
wake_up_process(task);
+
+ wait_var_event(&rqstp->rq_err, rqstp->rq_err != -EAGAIN);
+ err = rqstp->rq_err;
+ if (err) {
+ svc_exit_thread(rqstp);
+ return err;
+ }
} while (nrservs > 0);
return 0;
@@ -871,7 +875,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
if (!pool)
nrservs -= serv->sv_nrthreads;
else
- nrservs -= atomic_read(&pool->sp_nrthreads);
+ nrservs -= pool->sp_nrthreads;
if (nrservs > 0)
return svc_start_kthreads(serv, pool, nrservs);
@@ -933,25 +937,21 @@ void svc_rqst_release_pages(struct svc_rqst *rqstp)
}
}
-/*
- * Called from a server thread as it's exiting. Caller must hold the "service
- * mutex" for the service.
+/**
+ * svc_exit_thread - finalise the termination of a sunrpc server thread
+ * @rqstp: the svc_rqst which represents the thread.
+ *
+ * When a thread started with svc_new_thread() exits it must call
+ * svc_exit_thread() as its last act. This must be done with the
+ * service mutex held. Normally this is held by a DIFFERENT thread, the
+ * one that is calling svc_set_num_threads() and which will wait for
+ * SP_VICTIM_REMAINS to be cleared before dropping the mutex. If the
+ * thread exits for any reason other than svc_thread_should_stop()
+ * returning %true (which indicated that svc_set_num_threads() is
+ * waiting for it to exit), then it must take the service mutex itself,
+ * which can only safely be done using mutex_try_lock().
*/
void
-svc_rqst_free(struct svc_rqst *rqstp)
-{
- folio_batch_release(&rqstp->rq_fbatch);
- svc_release_buffer(rqstp);
- if (rqstp->rq_scratch_page)
- put_page(rqstp->rq_scratch_page);
- kfree(rqstp->rq_resp);
- kfree(rqstp->rq_argp);
- kfree(rqstp->rq_auth_data);
- kfree_rcu(rqstp, rq_rcu_head);
-}
-EXPORT_SYMBOL_GPL(svc_rqst_free);
-
-void
svc_exit_thread(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
@@ -959,11 +959,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
list_del_rcu(&rqstp->rq_all);
- atomic_dec(&pool->sp_nrthreads);
-
- spin_lock_bh(&serv->sv_lock);
+ pool->sp_nrthreads -= 1;
serv->sv_nrthreads -= 1;
- spin_unlock_bh(&serv->sv_lock);
svc_sock_update_bufs(serv);
svc_rqst_free(rqstp);
@@ -1098,6 +1095,7 @@ static int __svc_register(struct net *net, const char *progname,
return error;
}
+static
int svc_rpcbind_set_version(struct net *net,
const struct svc_program *progp,
u32 version, int family,
@@ -1108,7 +1106,6 @@ int svc_rpcbind_set_version(struct net *net,
version, family, proto, port);
}
-EXPORT_SYMBOL_GPL(svc_rpcbind_set_version);
int svc_generic_rpcbind_set(struct net *net,
const struct svc_program *progp,
@@ -1156,15 +1153,16 @@ int svc_register(const struct svc_serv *serv, struct net *net,
const int family, const unsigned short proto,
const unsigned short port)
{
- struct svc_program *progp;
- unsigned int i;
+ unsigned int p, i;
int error = 0;
WARN_ON_ONCE(proto == 0 && port == 0);
if (proto == 0 && port == 0)
return -EINVAL;
- for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+ for (p = 0; p < serv->sv_nprogs; p++) {
+ struct svc_program *progp = &serv->sv_programs[p];
+
for (i = 0; i < progp->pg_nvers; i++) {
error = progp->pg_rpcbind_set(net, progp, i,
@@ -1216,13 +1214,14 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
static void svc_unregister(const struct svc_serv *serv, struct net *net)
{
struct sighand_struct *sighand;
- struct svc_program *progp;
unsigned long flags;
- unsigned int i;
+ unsigned int p, i;
clear_thread_flag(TIF_SIGPENDING);
- for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+ for (p = 0; p < serv->sv_nprogs; p++) {
+ struct svc_program *progp = &serv->sv_programs[p];
+
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
@@ -1322,13 +1321,13 @@ static int
svc_process_common(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_res_stream;
- struct svc_program *progp;
+ struct svc_program *progp = NULL;
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
enum svc_auth_status auth_res;
unsigned int aoffset;
- int rc;
+ int pr, rc;
__be32 *p;
/* Will be turned off only when NFSv4 Sessions are used */
@@ -1352,9 +1351,9 @@ svc_process_common(struct svc_rqst *rqstp)
rqstp->rq_vers = be32_to_cpup(p++);
rqstp->rq_proc = be32_to_cpup(p);
- for (progp = serv->sv_program; progp; progp = progp->pg_next)
- if (rqstp->rq_prog == progp->pg_prog)
- break;
+ for (pr = 0; pr < serv->sv_nprogs; pr++)
+ if (rqstp->rq_prog == serv->sv_programs[pr].pg_prog)
+ progp = &serv->sv_programs[pr];
/*
* Decode auth data, and add verifier to reply buffer.
@@ -1526,6 +1525,14 @@ err_system_err:
goto sendit;
}
+/*
+ * Drop request
+ */
+static void svc_drop(struct svc_rqst *rqstp)
+{
+ trace_svc_drop(rqstp);
+}
+
/**
* svc_process - Execute one RPC transaction
* @rqstp: RPC transaction context
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d3735ab3e6d1..43c57124de52 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -268,7 +268,7 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
spin_unlock(&svc_xprt_class_lock);
newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(newxprt)) {
- trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ trace_svc_xprt_create_err(serv->sv_programs->pg_name,
xcl->xcl_name, sap, len,
newxprt);
module_put(xcl->xcl_owner);
@@ -905,15 +905,6 @@ void svc_recv(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_recv);
-/*
- * Drop request
- */
-void svc_drop(struct svc_rqst *rqstp)
-{
- trace_svc_drop(rqstp);
-}
-EXPORT_SYMBOL_GPL(svc_drop);
-
/**
* svc_send - Return reply to client
* @rqstp: RPC transaction context
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 1619211f0960..55b4d2874188 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -18,6 +18,7 @@
#include <linux/sunrpc/svcauth.h>
#include <linux/err.h>
#include <linux/hash.h>
+#include <linux/user_namespace.h>
#include <trace/events/sunrpc.h>
@@ -98,7 +99,6 @@ enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp)
rqstp->rq_authop = aops;
return aops->accept(rqstp);
}
-EXPORT_SYMBOL_GPL(svc_authenticate);
/**
* svc_set_client - Assign an appropriate 'auth_domain' as the client
@@ -176,6 +176,33 @@ rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_auth_flavor);
+/**
+ * svcauth_map_clnt_to_svc_cred_local - maps a generic cred
+ * to a svc_cred suitable for use in nfsd.
+ * @clnt: rpc_clnt associated with nfs client
+ * @cred: generic cred associated with nfs client
+ * @svc: returned svc_cred that is suitable for use in nfsd
+ */
+void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt,
+ const struct cred *cred,
+ struct svc_cred *svc)
+{
+ struct user_namespace *userns = clnt->cl_cred ?
+ clnt->cl_cred->user_ns : &init_user_ns;
+
+ memset(svc, 0, sizeof(struct svc_cred));
+
+ svc->cr_uid = KUIDT_INIT(from_kuid_munged(userns, cred->fsuid));
+ svc->cr_gid = KGIDT_INIT(from_kgid_munged(userns, cred->fsgid));
+ svc->cr_flavor = clnt->cl_auth->au_flavor;
+ if (cred->group_info)
+ svc->cr_group_info = get_group_info(cred->group_info);
+ /* These aren't relevant for local (network is bypassed) */
+ svc->cr_principal = NULL;
+ svc->cr_gss_mech = NULL;
+}
+EXPORT_SYMBOL_GPL(svcauth_map_clnt_to_svc_cred_local);
+
/**************************************************
* 'auth_domains' are stored in a hash table indexed by name.
* When the last reference to an 'auth_domain' is dropped,
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 04b45588ae6f..8ca98b146ec8 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -697,7 +697,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
rqstp->rq_auth_stat = rpc_autherr_badcred;
ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
- ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
+ ipm = __ip_map_lookup(sn->ip_map_cache,
+ rqstp->rq_server->sv_programs->pg_class,
&sin6->sin6_addr);
if (ipm == NULL)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6b3f01beb294..95397677673b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1378,7 +1378,6 @@ void svc_sock_update_bufs(struct svc_serv *serv)
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
spin_unlock_bh(&serv->sv_lock);
}
-EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
/*
* Initialize socket for RPC use and create svc_sock struct
@@ -1552,6 +1551,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
+ __netns_tracker_free(net, &sock->sk->ns_tracker, false);
+ sock->sk->sk_net_refcnt = 1;
+ get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}
@@ -1609,7 +1612,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1619,8 +1621,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
- if (pfc->va)
- __page_frag_cache_drain(virt_to_head_page(pfc->va),
- pfc->pagecnt_bias);
+
+ page_frag_cache_drain(&svsk->sk_frag_cache);
kfree(svsk);
}
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
index 8507cd4d8921..28c68b5f6823 100644
--- a/net/sunrpc/xprtrdma/ib_client.c
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -153,6 +153,7 @@ static void rpcrdma_remove_one(struct ib_device *device,
}
trace_rpcrdma_client_remove_one_done(device);
+ xa_destroy(&rd->rd_xa);
kfree(rd);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 58ae6ec4f25b..415c0310101f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -233,25 +233,34 @@ static int svc_rdma_proc_init(void)
rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err;
rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_read;
rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_recv;
rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_sq;
svcrdma_table_header = register_sysctl("sunrpc/svc_rdma",
svcrdma_parm_table);
+ if (!svcrdma_table_header)
+ goto err_write;
+
return 0;
-out_err:
+err_write:
+ rc = -ENOMEM;
+ percpu_counter_destroy(&svcrdma_stat_write);
+err_sq:
percpu_counter_destroy(&svcrdma_stat_sq_starve);
+err_recv:
percpu_counter_destroy(&svcrdma_stat_recv);
+err_read:
percpu_counter_destroy(&svcrdma_stat_read);
+err:
return rc;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index d72953f29258..292022f0976e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -94,7 +94,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -493,7 +493,13 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount))
return false;
- /* A bogus segcount causes this buffer overflow check to fail. */
+ /* Before trusting the segcount value enough to use it in
+ * a computation, perform a simple range check. This is an
+ * arbitrary but sensible limit (ie, not architectural).
+ */
+ if (unlikely(segcount > RPCSVC_MAXPAGES))
+ return false;
+
p = xdr_inline_decode(&rctxt->rc_stream,
segcount * rpcrdma_segment_maxsz * sizeof(*p));
return p != NULL;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index bb5436b719e0..96154a2367a1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -100,7 +100,7 @@
*/
#include <linux/spinlock.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f15750cacacf..c3fbf0779d4a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -339,7 +339,6 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
svc_xprt_enqueue(xprt);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
svc_xprt_deferred_close(xprt);
break;
default:
@@ -370,7 +369,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
listen_id = svc_rdma_create_listen_id(net, sa, cma_xprt);
if (IS_ERR(listen_id)) {
kfree(cma_xprt);
- return (struct svc_xprt *)listen_id;
+ return ERR_CAST(listen_id);
}
cma_xprt->sc_cm_id = listen_id;
@@ -384,6 +383,16 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
return &cma_xprt->sc_xprt;
}
+static void svc_rdma_xprt_done(struct rpcrdma_notification *rn)
+{
+ struct svcxprt_rdma *rdma = container_of(rn, struct svcxprt_rdma,
+ sc_rn);
+ struct rdma_cm_id *id = rdma->sc_cm_id;
+
+ trace_svcrdma_device_removal(id);
+ svc_xprt_close(&rdma->sc_xprt);
+}
+
/*
* This is the xpo_recvfrom function for listening endpoints. Its
* purpose is to accept incoming connections. The CMA callback handler
@@ -425,6 +434,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
+ if (rpcrdma_rn_register(dev, &newxprt->sc_rn, svc_rdma_xprt_done))
+ goto errout;
+
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
@@ -580,6 +592,7 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
+ struct ib_device *device = rdma->sc_cm_id->device;
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -608,6 +621,7 @@ static void __svc_rdma_free(struct work_struct *work)
/* Destroy the CM ID */
rdma_destroy_id(rdma->sc_cm_id);
+ rpcrdma_rn_unregister(device, &rdma->sc_rn);
kfree(rdma);
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0e1691316f42..c60936d8cef7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1198,6 +1198,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
+ clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
}
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
@@ -1278,6 +1279,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
transport->file = NULL;
sk->sk_user_data = NULL;
+ sk->sk_sndtimeo = 0;
xs_restore_old_callbacks(transport, sk);
xprt_clear_connected(xprt);
@@ -1939,6 +1941,13 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
+ if (protocol == IPPROTO_TCP) {
+ __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
+ sock->sk->sk_net_refcnt = 1;
+ get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(xprt->xprt_net, 1);
+ }
+
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
@@ -2459,6 +2468,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
+ case -ENOTCONN:
break;
default:
printk("%s: connect returned unhandled error %d\n",
@@ -2613,11 +2623,10 @@ static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_par
rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done,
XS_TLS_HANDSHAKE_TO);
if (rc <= 0) {
- if (!tls_handshake_cancel(sk)) {
- if (rc == 0)
- rc = -ETIMEDOUT;
- goto out_put_xprt;
- }
+ tls_handshake_cancel(sk);
+ if (rc == 0)
+ rc = -ETIMEDOUT;
+ goto out_put_xprt;
}
rc = lower_transport->xprt_err;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 593846d25214..114fef65f92e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -320,8 +320,8 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
{
struct tipc_msg *hdr, *_hdr;
struct sk_buff_head tmpq;
+ u16 cong_link_cnt = 0;
struct sk_buff *_skb;
- u16 cong_link_cnt;
int rc = 0;
/* Is a cluster supporting with new capabilities ? */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 5a526ebafeb4..ae1ddbf71853 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -163,8 +163,12 @@ static int bearer_name_validate(const char *name,
/* return bearer name components, if necessary */
if (name_parts) {
- strcpy(name_parts->media_name, media_name);
- strcpy(name_parts->if_name, if_name);
+ if (strscpy(name_parts->media_name, media_name,
+ TIPC_MAX_MEDIA_NAME) < 0)
+ return 0;
+ if (strscpy(name_parts->if_name, if_name,
+ TIPC_MAX_IF_NAME) < 0)
+ return 0;
}
return 1;
}
@@ -322,7 +326,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
if (!b)
return -ENOMEM;
- strcpy(b->name, name);
+ strscpy(b->name, name);
b->media = m;
res = m->enable_media(net, b, attr);
if (res) {
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 77a3d016cade..e2f19627e43d 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -149,7 +149,7 @@ static int dom_size(int peers)
while ((i * i) < peers)
i++;
- return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+ return min(i, MAX_MON_DOMAIN);
}
static void map_set(u64 *up_map, int i, unsigned int v)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1a0cd06f0eae..65dcbb54f55d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1009,12 +1009,11 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct tipc_member *mbr = NULL;
struct net *net = sock_net(sk);
u32 node, port, exclude;
- struct list_head dsts;
+ LIST_HEAD(dsts);
int lookups = 0;
int dstcnt, rc;
bool cong;
- INIT_LIST_HEAD(&dsts);
ua->sa.type = msg_nametype(hdr);
ua->scope = msg_lookup_scope(hdr);
@@ -1161,10 +1160,9 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- struct list_head dsts;
u32 dstcnt, exclude;
+ LIST_HEAD(dsts);
- INIT_LIST_HEAD(&dsts);
ua->sa.type = msg_nametype(hdr);
ua->scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 439f75539977..108a4cc2e001 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -807,6 +807,7 @@ static void cleanup_bearer(struct work_struct *work)
{
struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
struct udp_replicast *rcast, *tmp;
+ struct tipc_net *tn;
list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
dst_cache_destroy(&rcast->dst_cache);
@@ -814,10 +815,14 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
- atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
+ tn = tipc_net(sock_net(ub->ubsock->sk));
+
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
+
+ /* Note: could use a call_rcu() to avoid another synchronize_net() */
synchronize_net();
+ atomic_dec(&tn->wq_count);
kfree(ub);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 305a412785f5..bbf26cc4f6ee 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1201,7 +1201,7 @@ trim_sgl:
if (!num_async) {
goto send_end;
- } else if (num_zc) {
+ } else if (num_zc || eor) {
int err;
/* Wait for pending encryptions to get completed */
diff --git a/net/tls/trace.h b/net/tls/trace.h
index 9ba5f600ea43..2d8ce4ff3265 100644
--- a/net/tls/trace.h
+++ b/net/tls/trace.h
@@ -7,7 +7,7 @@
#if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _TLS_TRACE_H_
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/tracepoint.h>
struct sock;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0be0dcb07f7b..6b1762300443 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -693,10 +693,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_state_unlock(sk);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
- }
+ u->oob_skb = NULL;
#endif
wake_up_interruptible_all(&u->peer_wait);
@@ -2226,13 +2223,9 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
}
maybe_add_creds(skb, sock, other);
- skb_get(skb);
-
scm_stat_add(other, skb);
spin_lock(&other->sk_receive_queue.lock);
- if (ousk->oob_skb)
- consume_skb(ousk->oob_skb);
WRITE_ONCE(ousk->oob_skb, skb);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
@@ -2320,6 +2313,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
fds_sent = true;
if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
err = skb_splice_from_iter(skb, &msg->msg_iter, size,
sk->sk_allocation);
if (err < 0) {
@@ -2640,8 +2634,6 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
- else
- skb_get(oob_skb);
spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
@@ -2651,8 +2643,6 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
- consume_skb(oob_skb);
-
mutex_unlock(&u->iolock);
if (chunk < 0)
@@ -2665,56 +2655,52 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
int flags, int copied)
{
+ struct sk_buff *read_skb = NULL, *unread_skb = NULL;
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb)) {
- struct sk_buff *unlinked_skb = NULL;
+ if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
+ return skb;
- spin_lock(&sk->sk_receive_queue.lock);
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (!unix_skb_len(skb)) {
if (copied && (!u->oob_skb || skb == u->oob_skb)) {
skb = NULL;
} else if (flags & MSG_PEEK) {
skb = skb_peek_next(skb, &sk->sk_receive_queue);
} else {
- unlinked_skb = skb;
+ read_skb = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ __skb_unlink(read_skb, &sk->sk_receive_queue);
}
- spin_unlock(&sk->sk_receive_queue.lock);
+ if (!skb)
+ goto unlock;
+ }
- consume_skb(unlinked_skb);
- } else {
- struct sk_buff *unlinked_skb = NULL;
+ if (skb != u->oob_skb)
+ goto unlock;
- spin_lock(&sk->sk_receive_queue.lock);
+ if (copied) {
+ skb = NULL;
+ } else if (!(flags & MSG_PEEK)) {
+ WRITE_ONCE(u->oob_skb, NULL);
- if (skb == u->oob_skb) {
- if (copied) {
- skb = NULL;
- } else if (!(flags & MSG_PEEK)) {
- if (sock_flag(sk, SOCK_URGINLINE)) {
- WRITE_ONCE(u->oob_skb, NULL);
- consume_skb(skb);
- } else {
- __skb_unlink(skb, &sk->sk_receive_queue);
- WRITE_ONCE(u->oob_skb, NULL);
- unlinked_skb = skb;
- skb = skb_peek(&sk->sk_receive_queue);
- }
- } else if (!sock_flag(sk, SOCK_URGINLINE)) {
- skb = skb_peek_next(skb, &sk->sk_receive_queue);
- }
+ if (!sock_flag(sk, SOCK_URGINLINE)) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ unread_skb = skb;
+ skb = skb_peek(&sk->sk_receive_queue);
}
+ } else if (!sock_flag(sk, SOCK_URGINLINE)) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ }
- spin_unlock(&sk->sk_receive_queue.lock);
+unlock:
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(read_skb);
+ kfree_skb(unread_skb);
- if (unlinked_skb) {
- WARN_ON_ONCE(skb_unref(unlinked_skb));
- kfree_skb(unlinked_skb);
- }
- }
return skb;
}
#endif
@@ -2756,7 +2742,6 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
unix_state_unlock(sk);
if (drop) {
- WARN_ON_ONCE(skb_unref(skb));
kfree_skb(skb);
return -EAGAIN;
}
@@ -3192,9 +3177,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+ struct sk_buff *next_skb;
+
+ next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb == oob_skb ||
- (!oob_skb && !unix_skb_len(skb)))
+ (!unix_skb_len(skb) &&
+ (!oob_skb || next_skb == oob_skb)))
answ = 1;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 06d94ad999e9..0068e758be4d 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -337,18 +337,6 @@ static bool unix_vertex_dead(struct unix_vertex *vertex)
return true;
}
-static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
-{
- skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
-
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- WARN_ON_ONCE(skb_unref(u->oob_skb));
- u->oob_skb = NULL;
- }
-#endif
-}
-
static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
{
struct unix_vertex *vertex;
@@ -371,11 +359,11 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist
struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
spin_lock(&embryo_queue->lock);
- unix_collect_queue(unix_sk(skb->sk), hitlist);
+ skb_queue_splice_init(embryo_queue, hitlist);
spin_unlock(&embryo_queue->lock);
}
} else {
- unix_collect_queue(u, hitlist);
+ skb_queue_splice_init(queue, hitlist);
}
spin_unlock(&queue->lock);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 0ff9b2dd86ba..5cf8109f672a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -112,16 +112,19 @@
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vm_sockets.h>
+#include <uapi/asm-generic/ioctls.h>
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+static void vsock_close(struct sock *sk, long timeout);
/* Protocol family. */
struct proto vsock_proto = {
.name = "AF_VSOCK",
.owner = THIS_MODULE,
.obj_size = sizeof(struct vsock_sock),
+ .close = vsock_close,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = vsock_bpf_update_proto,
#endif
@@ -796,45 +799,46 @@ static bool sock_type_connectible(u16 type)
static void __vsock_release(struct sock *sk, int level)
{
- if (sk) {
- struct sock *pending;
- struct vsock_sock *vsk;
-
- vsk = vsock_sk(sk);
- pending = NULL; /* Compiler warning. */
+ struct vsock_sock *vsk;
+ struct sock *pending;
- /* When "level" is SINGLE_DEPTH_NESTING, use the nested
- * version to avoid the warning "possible recursive locking
- * detected". When "level" is 0, lock_sock_nested(sk, level)
- * is the same as lock_sock(sk).
- */
- lock_sock_nested(sk, level);
+ vsk = vsock_sk(sk);
+ pending = NULL; /* Compiler warning. */
- if (vsk->transport)
- vsk->transport->release(vsk);
- else if (sock_type_connectible(sk->sk_type))
- vsock_remove_sock(vsk);
+ /* When "level" is SINGLE_DEPTH_NESTING, use the nested
+ * version to avoid the warning "possible recursive locking
+ * detected". When "level" is 0, lock_sock_nested(sk, level)
+ * is the same as lock_sock(sk).
+ */
+ lock_sock_nested(sk, level);
- sock_orphan(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ if (vsk->transport)
+ vsk->transport->release(vsk);
+ else if (sock_type_connectible(sk->sk_type))
+ vsock_remove_sock(vsk);
- skb_queue_purge(&sk->sk_receive_queue);
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
- /* Clean up any sockets that never were accepted. */
- while ((pending = vsock_dequeue_accept(sk)) != NULL) {
- __vsock_release(pending, SINGLE_DEPTH_NESTING);
- sock_put(pending);
- }
+ skb_queue_purge(&sk->sk_receive_queue);
- release_sock(sk);
- sock_put(sk);
+ /* Clean up any sockets that never were accepted. */
+ while ((pending = vsock_dequeue_accept(sk)) != NULL) {
+ __vsock_release(pending, SINGLE_DEPTH_NESTING);
+ sock_put(pending);
}
+
+ release_sock(sk);
+ sock_put(sk);
}
static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ /* Flush MSG_ZEROCOPY leftovers. */
+ __skb_queue_purge(&sk->sk_error_queue);
+
vsock_deassign_transport(vsk);
/* When clearing these addresses, there's no need to set the family and
@@ -897,9 +901,22 @@ void vsock_data_ready(struct sock *sk)
}
EXPORT_SYMBOL_GPL(vsock_data_ready);
+/* Dummy callback required by sockmap.
+ * See unconditional call of saved_close() in sock_map_close().
+ */
+static void vsock_close(struct sock *sk, long timeout)
+{
+}
+
static int vsock_release(struct socket *sock)
{
- __vsock_release(sock->sk, 0);
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 0;
+
+ sk->sk_prot->close(sk, 0);
+ __vsock_release(sk, 0);
sock->sk = NULL;
sock->state = SS_FREE;
@@ -1050,6 +1067,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
mask |= EPOLLRDHUP;
}
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
if (sock->type == SOCK_DGRAM) {
/* For datagram sockets we can read if there is something in
* the queue and write as long as the socket isn't shutdown for
@@ -1295,6 +1315,57 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
+static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
+ int __user *arg)
+{
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk;
+ int ret;
+
+ vsk = vsock_sk(sk);
+
+ switch (cmd) {
+ case SIOCOUTQ: {
+ ssize_t n_bytes;
+
+ if (!vsk->transport || !vsk->transport->unsent_bytes) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
+ ret = -EINVAL;
+ break;
+ }
+
+ n_bytes = vsk->transport->unsent_bytes(vsk);
+ if (n_bytes < 0) {
+ ret = n_bytes;
+ break;
+ }
+
+ ret = put_user(n_bytes, arg);
+ break;
+ }
+ default:
+ ret = -ENOIOCTLCMD;
+ }
+
+ return ret;
+}
+
+static int vsock_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+
+ lock_sock(sock->sk);
+ ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
+ release_sock(sock->sk);
+
+ return ret;
+}
+
static const struct proto_ops vsock_dgram_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -1305,7 +1376,7 @@ static const struct proto_ops vsock_dgram_ops = {
.accept = sock_no_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
.sendmsg = vsock_dgram_sendmsg,
@@ -2294,7 +2365,7 @@ static const struct proto_ops vsock_stream_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
@@ -2316,7 +2387,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
@@ -2365,6 +2436,7 @@ static int vsock_create(struct net *net, struct socket *sock,
if (sock->type == SOCK_DGRAM) {
ret = vsock_assign_transport(vsk, NULL);
if (ret < 0) {
+ sock->sk = NULL;
sock_put(sk);
return ret;
}
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index e2157e387217..56c232cf5b0f 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -549,6 +549,7 @@ static void hvs_destruct(struct vsock_sock *vsk)
vmbus_hvsock_device_unregister(chan);
kfree(hvs);
+ vsk->trans = NULL;
}
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 64a07acfef12..b58c3818f284 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -94,6 +94,63 @@ out_rcu:
return ret;
}
+/* Caller need to hold vsock->tx_lock on vq */
+static int virtio_transport_send_skb(struct sk_buff *skb, struct virtqueue *vq,
+ struct virtio_vsock *vsock, gfp_t gfp)
+{
+ int ret, in_sg = 0, out_sg = 0;
+ struct scatterlist **sgs;
+
+ sgs = vsock->out_sgs;
+ sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
+ sizeof(*virtio_vsock_hdr(skb)));
+ out_sg++;
+
+ if (!skb_is_nonlinear(skb)) {
+ if (skb->len > 0) {
+ sg_init_one(sgs[out_sg], skb->data, skb->len);
+ out_sg++;
+ }
+ } else {
+ struct skb_shared_info *si;
+ int i;
+
+ /* If skb is nonlinear, then its buffer must contain
+ * only header and nothing more. Data is stored in
+ * the fragged part.
+ */
+ WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
+
+ si = skb_shinfo(skb);
+
+ for (i = 0; i < si->nr_frags; i++) {
+ skb_frag_t *skb_frag = &si->frags[i];
+ void *va;
+
+ /* We will use 'page_to_virt()' for the userspace page
+ * here, because virtio or dma-mapping layers will call
+ * 'virt_to_phys()' later to fill the buffer descriptor.
+ * We don't touch memory at "virtual" address of this page.
+ */
+ va = page_to_virt(skb_frag_page(skb_frag));
+ sg_init_one(sgs[out_sg],
+ va + skb_frag_off(skb_frag),
+ skb_frag_size(skb_frag));
+ out_sg++;
+ }
+ }
+
+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, gfp);
+ /* Usually this means that there is no more space available in
+ * the vq
+ */
+ if (ret < 0)
+ return ret;
+
+ virtio_transport_deliver_tap_pkt(skb);
+ return 0;
+}
+
static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
@@ -111,66 +168,22 @@ virtio_transport_send_pkt_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
for (;;) {
- int ret, in_sg = 0, out_sg = 0;
- struct scatterlist **sgs;
struct sk_buff *skb;
bool reply;
+ int ret;
skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
if (!skb)
break;
reply = virtio_vsock_skb_reply(skb);
- sgs = vsock->out_sgs;
- sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
- sizeof(*virtio_vsock_hdr(skb)));
- out_sg++;
-
- if (!skb_is_nonlinear(skb)) {
- if (skb->len > 0) {
- sg_init_one(sgs[out_sg], skb->data, skb->len);
- out_sg++;
- }
- } else {
- struct skb_shared_info *si;
- int i;
-
- /* If skb is nonlinear, then its buffer must contain
- * only header and nothing more. Data is stored in
- * the fragged part.
- */
- WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
-
- si = skb_shinfo(skb);
-
- for (i = 0; i < si->nr_frags; i++) {
- skb_frag_t *skb_frag = &si->frags[i];
- void *va;
- /* We will use 'page_to_virt()' for the userspace page
- * here, because virtio or dma-mapping layers will call
- * 'virt_to_phys()' later to fill the buffer descriptor.
- * We don't touch memory at "virtual" address of this page.
- */
- va = page_to_virt(skb_frag_page(skb_frag));
- sg_init_one(sgs[out_sg],
- va + skb_frag_off(skb_frag),
- skb_frag_size(skb_frag));
- out_sg++;
- }
- }
-
- ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
- /* Usually this means that there is no more space available in
- * the vq
- */
+ ret = virtio_transport_send_skb(skb, vq, vsock, GFP_KERNEL);
if (ret < 0) {
virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
break;
}
- virtio_transport_deliver_tap_pkt(skb);
-
if (reply) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
int val;
@@ -195,6 +208,28 @@ out:
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
+/* Caller need to hold RCU for vsock.
+ * Returns 0 if the packet is successfully put on the vq.
+ */
+static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struct sk_buff *skb)
+{
+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
+ int ret;
+
+ /* Inside RCU, can't sleep! */
+ ret = mutex_trylock(&vsock->tx_lock);
+ if (unlikely(ret == 0))
+ return -EBUSY;
+
+ ret = virtio_transport_send_skb(skb, vq, vsock, GFP_ATOMIC);
+ if (ret == 0)
+ virtqueue_kick(vq);
+
+ mutex_unlock(&vsock->tx_lock);
+
+ return ret;
+}
+
static int
virtio_transport_send_pkt(struct sk_buff *skb)
{
@@ -218,11 +253,20 @@ virtio_transport_send_pkt(struct sk_buff *skb)
goto out_rcu;
}
- if (virtio_vsock_skb_reply(skb))
- atomic_inc(&vsock->queued_replies);
+ /* If send_pkt_queue is empty, we can safely bypass this queue
+ * because packet order is maintained and (try) to put the packet
+ * on the virtqueue using virtio_transport_send_skb_fast_path.
+ * If this fails we simply put the packet on the intermediate
+ * queue and schedule the worker.
+ */
+ if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) ||
+ virtio_transport_send_skb_fast_path(vsock, skb)) {
+ if (virtio_vsock_skb_reply(skb))
+ atomic_inc(&vsock->queued_replies);
- virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
- queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+ virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+ }
out_rcu:
rcu_read_unlock();
@@ -311,7 +355,7 @@ static void virtio_transport_tx_work(struct work_struct *work)
virtqueue_disable_cb(vq);
while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
- consume_skb(skb);
+ virtio_transport_consume_skb_sent(skb, true);
added = true;
}
} while (!virtqueue_enable_cb(vq));
@@ -540,6 +584,8 @@ static struct virtio_transport virtio_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 16ff976a86e3..9acc13ab3f82 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -400,6 +400,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (virtio_transport_init_zcopy_skb(vsk, skb,
info->msg,
can_zcopy)) {
+ kfree_skb(skb);
ret = -ENOMEM;
break;
}
@@ -463,6 +464,26 @@ void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
+void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
+{
+ struct sock *s = skb->sk;
+
+ if (s && skb->len) {
+ struct vsock_sock *vs = vsock_sk(s);
+ struct virtio_vsock_sock *vvs;
+
+ vvs = vs->trans;
+
+ spin_lock_bh(&vvs->tx_lock);
+ vvs->bytes_unsent -= skb->len;
+ spin_unlock_bh(&vvs->tx_lock);
+ }
+
+ if (consume)
+ consume_skb(skb);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);
+
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
u32 ret;
@@ -475,6 +496,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
if (ret > credit)
ret = credit;
vvs->tx_cnt += ret;
+ vvs->bytes_unsent += ret;
spin_unlock_bh(&vvs->tx_lock);
return ret;
@@ -488,6 +510,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
spin_lock_bh(&vvs->tx_lock);
vvs->tx_cnt -= credit;
+ vvs->bytes_unsent -= credit;
spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
@@ -1087,9 +1110,23 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
struct virtio_vsock_sock *vvs = vsk->trans;
kfree(vvs);
+ vsk->trans = NULL;
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
+ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ size_t ret;
+
+ spin_lock_bh(&vvs->tx_lock);
+ ret = vvs->bytes_unsent;
+ spin_unlock_bh(&vvs->tx_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
+
static int virtio_transport_reset(struct vsock_sock *vsk,
struct sk_buff *skb)
{
@@ -1477,6 +1514,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
return -ENOMEM;
}
+ /* __vsock_release() might have already flushed accept_queue.
+ * Subsequent enqueues would lead to a memory leak.
+ */
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
+ virtio_transport_reset_no_sock(t, skb);
+ return -ESHUTDOWN;
+ }
+
child = vsock_create_connected(sk);
if (!child) {
virtio_transport_reset_no_sock(t, skb);
@@ -1672,6 +1717,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
{
struct virtio_vsock_sock *vvs = vsk->trans;
struct sock *sk = sk_vsock(vsk);
+ struct virtio_vsock_hdr *hdr;
struct sk_buff *skb;
int off = 0;
int err;
@@ -1681,10 +1727,19 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
* works for types other than dgrams.
*/
skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
+ if (!skb) {
+ spin_unlock_bh(&vvs->rx_lock);
+ return err;
+ }
+
+ hdr = virtio_vsock_hdr(skb);
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
+ vvs->msg_count--;
+
+ virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
spin_unlock_bh(&vvs->rx_lock);
- if (!skb)
- return err;
+ virtio_transport_send_credit_update(vsk);
return recv_actor(sk, skb);
}
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index c42c5cc18f32..4aa6e74ec295 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -114,14 +114,6 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
return copied;
}
-/* Copy of original proto with updated sock_map methods */
-static struct proto vsock_bpf_prot = {
- .close = sock_map_close,
- .recvmsg = vsock_bpf_recvmsg,
- .sock_is_readable = sk_msg_is_readable,
- .unhash = sock_map_unhash,
-};
-
static void vsock_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
*prot = *base;
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 6dea6119f5b2..6e78927a598e 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -98,6 +98,8 @@ static struct virtio_transport loopback_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
@@ -123,6 +125,10 @@ static void vsock_loopback_work(struct work_struct *work)
spin_unlock_bh(&vsock->pkt_queue.lock);
while ((skb = __skb_dequeue(&pkts))) {
+ /* Decrement the bytes_unsent counter without deallocating skb
+ * It is freed by the receiver.
+ */
+ virtio_transport_consume_skb_sent(skb, false);
virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&loopback_transport, skb);
}
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 10345388ad13..2d67b5f2010e 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -11,9 +11,6 @@ config WEXT_PROC
depends on PROC_FS
depends on WEXT_CORE
-config WEXT_SPY
- bool
-
config WEXT_PRIV
bool
@@ -188,19 +185,12 @@ config CFG80211_CRDA_SUPPORT
If unsure, say Y.
config CFG80211_WEXT
- bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
+ bool "cfg80211 wireless extensions compatibility"
select WEXT_CORE
- default y if CFG80211_WEXT_EXPORT
help
Enable this option if you need old userspace for wireless
extensions with cfg80211-based drivers.
-config CFG80211_WEXT_EXPORT
- bool
- help
- Drivers should select this option if they require cfg80211's
- wext compatibility symbols to be exported.
-
config CFG80211_KUNIT_TEST
tristate "KUnit tests for cfg80211" if !KUNIT_ALL_TESTS
depends on KUNIT
@@ -212,36 +202,3 @@ config CFG80211_KUNIT_TEST
If unsure, say N.
endif # CFG80211
-
-config LIB80211
- tristate
- default n
- help
- This options enables a library of common routines used
- by IEEE802.11 wireless LAN drivers.
-
- Drivers should select this themselves if needed.
-
-config LIB80211_CRYPT_WEP
- tristate
- select CRYPTO_LIB_ARC4
-
-config LIB80211_CRYPT_CCMP
- tristate
- select CRYPTO
- select CRYPTO_AES
- select CRYPTO_CCM
-
-config LIB80211_CRYPT_TKIP
- tristate
- select CRYPTO_LIB_ARC4
-
-config LIB80211_DEBUG
- bool "lib80211 debugging messages"
- depends on LIB80211
- default n
- help
- You can enable this if you want verbose debugging messages
- from lib80211.
-
- If unsure, say N.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 1d49cc8b6da1..62a83faf0e07 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,14 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CFG80211) += cfg80211.o
-obj-$(CONFIG_LIB80211) += lib80211.o
-obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
-obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
-obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
obj-y += tests/
obj-$(CONFIG_WEXT_CORE) += wext-core.o
obj-$(CONFIG_WEXT_PROC) += wext-proc.o
-obj-$(CONFIG_WEXT_SPY) += wext-spy.o
obj-$(CONFIG_WEXT_PRIV) += wext-priv.o
cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index e579d7e1425f..40b6375a5de4 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -289,7 +289,7 @@ static bool cfg80211_valid_center_freq(u32 center,
/*
* Valid channels are packed from lowest frequency towards higher ones.
- * So test that the lower frequency alignes with one of these steps.
+ * So test that the lower frequency aligns with one of these steps.
*/
return (center - bw / 2 - 5945) % step == 0;
}
@@ -1628,6 +1628,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
EXPORT_SYMBOL(cfg80211_reg_check_beaconing);
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef)
{
if (!rdev->ops->set_monitor_channel)
@@ -1635,7 +1636,7 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
if (!cfg80211_has_monitors_only(rdev))
return -EBUSY;
- return rdev_set_monitor_channel(rdev, chandef);
+ return rdev_set_monitor_channel(rdev, dev, chandef);
}
bool cfg80211_any_usable_channels(struct wiphy *wiphy,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4d5d351bd0b5..afbdc549fb4a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -165,11 +165,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
continue;
- wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = false;
err = dev_change_net_namespace(wdev->netdev, net, "wlan%d");
if (err)
break;
- wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = true;
}
if (err) {
@@ -181,11 +181,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
list) {
if (!wdev->netdev)
continue;
- wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = false;
err = dev_change_net_namespace(wdev->netdev, net,
"wlan%d");
WARN_ON(err);
- wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = true;
}
return err;
@@ -603,16 +603,20 @@ use_default_name:
}
EXPORT_SYMBOL(wiphy_new_nm);
-static int wiphy_verify_combinations(struct wiphy *wiphy)
+static
+int wiphy_verify_iface_combinations(struct wiphy *wiphy,
+ const struct ieee80211_iface_combination *iface_comb,
+ int n_iface_comb,
+ bool combined_radio)
{
const struct ieee80211_iface_combination *c;
int i, j;
- for (i = 0; i < wiphy->n_iface_combinations; i++) {
+ for (i = 0; i < n_iface_comb; i++) {
u32 cnt = 0;
u16 all_iftypes = 0;
- c = &wiphy->iface_combinations[i];
+ c = &iface_comb[i];
/*
* Combinations with just one interface aren't real,
@@ -625,9 +629,13 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(!c->num_different_channels))
return -EINVAL;
- /* DFS only works on one channel. */
- if (WARN_ON(c->radar_detect_widths &&
- (c->num_different_channels > 1)))
+ /* DFS only works on one channel. Avoid this check
+ * for multi-radio global combination, since it hold
+ * the capabilities of all radio combinations.
+ */
+ if (!combined_radio &&
+ WARN_ON(c->radar_detect_widths &&
+ c->num_different_channels > 1))
return -EINVAL;
if (WARN_ON(!c->n_limits))
@@ -648,13 +656,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(wiphy->software_iftypes & types))
return -EINVAL;
- /* Only a single P2P_DEVICE can be allowed */
- if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) &&
+ /* Only a single P2P_DEVICE can be allowed, avoid this
+ * check for multi-radio global combination, since it
+ * hold the capabilities of all radio combinations.
+ */
+ if (!combined_radio &&
+ WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) &&
c->limits[j].max > 1))
return -EINVAL;
- /* Only a single NAN can be allowed */
- if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
+ /* Only a single NAN can be allowed, avoid this
+ * check for multi-radio global combination, since it
+ * hold the capabilities of all radio combinations.
+ */
+ if (!combined_radio &&
+ WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
c->limits[j].max > 1))
return -EINVAL;
@@ -693,6 +709,34 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
return 0;
}
+static int wiphy_verify_combinations(struct wiphy *wiphy)
+{
+ int i, ret;
+ bool combined_radio = false;
+
+ if (wiphy->n_radio) {
+ for (i = 0; i < wiphy->n_radio; i++) {
+ const struct wiphy_radio *radio = &wiphy->radio[i];
+
+ ret = wiphy_verify_iface_combinations(wiphy,
+ radio->iface_combinations,
+ radio->n_iface_combinations,
+ false);
+ if (ret)
+ return ret;
+ }
+
+ combined_radio = true;
+ }
+
+ ret = wiphy_verify_iface_combinations(wiphy,
+ wiphy->iface_combinations,
+ wiphy->n_iface_combinations,
+ combined_radio);
+
+ return ret;
+}
+
int wiphy_register(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1236,6 +1280,7 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
/* deleted from the list, so can't be found from nl80211 any more */
cqm_config = rcu_access_pointer(wdev->cqm_config);
kfree_rcu(cqm_config, rcu_head);
+ RCU_INIT_POINTER(wdev->cqm_config, NULL);
/*
* Ensure that all events have been processed and
@@ -1386,6 +1431,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
/* allow mac80211 to determine the timeout */
wdev->ps_timeout = -1;
+ wdev->radio_mask = BIT(wdev->wiphy->n_radio) - 1;
+
if ((wdev->iftype == NL80211_IFTYPE_STATION ||
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
@@ -1473,7 +1520,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
SET_NETDEV_DEVTYPE(dev, &wiphy_type);
wdev->netdev = dev;
/* can only change netns with wiphy */
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
cfg80211_init_wdev(wdev);
break;
@@ -1704,6 +1751,13 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
}
EXPORT_SYMBOL_GPL(wiphy_delayed_work_flush);
+bool wiphy_delayed_work_pending(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork)
+{
+ return timer_pending(&dwork->timer);
+}
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending);
+
static int __init cfg80211_init(void)
{
int err;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 41c8c0e3ba2e..4c45f994a8c0 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -170,6 +170,12 @@ static inline int for_each_rdev_check_rtnl(void)
if (for_each_rdev_check_rtnl()) {} else \
list_for_each_entry(rdev, &cfg80211_rdev_list, list)
+enum bss_source_type {
+ BSS_SOURCE_DIRECT = 0,
+ BSS_SOURCE_MBSSID,
+ BSS_SOURCE_STA_PROFILE,
+};
+
struct cfg80211_internal_bss {
struct list_head list;
struct list_head hidden_list;
@@ -191,6 +197,8 @@ struct cfg80211_internal_bss {
*/
u8 parent_bssid[ETH_ALEN] __aligned(2);
+ enum bss_source_type bss_source;
+
/* must be last because of priv member */
struct cfg80211_bss pub;
};
@@ -508,6 +516,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
}
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef);
int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 34e5acff3935..1e3ed29f7cfc 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -94,7 +94,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
lockdep_assert_held(&rdev->wiphy.mtx);
- if (wdev->cac_started)
+ if (wdev->links[0].cac_started)
return -EBUSY;
if (wdev->u.ibss.ssid_len)
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
deleted file mode 100644
index d66a913027e0..000000000000
--- a/net/wireless/lib80211.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 -- common bits for IEEE802.11 drivers
- *
- * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com>
- *
- * Portions copied from old ieee80211 component, w/ original copyright
- * notices below:
- *
- * Host AP crypto routines
- *
- * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
- * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/ieee80211.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-
-#include <net/lib80211.h>
-
-#define DRV_DESCRIPTION "common routines for IEEE802.11 drivers"
-
-MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
-MODULE_LICENSE("GPL");
-
-struct lib80211_crypto_alg {
- struct list_head list;
- struct lib80211_crypto_ops *ops;
-};
-
-static LIST_HEAD(lib80211_crypto_algs);
-static DEFINE_SPINLOCK(lib80211_crypto_lock);
-
-static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info,
- int force);
-static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info);
-static void lib80211_crypt_deinit_handler(struct timer_list *t);
-
-int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
- spinlock_t *lock)
-{
- memset(info, 0, sizeof(*info));
-
- info->name = name;
- info->lock = lock;
-
- INIT_LIST_HEAD(&info->crypt_deinit_list);
- timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler,
- 0);
-
- return 0;
-}
-EXPORT_SYMBOL(lib80211_crypt_info_init);
-
-void lib80211_crypt_info_free(struct lib80211_crypt_info *info)
-{
- int i;
-
- lib80211_crypt_quiescing(info);
- del_timer_sync(&info->crypt_deinit_timer);
- lib80211_crypt_deinit_entries(info, 1);
-
- for (i = 0; i < NUM_WEP_KEYS; i++) {
- struct lib80211_crypt_data *crypt = info->crypt[i];
- if (crypt) {
- if (crypt->ops) {
- crypt->ops->deinit(crypt->priv);
- module_put(crypt->ops->owner);
- }
- kfree(crypt);
- info->crypt[i] = NULL;
- }
- }
-}
-EXPORT_SYMBOL(lib80211_crypt_info_free);
-
-static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info,
- int force)
-{
- struct lib80211_crypt_data *entry, *next;
- unsigned long flags;
-
- spin_lock_irqsave(info->lock, flags);
- list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) {
- if (atomic_read(&entry->refcnt) != 0 && !force)
- continue;
-
- list_del(&entry->list);
-
- if (entry->ops) {
- entry->ops->deinit(entry->priv);
- module_put(entry->ops->owner);
- }
- kfree(entry);
- }
- spin_unlock_irqrestore(info->lock, flags);
-}
-
-/* After this, crypt_deinit_list won't accept new members */
-static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info)
-{
- unsigned long flags;
-
- spin_lock_irqsave(info->lock, flags);
- info->crypt_quiesced = 1;
- spin_unlock_irqrestore(info->lock, flags);
-}
-
-static void lib80211_crypt_deinit_handler(struct timer_list *t)
-{
- struct lib80211_crypt_info *info = from_timer(info, t,
- crypt_deinit_timer);
- unsigned long flags;
-
- lib80211_crypt_deinit_entries(info, 0);
-
- spin_lock_irqsave(info->lock, flags);
- if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) {
- printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
- "deletion list\n", info->name);
- info->crypt_deinit_timer.expires = jiffies + HZ;
- add_timer(&info->crypt_deinit_timer);
- }
- spin_unlock_irqrestore(info->lock, flags);
-}
-
-void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
- struct lib80211_crypt_data **crypt)
-{
- struct lib80211_crypt_data *tmp;
- unsigned long flags;
-
- if (*crypt == NULL)
- return;
-
- tmp = *crypt;
- *crypt = NULL;
-
- /* must not run ops->deinit() while there may be pending encrypt or
- * decrypt operations. Use a list of delayed deinits to avoid needing
- * locking. */
-
- spin_lock_irqsave(info->lock, flags);
- if (!info->crypt_quiesced) {
- list_add(&tmp->list, &info->crypt_deinit_list);
- if (!timer_pending(&info->crypt_deinit_timer)) {
- info->crypt_deinit_timer.expires = jiffies + HZ;
- add_timer(&info->crypt_deinit_timer);
- }
- }
- spin_unlock_irqrestore(info->lock, flags);
-}
-EXPORT_SYMBOL(lib80211_crypt_delayed_deinit);
-
-int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops)
-{
- unsigned long flags;
- struct lib80211_crypto_alg *alg;
-
- alg = kzalloc(sizeof(*alg), GFP_KERNEL);
- if (alg == NULL)
- return -ENOMEM;
-
- alg->ops = ops;
-
- spin_lock_irqsave(&lib80211_crypto_lock, flags);
- list_add(&alg->list, &lib80211_crypto_algs);
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
-
- printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n",
- ops->name);
-
- return 0;
-}
-EXPORT_SYMBOL(lib80211_register_crypto_ops);
-
-int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
-{
- struct lib80211_crypto_alg *alg;
- unsigned long flags;
-
- spin_lock_irqsave(&lib80211_crypto_lock, flags);
- list_for_each_entry(alg, &lib80211_crypto_algs, list) {
- if (alg->ops == ops)
- goto found;
- }
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- return -EINVAL;
-
- found:
- printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n",
- ops->name);
- list_del(&alg->list);
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- kfree(alg);
- return 0;
-}
-EXPORT_SYMBOL(lib80211_unregister_crypto_ops);
-
-struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
-{
- struct lib80211_crypto_alg *alg;
- unsigned long flags;
-
- spin_lock_irqsave(&lib80211_crypto_lock, flags);
- list_for_each_entry(alg, &lib80211_crypto_algs, list) {
- if (strcmp(alg->ops->name, name) == 0)
- goto found;
- }
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- return NULL;
-
- found:
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- return alg->ops;
-}
-EXPORT_SYMBOL(lib80211_get_crypto_ops);
-
-static void *lib80211_crypt_null_init(int keyidx)
-{
- return (void *)1;
-}
-
-static void lib80211_crypt_null_deinit(void *priv)
-{
-}
-
-static struct lib80211_crypto_ops lib80211_crypt_null = {
- .name = "NULL",
- .init = lib80211_crypt_null_init,
- .deinit = lib80211_crypt_null_deinit,
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_init(void)
-{
- pr_info(DRV_DESCRIPTION "\n");
- return lib80211_register_crypto_ops(&lib80211_crypt_null);
-}
-
-static void __exit lib80211_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_null);
- BUG_ON(!list_empty(&lib80211_crypto_algs));
-}
-
-module_init(lib80211_init);
-module_exit(lib80211_exit);
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
deleted file mode 100644
index cca5e1cf089e..000000000000
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ /dev/null
@@ -1,448 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 crypt: host-based CCMP encryption implementation for lib80211
- *
- * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <asm/string.h>
-#include <linux/wireless.h>
-
-#include <linux/ieee80211.h>
-
-#include <linux/crypto.h>
-#include <crypto/aead.h>
-
-#include <net/lib80211.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("Host AP crypt: CCMP");
-MODULE_LICENSE("GPL");
-
-#define AES_BLOCK_LEN 16
-#define CCMP_HDR_LEN 8
-#define CCMP_MIC_LEN 8
-#define CCMP_TK_LEN 16
-#define CCMP_PN_LEN 6
-
-struct lib80211_ccmp_data {
- u8 key[CCMP_TK_LEN];
- int key_set;
-
- u8 tx_pn[CCMP_PN_LEN];
- u8 rx_pn[CCMP_PN_LEN];
-
- u32 dot11RSNAStatsCCMPFormatErrors;
- u32 dot11RSNAStatsCCMPReplays;
- u32 dot11RSNAStatsCCMPDecryptErrors;
-
- int key_idx;
-
- struct crypto_aead *tfm;
-
- /* scratch buffers for virt_to_page() (crypto API) */
- u8 tx_aad[2 * AES_BLOCK_LEN];
- u8 rx_aad[2 * AES_BLOCK_LEN];
-};
-
-static void *lib80211_ccmp_init(int key_idx)
-{
- struct lib80211_ccmp_data *priv;
-
- priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
- if (priv == NULL)
- goto fail;
- priv->key_idx = key_idx;
-
- priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(priv->tfm)) {
- priv->tfm = NULL;
- goto fail;
- }
-
- return priv;
-
- fail:
- if (priv) {
- if (priv->tfm)
- crypto_free_aead(priv->tfm);
- kfree(priv);
- }
-
- return NULL;
-}
-
-static void lib80211_ccmp_deinit(void *priv)
-{
- struct lib80211_ccmp_data *_priv = priv;
- if (_priv && _priv->tfm)
- crypto_free_aead(_priv->tfm);
- kfree(priv);
-}
-
-static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr,
- const u8 *pn, u8 *iv, u8 *aad)
-{
- u8 *pos, qc = 0;
- size_t aad_len;
- int a4_included, qc_included;
-
- a4_included = ieee80211_has_a4(hdr->frame_control);
- qc_included = ieee80211_is_data_qos(hdr->frame_control);
-
- aad_len = 22;
- if (a4_included)
- aad_len += 6;
- if (qc_included) {
- pos = (u8 *) & hdr->addr4;
- if (a4_included)
- pos += 6;
- qc = *pos & 0x0f;
- aad_len += 2;
- }
-
- /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
- * mode authentication are not allowed to collide, yet both are derived
- * from the same vector. We only set L := 1 here to indicate that the
- * data size can be represented in (L+1) bytes. The CCM layer will take
- * care of storing the data length in the top (L+1) bytes and setting
- * and clearing the other bits as is required to derive the two IVs.
- */
- iv[0] = 0x1;
-
- /* Nonce: QC | A2 | PN */
- iv[1] = qc;
- memcpy(iv + 2, hdr->addr2, ETH_ALEN);
- memcpy(iv + 8, pn, CCMP_PN_LEN);
-
- /* AAD:
- * FC with bits 4..6 and 11..13 masked to zero; 14 is always one
- * A1 | A2 | A3
- * SC with bits 4..15 (seq#) masked to zero
- * A4 (if present)
- * QC (if present)
- */
- pos = (u8 *) hdr;
- aad[0] = pos[0] & 0x8f;
- aad[1] = pos[1] & 0xc7;
- memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN);
- pos = (u8 *) & hdr->seq_ctrl;
- aad[20] = pos[0] & 0x0f;
- aad[21] = 0; /* all bits masked */
- memset(aad + 22, 0, 8);
- if (a4_included)
- memcpy(aad + 22, hdr->addr4, ETH_ALEN);
- if (qc_included) {
- aad[a4_included ? 28 : 22] = qc;
- /* rest of QC masked */
- }
- return aad_len;
-}
-
-static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
- u8 *aeskey, int keylen, void *priv)
-{
- struct lib80211_ccmp_data *key = priv;
- int i;
- u8 *pos;
-
- if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len)
- return -1;
-
- if (aeskey != NULL && keylen >= CCMP_TK_LEN)
- memcpy(aeskey, key->key, CCMP_TK_LEN);
-
- pos = skb_push(skb, CCMP_HDR_LEN);
- memmove(pos, pos + CCMP_HDR_LEN, hdr_len);
- pos += hdr_len;
-
- i = CCMP_PN_LEN - 1;
- while (i >= 0) {
- key->tx_pn[i]++;
- if (key->tx_pn[i] != 0)
- break;
- i--;
- }
-
- *pos++ = key->tx_pn[5];
- *pos++ = key->tx_pn[4];
- *pos++ = 0;
- *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
- *pos++ = key->tx_pn[3];
- *pos++ = key->tx_pn[2];
- *pos++ = key->tx_pn[1];
- *pos++ = key->tx_pn[0];
-
- return CCMP_HDR_LEN;
-}
-
-static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_ccmp_data *key = priv;
- struct ieee80211_hdr *hdr;
- struct aead_request *req;
- struct scatterlist sg[2];
- u8 *aad = key->tx_aad;
- u8 iv[AES_BLOCK_LEN];
- int len, data_len, aad_len;
- int ret;
-
- if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len)
- return -1;
-
- data_len = skb->len - hdr_len;
- len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv);
- if (len < 0)
- return -1;
-
- req = aead_request_alloc(key->tfm, GFP_ATOMIC);
- if (!req)
- return -ENOMEM;
-
- hdr = (struct ieee80211_hdr *)skb->data;
- aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad);
-
- skb_put(skb, CCMP_MIC_LEN);
-
- sg_init_table(sg, 2);
- sg_set_buf(&sg[0], aad, aad_len);
- sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN,
- data_len + CCMP_MIC_LEN);
-
- aead_request_set_callback(req, 0, NULL, NULL);
- aead_request_set_ad(req, aad_len);
- aead_request_set_crypt(req, sg, sg, data_len, iv);
-
- ret = crypto_aead_encrypt(req);
- aead_request_free(req);
-
- return ret;
-}
-
-/*
- * deal with seq counter wrapping correctly.
- * refer to timer_after() for jiffies wrapping handling
- */
-static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o)
-{
- u32 iv32_n, iv16_n;
- u32 iv32_o, iv16_o;
-
- iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3];
- iv16_n = (pn_n[4] << 8) | pn_n[5];
-
- iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3];
- iv16_o = (pn_o[4] << 8) | pn_o[5];
-
- if ((s32)iv32_n - (s32)iv32_o < 0 ||
- (iv32_n == iv32_o && iv16_n <= iv16_o))
- return 1;
- return 0;
-}
-
-static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_ccmp_data *key = priv;
- u8 keyidx, *pos;
- struct ieee80211_hdr *hdr;
- struct aead_request *req;
- struct scatterlist sg[2];
- u8 *aad = key->rx_aad;
- u8 iv[AES_BLOCK_LEN];
- u8 pn[6];
- int aad_len, ret;
- size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN;
-
- if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
- key->dot11RSNAStatsCCMPFormatErrors++;
- return -1;
- }
-
- hdr = (struct ieee80211_hdr *)skb->data;
- pos = skb->data + hdr_len;
- keyidx = pos[3];
- if (!(keyidx & (1 << 5))) {
- net_dbg_ratelimited("CCMP: received packet without ExtIV flag from %pM\n",
- hdr->addr2);
- key->dot11RSNAStatsCCMPFormatErrors++;
- return -2;
- }
- keyidx >>= 6;
- if (key->key_idx != keyidx) {
- net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n",
- key->key_idx, keyidx);
- return -6;
- }
- if (!key->key_set) {
- net_dbg_ratelimited("CCMP: received packet from %pM with keyid=%d that does not have a configured key\n",
- hdr->addr2, keyidx);
- return -3;
- }
-
- pn[0] = pos[7];
- pn[1] = pos[6];
- pn[2] = pos[5];
- pn[3] = pos[4];
- pn[4] = pos[1];
- pn[5] = pos[0];
- pos += 8;
-
- if (ccmp_replay_check(pn, key->rx_pn)) {
-#ifdef CONFIG_LIB80211_DEBUG
- net_dbg_ratelimited("CCMP: replay detected: STA=%pM previous PN %02x%02x%02x%02x%02x%02x received PN %02x%02x%02x%02x%02x%02x\n",
- hdr->addr2,
- key->rx_pn[0], key->rx_pn[1], key->rx_pn[2],
- key->rx_pn[3], key->rx_pn[4], key->rx_pn[5],
- pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
-#endif
- key->dot11RSNAStatsCCMPReplays++;
- return -4;
- }
-
- req = aead_request_alloc(key->tfm, GFP_ATOMIC);
- if (!req)
- return -ENOMEM;
-
- aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad);
-
- sg_init_table(sg, 2);
- sg_set_buf(&sg[0], aad, aad_len);
- sg_set_buf(&sg[1], pos, data_len);
-
- aead_request_set_callback(req, 0, NULL, NULL);
- aead_request_set_ad(req, aad_len);
- aead_request_set_crypt(req, sg, sg, data_len, iv);
-
- ret = crypto_aead_decrypt(req);
- aead_request_free(req);
-
- if (ret) {
- net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n",
- hdr->addr2, ret);
- key->dot11RSNAStatsCCMPDecryptErrors++;
- return -5;
- }
-
- memcpy(key->rx_pn, pn, CCMP_PN_LEN);
-
- /* Remove hdr and MIC */
- memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len);
- skb_pull(skb, CCMP_HDR_LEN);
- skb_trim(skb, skb->len - CCMP_MIC_LEN);
-
- return keyidx;
-}
-
-static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_ccmp_data *data = priv;
- int keyidx;
- struct crypto_aead *tfm = data->tfm;
-
- keyidx = data->key_idx;
- memset(data, 0, sizeof(*data));
- data->key_idx = keyidx;
- data->tfm = tfm;
- if (len == CCMP_TK_LEN) {
- memcpy(data->key, key, CCMP_TK_LEN);
- data->key_set = 1;
- if (seq) {
- data->rx_pn[0] = seq[5];
- data->rx_pn[1] = seq[4];
- data->rx_pn[2] = seq[3];
- data->rx_pn[3] = seq[2];
- data->rx_pn[4] = seq[1];
- data->rx_pn[5] = seq[0];
- }
- if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) ||
- crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN))
- return -1;
- } else if (len == 0)
- data->key_set = 0;
- else
- return -1;
-
- return 0;
-}
-
-static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_ccmp_data *data = priv;
-
- if (len < CCMP_TK_LEN)
- return -1;
-
- if (!data->key_set)
- return 0;
- memcpy(key, data->key, CCMP_TK_LEN);
-
- if (seq) {
- seq[0] = data->tx_pn[5];
- seq[1] = data->tx_pn[4];
- seq[2] = data->tx_pn[3];
- seq[3] = data->tx_pn[2];
- seq[4] = data->tx_pn[1];
- seq[5] = data->tx_pn[0];
- }
-
- return CCMP_TK_LEN;
-}
-
-static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
-{
- struct lib80211_ccmp_data *ccmp = priv;
-
- seq_printf(m,
- "key[%d] alg=CCMP key_set=%d "
- "tx_pn=%02x%02x%02x%02x%02x%02x "
- "rx_pn=%02x%02x%02x%02x%02x%02x "
- "format_errors=%d replays=%d decrypt_errors=%d\n",
- ccmp->key_idx, ccmp->key_set,
- ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
- ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
- ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
- ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
- ccmp->dot11RSNAStatsCCMPFormatErrors,
- ccmp->dot11RSNAStatsCCMPReplays,
- ccmp->dot11RSNAStatsCCMPDecryptErrors);
-}
-
-static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
- .name = "CCMP",
- .init = lib80211_ccmp_init,
- .deinit = lib80211_ccmp_deinit,
- .encrypt_mpdu = lib80211_ccmp_encrypt,
- .decrypt_mpdu = lib80211_ccmp_decrypt,
- .encrypt_msdu = NULL,
- .decrypt_msdu = NULL,
- .set_key = lib80211_ccmp_set_key,
- .get_key = lib80211_ccmp_get_key,
- .print_stats = lib80211_ccmp_print_stats,
- .extra_mpdu_prefix_len = CCMP_HDR_LEN,
- .extra_mpdu_postfix_len = CCMP_MIC_LEN,
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_crypto_ccmp_init(void)
-{
- return lib80211_register_crypto_ops(&lib80211_crypt_ccmp);
-}
-
-static void __exit lib80211_crypto_ccmp_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp);
-}
-
-module_init(lib80211_crypto_ccmp_init);
-module_exit(lib80211_crypto_ccmp_exit);
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
deleted file mode 100644
index 5c8cdf7681e3..000000000000
--- a/net/wireless/lib80211_crypt_tkip.c
+++ /dev/null
@@ -1,738 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 crypt: host-based TKIP encryption implementation for lib80211
- *
- * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/fips.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/mm.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <asm/string.h>
-
-#include <linux/wireless.h>
-#include <linux/ieee80211.h>
-#include <net/iw_handler.h>
-
-#include <crypto/arc4.h>
-#include <crypto/hash.h>
-#include <linux/crypto.h>
-#include <linux/crc32.h>
-
-#include <net/lib80211.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("lib80211 crypt: TKIP");
-MODULE_LICENSE("GPL");
-
-#define TKIP_HDR_LEN 8
-
-struct lib80211_tkip_data {
-#define TKIP_KEY_LEN 32
- u8 key[TKIP_KEY_LEN];
- int key_set;
-
- u32 tx_iv32;
- u16 tx_iv16;
- u16 tx_ttak[5];
- int tx_phase1_done;
-
- u32 rx_iv32;
- u16 rx_iv16;
- u16 rx_ttak[5];
- int rx_phase1_done;
- u32 rx_iv32_new;
- u16 rx_iv16_new;
-
- u32 dot11RSNAStatsTKIPReplays;
- u32 dot11RSNAStatsTKIPICVErrors;
- u32 dot11RSNAStatsTKIPLocalMICFailures;
-
- int key_idx;
-
- struct arc4_ctx rx_ctx_arc4;
- struct arc4_ctx tx_ctx_arc4;
- struct crypto_shash *rx_tfm_michael;
- struct crypto_shash *tx_tfm_michael;
-
- /* scratch buffers for virt_to_page() (crypto API) */
- u8 rx_hdr[16], tx_hdr[16];
-
- unsigned long flags;
-};
-
-static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv)
-{
- struct lib80211_tkip_data *_priv = priv;
- unsigned long old_flags = _priv->flags;
- _priv->flags = flags;
- return old_flags;
-}
-
-static unsigned long lib80211_tkip_get_flags(void *priv)
-{
- struct lib80211_tkip_data *_priv = priv;
- return _priv->flags;
-}
-
-static void *lib80211_tkip_init(int key_idx)
-{
- struct lib80211_tkip_data *priv;
-
- if (fips_enabled)
- return NULL;
-
- priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
- if (priv == NULL)
- goto fail;
-
- priv->key_idx = key_idx;
-
- priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
- if (IS_ERR(priv->tx_tfm_michael)) {
- priv->tx_tfm_michael = NULL;
- goto fail;
- }
-
- priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
- if (IS_ERR(priv->rx_tfm_michael)) {
- priv->rx_tfm_michael = NULL;
- goto fail;
- }
-
- return priv;
-
- fail:
- if (priv) {
- crypto_free_shash(priv->tx_tfm_michael);
- crypto_free_shash(priv->rx_tfm_michael);
- kfree(priv);
- }
-
- return NULL;
-}
-
-static void lib80211_tkip_deinit(void *priv)
-{
- struct lib80211_tkip_data *_priv = priv;
- if (_priv) {
- crypto_free_shash(_priv->tx_tfm_michael);
- crypto_free_shash(_priv->rx_tfm_michael);
- }
- kfree_sensitive(priv);
-}
-
-static inline u16 RotR1(u16 val)
-{
- return (val >> 1) | (val << 15);
-}
-
-static inline u8 Lo8(u16 val)
-{
- return val & 0xff;
-}
-
-static inline u8 Hi8(u16 val)
-{
- return val >> 8;
-}
-
-static inline u16 Lo16(u32 val)
-{
- return val & 0xffff;
-}
-
-static inline u16 Hi16(u32 val)
-{
- return val >> 16;
-}
-
-static inline u16 Mk16(u8 hi, u8 lo)
-{
- return lo | (((u16) hi) << 8);
-}
-
-static inline u16 Mk16_le(__le16 * v)
-{
- return le16_to_cpu(*v);
-}
-
-static const u16 Sbox[256] = {
- 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
- 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
- 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
- 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
- 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
- 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
- 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
- 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
- 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
- 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
- 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
- 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
- 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
- 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
- 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
- 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
- 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
- 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
- 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
- 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
- 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
- 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
- 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
- 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
- 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
- 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
- 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
- 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
- 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
- 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
- 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
- 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
-};
-
-static inline u16 _S_(u16 v)
-{
- u16 t = Sbox[Hi8(v)];
- return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8));
-}
-
-#define PHASE1_LOOP_COUNT 8
-
-static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA,
- u32 IV32)
-{
- int i, j;
-
- /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */
- TTAK[0] = Lo16(IV32);
- TTAK[1] = Hi16(IV32);
- TTAK[2] = Mk16(TA[1], TA[0]);
- TTAK[3] = Mk16(TA[3], TA[2]);
- TTAK[4] = Mk16(TA[5], TA[4]);
-
- for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
- j = 2 * (i & 1);
- TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j]));
- TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j]));
- TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j]));
- TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j]));
- TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i;
- }
-}
-
-static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
- u16 IV16)
-{
- /* Make temporary area overlap WEP seed so that the final copy can be
- * avoided on little endian hosts. */
- u16 *PPK = (u16 *) & WEPSeed[4];
-
- /* Step 1 - make copy of TTAK and bring in TSC */
- PPK[0] = TTAK[0];
- PPK[1] = TTAK[1];
- PPK[2] = TTAK[2];
- PPK[3] = TTAK[3];
- PPK[4] = TTAK[4];
- PPK[5] = TTAK[4] + IV16;
-
- /* Step 2 - 96-bit bijective mixing using S-box */
- PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
- PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
- PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
- PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
- PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
- PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
-
- PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
- PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
- PPK[2] += RotR1(PPK[1]);
- PPK[3] += RotR1(PPK[2]);
- PPK[4] += RotR1(PPK[3]);
- PPK[5] += RotR1(PPK[4]);
-
- /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value
- * WEPSeed[0..2] is transmitted as WEP IV */
- WEPSeed[0] = Hi8(IV16);
- WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
- WEPSeed[2] = Lo8(IV16);
- WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
-
-#ifdef __BIG_ENDIAN
- {
- int i;
- for (i = 0; i < 6; i++)
- PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8);
- }
-#endif
-}
-
-static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
- u8 * rc4key, int keylen, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 *pos;
- struct ieee80211_hdr *hdr;
-
- hdr = (struct ieee80211_hdr *)skb->data;
-
- if (skb_headroom(skb) < TKIP_HDR_LEN || skb->len < hdr_len)
- return -1;
-
- if (rc4key == NULL || keylen < 16)
- return -1;
-
- if (!tkey->tx_phase1_done) {
- tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
- tkey->tx_iv32);
- tkey->tx_phase1_done = 1;
- }
- tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16);
-
- pos = skb_push(skb, TKIP_HDR_LEN);
- memmove(pos, pos + TKIP_HDR_LEN, hdr_len);
- pos += hdr_len;
-
- *pos++ = *rc4key;
- *pos++ = *(rc4key + 1);
- *pos++ = *(rc4key + 2);
- *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
- *pos++ = tkey->tx_iv32 & 0xff;
- *pos++ = (tkey->tx_iv32 >> 8) & 0xff;
- *pos++ = (tkey->tx_iv32 >> 16) & 0xff;
- *pos++ = (tkey->tx_iv32 >> 24) & 0xff;
-
- tkey->tx_iv16++;
- if (tkey->tx_iv16 == 0) {
- tkey->tx_phase1_done = 0;
- tkey->tx_iv32++;
- }
-
- return TKIP_HDR_LEN;
-}
-
-static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- int len;
- u8 rc4key[16], *pos, *icv;
- u32 crc;
-
- if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- net_dbg_ratelimited("TKIP countermeasures: dropped TX packet to %pM\n",
- hdr->addr1);
- return -1;
- }
-
- if (skb_tailroom(skb) < 4 || skb->len < hdr_len)
- return -1;
-
- len = skb->len - hdr_len;
- pos = skb->data + hdr_len;
-
- if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0)
- return -1;
-
- crc = ~crc32_le(~0, pos, len);
- icv = skb_put(skb, 4);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
-
- arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16);
- arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4);
-
- return 0;
-}
-
-/*
- * deal with seq counter wrapping correctly.
- * refer to timer_after() for jiffies wrapping handling
- */
-static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
- u32 iv32_o, u16 iv16_o)
-{
- if ((s32)iv32_n - (s32)iv32_o < 0 ||
- (iv32_n == iv32_o && iv16_n <= iv16_o))
- return 1;
- return 0;
-}
-
-static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 rc4key[16];
- u8 keyidx, *pos;
- u32 iv32;
- u16 iv16;
- struct ieee80211_hdr *hdr;
- u8 icv[4];
- u32 crc;
- int plen;
-
- hdr = (struct ieee80211_hdr *)skb->data;
-
- if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
- net_dbg_ratelimited("TKIP countermeasures: dropped received packet from %pM\n",
- hdr->addr2);
- return -1;
- }
-
- if (skb->len < hdr_len + TKIP_HDR_LEN + 4)
- return -1;
-
- pos = skb->data + hdr_len;
- keyidx = pos[3];
- if (!(keyidx & (1 << 5))) {
- net_dbg_ratelimited("TKIP: received packet without ExtIV flag from %pM\n",
- hdr->addr2);
- return -2;
- }
- keyidx >>= 6;
- if (tkey->key_idx != keyidx) {
- net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n",
- tkey->key_idx, keyidx);
- return -6;
- }
- if (!tkey->key_set) {
- net_dbg_ratelimited("TKIP: received packet from %pM with keyid=%d that does not have a configured key\n",
- hdr->addr2, keyidx);
- return -3;
- }
- iv16 = (pos[0] << 8) | pos[2];
- iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
- pos += TKIP_HDR_LEN;
-
- if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
-#ifdef CONFIG_LIB80211_DEBUG
- net_dbg_ratelimited("TKIP: replay detected: STA=%pM previous TSC %08x%04x received TSC %08x%04x\n",
- hdr->addr2, tkey->rx_iv32, tkey->rx_iv16,
- iv32, iv16);
-#endif
- tkey->dot11RSNAStatsTKIPReplays++;
- return -4;
- }
-
- if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) {
- tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32);
- tkey->rx_phase1_done = 1;
- }
- tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16);
-
- plen = skb->len - hdr_len - 12;
-
- arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16);
- arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4);
-
- crc = ~crc32_le(~0, pos, plen);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
- if (memcmp(icv, pos + plen, 4) != 0) {
- if (iv32 != tkey->rx_iv32) {
- /* Previously cached Phase1 result was already lost, so
- * it needs to be recalculated for the next packet. */
- tkey->rx_phase1_done = 0;
- }
-#ifdef CONFIG_LIB80211_DEBUG
- net_dbg_ratelimited("TKIP: ICV error detected: STA=%pM\n",
- hdr->addr2);
-#endif
- tkey->dot11RSNAStatsTKIPICVErrors++;
- return -5;
- }
-
- /* Update real counters only after Michael MIC verification has
- * completed */
- tkey->rx_iv32_new = iv32;
- tkey->rx_iv16_new = iv16;
-
- /* Remove IV and ICV */
- memmove(skb->data + TKIP_HDR_LEN, skb->data, hdr_len);
- skb_pull(skb, TKIP_HDR_LEN);
- skb_trim(skb, skb->len - 4);
-
- return keyidx;
-}
-
-static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr,
- u8 *data, size_t data_len, u8 *mic)
-{
- SHASH_DESC_ON_STACK(desc, tfm_michael);
- int err;
-
- if (tfm_michael == NULL) {
- pr_warn("%s(): tfm_michael == NULL\n", __func__);
- return -1;
- }
-
- desc->tfm = tfm_michael;
-
- if (crypto_shash_setkey(tfm_michael, key, 8))
- return -1;
-
- err = crypto_shash_init(desc);
- if (err)
- goto out;
- err = crypto_shash_update(desc, hdr, 16);
- if (err)
- goto out;
- err = crypto_shash_update(desc, data, data_len);
- if (err)
- goto out;
- err = crypto_shash_final(desc, mic);
-
-out:
- shash_desc_zero(desc);
- return err;
-}
-
-static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
-{
- struct ieee80211_hdr *hdr11;
-
- hdr11 = (struct ieee80211_hdr *)skb->data;
-
- switch (le16_to_cpu(hdr11->frame_control) &
- (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
- case IEEE80211_FCTL_TODS:
- memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
- break;
- case IEEE80211_FCTL_FROMDS:
- memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */
- break;
- case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS:
- memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */
- break;
- default:
- memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
- break;
- }
-
- if (ieee80211_is_data_qos(hdr11->frame_control)) {
- hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11)))
- & IEEE80211_QOS_CTL_TID_MASK;
- } else
- hdr[12] = 0; /* priority */
-
- hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */
-}
-
-static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len,
- void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 *pos;
-
- if (skb_tailroom(skb) < 8 || skb->len < hdr_len) {
- printk(KERN_DEBUG "Invalid packet for Michael MIC add "
- "(tailroom=%d hdr_len=%d skb->len=%d)\n",
- skb_tailroom(skb), hdr_len, skb->len);
- return -1;
- }
-
- michael_mic_hdr(skb, tkey->tx_hdr);
- pos = skb_put(skb, 8);
- if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr,
- skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
- return -1;
-
- return 0;
-}
-
-static void lib80211_michael_mic_failure(struct net_device *dev,
- struct ieee80211_hdr *hdr,
- int keyidx)
-{
- union iwreq_data wrqu;
- struct iw_michaelmicfailure ev;
-
- /* TODO: needed parameters: count, keyid, key type, TSC */
- memset(&ev, 0, sizeof(ev));
- ev.flags = keyidx & IW_MICFAILURE_KEY_ID;
- if (hdr->addr1[0] & 0x01)
- ev.flags |= IW_MICFAILURE_GROUP;
- else
- ev.flags |= IW_MICFAILURE_PAIRWISE;
- ev.src_addr.sa_family = ARPHRD_ETHER;
- memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN);
- memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = sizeof(ev);
- wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev);
-}
-
-static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx,
- int hdr_len, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 mic[8];
-
- if (!tkey->key_set)
- return -1;
-
- michael_mic_hdr(skb, tkey->rx_hdr);
- if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr,
- skb->data + hdr_len, skb->len - 8 - hdr_len, mic))
- return -1;
- if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) {
- struct ieee80211_hdr *hdr;
- hdr = (struct ieee80211_hdr *)skb->data;
- printk(KERN_DEBUG "%s: Michael MIC verification failed for "
- "MSDU from %pM keyidx=%d\n",
- skb->dev ? skb->dev->name : "N/A", hdr->addr2,
- keyidx);
- if (skb->dev)
- lib80211_michael_mic_failure(skb->dev, hdr, keyidx);
- tkey->dot11RSNAStatsTKIPLocalMICFailures++;
- return -1;
- }
-
- /* Update TSC counters for RX now that the packet verification has
- * completed. */
- tkey->rx_iv32 = tkey->rx_iv32_new;
- tkey->rx_iv16 = tkey->rx_iv16_new;
-
- skb_trim(skb, skb->len - 8);
-
- return 0;
-}
-
-static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- int keyidx;
- struct crypto_shash *tfm = tkey->tx_tfm_michael;
- struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4;
- struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
- struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4;
-
- keyidx = tkey->key_idx;
- memset(tkey, 0, sizeof(*tkey));
- tkey->key_idx = keyidx;
- tkey->tx_tfm_michael = tfm;
- tkey->tx_ctx_arc4 = *tfm2;
- tkey->rx_tfm_michael = tfm3;
- tkey->rx_ctx_arc4 = *tfm4;
- if (len == TKIP_KEY_LEN) {
- memcpy(tkey->key, key, TKIP_KEY_LEN);
- tkey->key_set = 1;
- tkey->tx_iv16 = 1; /* TSC is initialized to 1 */
- if (seq) {
- tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) |
- (seq[3] << 8) | seq[2];
- tkey->rx_iv16 = (seq[1] << 8) | seq[0];
- }
- } else if (len == 0)
- tkey->key_set = 0;
- else
- return -1;
-
- return 0;
-}
-
-static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
-
- if (len < TKIP_KEY_LEN)
- return -1;
-
- if (!tkey->key_set)
- return 0;
- memcpy(key, tkey->key, TKIP_KEY_LEN);
-
- if (seq) {
- /*
- * Not clear if this should return the value as is
- * or - as the code previously seemed to partially
- * have been written as - subtract one from it. It
- * was working this way for a long time so leave it.
- */
- seq[0] = tkey->tx_iv16;
- seq[1] = tkey->tx_iv16 >> 8;
- seq[2] = tkey->tx_iv32;
- seq[3] = tkey->tx_iv32 >> 8;
- seq[4] = tkey->tx_iv32 >> 16;
- seq[5] = tkey->tx_iv32 >> 24;
- }
-
- return TKIP_KEY_LEN;
-}
-
-static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
-{
- struct lib80211_tkip_data *tkip = priv;
- seq_printf(m,
- "key[%d] alg=TKIP key_set=%d "
- "tx_pn=%02x%02x%02x%02x%02x%02x "
- "rx_pn=%02x%02x%02x%02x%02x%02x "
- "replays=%d icv_errors=%d local_mic_failures=%d\n",
- tkip->key_idx, tkip->key_set,
- (tkip->tx_iv32 >> 24) & 0xff,
- (tkip->tx_iv32 >> 16) & 0xff,
- (tkip->tx_iv32 >> 8) & 0xff,
- tkip->tx_iv32 & 0xff,
- (tkip->tx_iv16 >> 8) & 0xff,
- tkip->tx_iv16 & 0xff,
- (tkip->rx_iv32 >> 24) & 0xff,
- (tkip->rx_iv32 >> 16) & 0xff,
- (tkip->rx_iv32 >> 8) & 0xff,
- tkip->rx_iv32 & 0xff,
- (tkip->rx_iv16 >> 8) & 0xff,
- tkip->rx_iv16 & 0xff,
- tkip->dot11RSNAStatsTKIPReplays,
- tkip->dot11RSNAStatsTKIPICVErrors,
- tkip->dot11RSNAStatsTKIPLocalMICFailures);
-}
-
-static struct lib80211_crypto_ops lib80211_crypt_tkip = {
- .name = "TKIP",
- .init = lib80211_tkip_init,
- .deinit = lib80211_tkip_deinit,
- .encrypt_mpdu = lib80211_tkip_encrypt,
- .decrypt_mpdu = lib80211_tkip_decrypt,
- .encrypt_msdu = lib80211_michael_mic_add,
- .decrypt_msdu = lib80211_michael_mic_verify,
- .set_key = lib80211_tkip_set_key,
- .get_key = lib80211_tkip_get_key,
- .print_stats = lib80211_tkip_print_stats,
- .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */
- .extra_mpdu_postfix_len = 4, /* ICV */
- .extra_msdu_postfix_len = 8, /* MIC */
- .get_flags = lib80211_tkip_get_flags,
- .set_flags = lib80211_tkip_set_flags,
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_crypto_tkip_init(void)
-{
- return lib80211_register_crypto_ops(&lib80211_crypt_tkip);
-}
-
-static void __exit lib80211_crypto_tkip_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_tkip);
-}
-
-module_init(lib80211_crypto_tkip_init);
-module_exit(lib80211_crypto_tkip_exit);
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
deleted file mode 100644
index 6ab9957b8f96..000000000000
--- a/net/wireless/lib80211_crypt_wep.c
+++ /dev/null
@@ -1,256 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 crypt: host-based WEP encryption implementation for lib80211
- *
- * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- */
-
-#include <linux/err.h>
-#include <linux/fips.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/skbuff.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-
-#include <net/lib80211.h>
-
-#include <crypto/arc4.h>
-#include <linux/crc32.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("lib80211 crypt: WEP");
-MODULE_LICENSE("GPL");
-
-struct lib80211_wep_data {
- u32 iv;
-#define WEP_KEY_LEN 13
- u8 key[WEP_KEY_LEN + 1];
- u8 key_len;
- u8 key_idx;
- struct arc4_ctx tx_ctx;
- struct arc4_ctx rx_ctx;
-};
-
-static void *lib80211_wep_init(int keyidx)
-{
- struct lib80211_wep_data *priv;
-
- if (fips_enabled)
- return NULL;
-
- priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
- if (priv == NULL)
- return NULL;
- priv->key_idx = keyidx;
-
- /* start WEP IV from a random value */
- get_random_bytes(&priv->iv, 4);
-
- return priv;
-}
-
-static void lib80211_wep_deinit(void *priv)
-{
- kfree_sensitive(priv);
-}
-
-/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
-static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
- u8 *key, int keylen, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- u32 klen;
- u8 *pos;
-
- if (skb_headroom(skb) < 4 || skb->len < hdr_len)
- return -1;
-
- pos = skb_push(skb, 4);
- memmove(pos, pos + 4, hdr_len);
- pos += hdr_len;
-
- klen = 3 + wep->key_len;
-
- wep->iv++;
-
- /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key
- * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N)
- * can be used to speedup attacks, so avoid using them. */
- if ((wep->iv & 0xff00) == 0xff00) {
- u8 B = (wep->iv >> 16) & 0xff;
- if (B >= 3 && B < klen)
- wep->iv += 0x0100;
- }
-
- /* Prepend 24-bit IV to RC4 key and TX frame */
- *pos++ = (wep->iv >> 16) & 0xff;
- *pos++ = (wep->iv >> 8) & 0xff;
- *pos++ = wep->iv & 0xff;
- *pos++ = wep->key_idx << 6;
-
- return 0;
-}
-
-/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
- * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
- * so the payload length increases with 8 bytes.
- *
- * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
- */
-static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- u32 crc, klen, len;
- u8 *pos, *icv;
- u8 key[WEP_KEY_LEN + 3];
-
- /* other checks are in lib80211_wep_build_iv */
- if (skb_tailroom(skb) < 4)
- return -1;
-
- /* add the IV to the frame */
- if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv))
- return -1;
-
- /* Copy the IV into the first 3 bytes of the key */
- skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
-
- /* Copy rest of the WEP key (the secret part) */
- memcpy(key + 3, wep->key, wep->key_len);
-
- len = skb->len - hdr_len - 4;
- pos = skb->data + hdr_len + 4;
- klen = 3 + wep->key_len;
-
- /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */
- crc = ~crc32_le(~0, pos, len);
- icv = skb_put(skb, 4);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
-
- arc4_setkey(&wep->tx_ctx, key, klen);
- arc4_crypt(&wep->tx_ctx, pos, pos, len + 4);
-
- return 0;
-}
-
-/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
- * the frame: IV (4 bytes), encrypted payload (including SNAP header),
- * ICV (4 bytes). len includes both IV and ICV.
- *
- * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on
- * failure. If frame is OK, IV and ICV will be removed.
- */
-static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- u32 crc, klen, plen;
- u8 key[WEP_KEY_LEN + 3];
- u8 keyidx, *pos, icv[4];
-
- if (skb->len < hdr_len + 8)
- return -1;
-
- pos = skb->data + hdr_len;
- key[0] = *pos++;
- key[1] = *pos++;
- key[2] = *pos++;
- keyidx = *pos++ >> 6;
- if (keyidx != wep->key_idx)
- return -1;
-
- klen = 3 + wep->key_len;
-
- /* Copy rest of the WEP key (the secret part) */
- memcpy(key + 3, wep->key, wep->key_len);
-
- /* Apply RC4 to data and compute CRC32 over decrypted data */
- plen = skb->len - hdr_len - 8;
-
- arc4_setkey(&wep->rx_ctx, key, klen);
- arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4);
-
- crc = ~crc32_le(~0, pos, plen);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
- if (memcmp(icv, pos + plen, 4) != 0) {
- /* ICV mismatch - drop frame */
- return -2;
- }
-
- /* Remove IV and ICV */
- memmove(skb->data + 4, skb->data, hdr_len);
- skb_pull(skb, 4);
- skb_trim(skb, skb->len - 4);
-
- return 0;
-}
-
-static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
-
- if (len < 0 || len > WEP_KEY_LEN)
- return -1;
-
- memcpy(wep->key, key, len);
- wep->key_len = len;
-
- return 0;
-}
-
-static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
-
- if (len < wep->key_len)
- return -1;
-
- memcpy(key, wep->key, wep->key_len);
-
- return wep->key_len;
-}
-
-static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
-}
-
-static struct lib80211_crypto_ops lib80211_crypt_wep = {
- .name = "WEP",
- .init = lib80211_wep_init,
- .deinit = lib80211_wep_deinit,
- .encrypt_mpdu = lib80211_wep_encrypt,
- .decrypt_mpdu = lib80211_wep_decrypt,
- .encrypt_msdu = NULL,
- .decrypt_msdu = NULL,
- .set_key = lib80211_wep_set_key,
- .get_key = lib80211_wep_get_key,
- .print_stats = lib80211_wep_print_stats,
- .extra_mpdu_prefix_len = 4, /* IV */
- .extra_mpdu_postfix_len = 4, /* ICV */
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_crypto_wep_init(void)
-{
- return lib80211_register_crypto_ops(&lib80211_crypt_wep);
-}
-
-static void __exit lib80211_crypto_wep_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_wep);
-}
-
-module_init(lib80211_crypto_wep_init);
-module_exit(lib80211_crypto_wep_exit);
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index aaca65b66af4..2c6654075ca9 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -127,7 +127,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
if (!rdev->ops->join_mesh)
return -EOPNOTSUPP;
- if (wdev->cac_started)
+ if (wdev->links[0].cac_started)
return -EBUSY;
if (!setup->chandef.chan) {
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 4052041a19ea..a5eb92d93074 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -340,12 +340,6 @@ cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a,
return -EINVAL;
}
- if (ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_a) !=
- ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_b)) {
- NL_SET_ERR_MSG(extack, "link EML medium sync delay mismatch");
- return -EINVAL;
- }
-
if (ieee80211_mle_get_eml_cap((const u8 *)mle_a) !=
ieee80211_mle_get_eml_cap((const u8 *)mle_b)) {
NL_SET_ERR_MSG(extack, "link EML capabilities mismatch");
@@ -1110,26 +1104,28 @@ EXPORT_SYMBOL(__cfg80211_radar_event);
void cfg80211_cac_event(struct net_device *netdev,
const struct cfg80211_chan_def *chandef,
- enum nl80211_radar_event event, gfp_t gfp)
+ enum nl80211_radar_event event, gfp_t gfp,
+ unsigned int link_id)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long timeout;
- /* not yet supported */
- if (wdev->valid_links)
+ if (WARN_ON(wdev->valid_links &&
+ !(wdev->valid_links & BIT(link_id))))
return;
- trace_cfg80211_cac_event(netdev, event);
+ trace_cfg80211_cac_event(netdev, event, link_id);
- if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED))
+ if (WARN_ON(!wdev->links[link_id].cac_started &&
+ event != NL80211_RADAR_CAC_STARTED))
return;
switch (event) {
case NL80211_RADAR_CAC_FINISHED:
- timeout = wdev->cac_start_time +
- msecs_to_jiffies(wdev->cac_time_ms);
+ timeout = wdev->links[link_id].cac_start_time +
+ msecs_to_jiffies(wdev->links[link_id].cac_time_ms);
WARN_ON(!time_after_eq(jiffies, timeout));
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE);
memcpy(&rdev->cac_done_chandef, chandef,
@@ -1138,10 +1134,10 @@ void cfg80211_cac_event(struct net_device *netdev,
cfg80211_sched_dfs_chan_update(rdev);
fallthrough;
case NL80211_RADAR_CAC_ABORTED:
- wdev->cac_started = false;
+ wdev->links[link_id].cac_started = false;
break;
case NL80211_RADAR_CAC_STARTED:
- wdev->cac_started = true;
+ wdev->links[link_id].cac_started = true;
break;
default:
WARN_ON(1);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7397a372c78e..dd84fc54fb9b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -814,7 +814,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MLO_LINKS] =
NLA_POLICY_NESTED_ARRAY(nl80211_policy),
[NL80211_ATTR_MLO_LINK_ID] =
- NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS),
+ NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS - 1),
[NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN),
[NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG },
[NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
@@ -829,6 +829,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
[NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
[NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG },
+ [NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 },
};
/* policy for the key attributes */
@@ -1285,10 +1286,7 @@ static unsigned int nl80211_link_id(struct nlattr **attrs)
{
struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID];
- if (!linkid)
- return 0;
-
- return nla_get_u8(linkid);
+ return nla_get_u8_default(linkid, 0);
}
static int nl80211_link_id_or_invalid(struct nlattr **attrs)
@@ -2430,6 +2428,11 @@ static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx)
if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx))
goto nla_put_failure;
+ if (r->antenna_mask &&
+ nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK,
+ r->antenna_mask))
+ goto nla_put_failure;
+
for (i = 0; i < r->n_freq_range; i++) {
const struct wiphy_radio_freq_range *range = &r->freq_range[i];
@@ -3408,11 +3411,9 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
- if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET])
- chandef->freq1_offset = nla_get_u32(
- attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]);
- else
- chandef->freq1_offset = 0;
+ chandef->freq1_offset =
+ nla_get_u32_default(attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET],
+ 0);
}
if (attrs[NL80211_ATTR_CENTER_FREQ2])
chandef->center_freq2 =
@@ -3561,7 +3562,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_MESH_POINT:
return cfg80211_set_mesh_channel(rdev, wdev, &chandef);
case NL80211_IFTYPE_MONITOR:
- return cfg80211_set_monitor_channel(rdev, &chandef);
+ return cfg80211_set_monitor_channel(rdev, dev, &chandef);
default:
break;
}
@@ -3996,7 +3997,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
nla_put_u32(msg, NL80211_ATTR_GENERATION,
rdev->devlist_generation ^
(cfg80211_rdev_list_generation << 2)) ||
- nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr))
+ nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr) ||
+ nla_put_u32(msg, NL80211_ATTR_VIF_RADIO_MASK, wdev->radio_mask))
goto nla_put_failure;
if (rdev->ops->get_channel && !wdev->valid_links) {
@@ -4199,6 +4201,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = {
[NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG },
[NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG },
[NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG },
+ [NL80211_MNTR_FLAG_SKIP_TX] = { .type = NLA_FLAG },
};
static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
@@ -4312,6 +4315,29 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
return -EOPNOTSUPP;
}
+static int nl80211_parse_vif_radio_mask(struct genl_info *info,
+ u32 *radio_mask)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct nlattr *attr = info->attrs[NL80211_ATTR_VIF_RADIO_MASK];
+ u32 mask, allowed;
+
+ if (!attr) {
+ *radio_mask = 0;
+ return 0;
+ }
+
+ allowed = BIT(rdev->wiphy.n_radio) - 1;
+ mask = nla_get_u32(attr);
+ if (mask & ~allowed)
+ return -EINVAL;
+ if (!mask)
+ mask = allowed;
+ *radio_mask = mask;
+
+ return 1;
+}
+
static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -4319,6 +4345,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
int err;
enum nl80211_iftype otype, ntype;
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ u32 radio_mask = 0;
bool change = false;
memset(&params, 0, sizeof(params));
@@ -4332,8 +4360,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_MESH_ID]) {
- struct wireless_dev *wdev = dev->ieee80211_ptr;
-
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
if (otype != NL80211_IFTYPE_MESH_POINT)
@@ -4364,6 +4390,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (err > 0)
change = true;
+ err = nl80211_parse_vif_radio_mask(info, &radio_mask);
+ if (err < 0)
+ return err;
+ if (err && netif_running(dev))
+ return -EBUSY;
+
if (change)
err = cfg80211_change_iface(rdev, dev, ntype, &params);
else
@@ -4372,11 +4404,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (!err && params.use_4addr != -1)
dev->ieee80211_ptr->use_4addr = params.use_4addr;
- if (change && !err) {
- struct wireless_dev *wdev = dev->ieee80211_ptr;
+ if (radio_mask)
+ wdev->radio_mask = radio_mask;
+ if (change && !err)
nl80211_notify_iface(rdev, wdev, NL80211_CMD_SET_INTERFACE);
- }
return err;
}
@@ -4387,6 +4419,7 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
struct vif_params params;
struct wireless_dev *wdev;
struct sk_buff *msg;
+ u32 radio_mask;
int err;
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
@@ -4424,6 +4457,10 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
if (err < 0)
return err;
+ err = nl80211_parse_vif_radio_mask(info, &radio_mask);
+ if (err < 0)
+ return err;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -4465,6 +4502,9 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
break;
}
+ if (radio_mask)
+ wdev->radio_mask = radio_mask;
+
if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) {
nlmsg_free(msg);
@@ -6066,7 +6106,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->start_ap)
return -EOPNOTSUPP;
- if (wdev->cac_started)
+ if (wdev->links[link_id].cac_started)
return -EBUSY;
if (wdev->links[link_id].ap.beacon_interval)
@@ -6078,6 +6118,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NL80211_ATTR_BEACON_HEAD])
return -EINVAL;
+ if (info->attrs[NL80211_ATTR_SMPS_MODE] &&
+ nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]) != NL80211_SMPS_OFF)
+ return -EOPNOTSUPP;
+
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
return -ENOMEM;
@@ -6227,34 +6271,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
- params->smps_mode =
- nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
- switch (params->smps_mode) {
- case NL80211_SMPS_OFF:
- break;
- case NL80211_SMPS_STATIC:
- if (!(rdev->wiphy.features &
- NL80211_FEATURE_STATIC_SMPS)) {
- err = -EINVAL;
- goto out;
- }
- break;
- case NL80211_SMPS_DYNAMIC:
- if (!(rdev->wiphy.features &
- NL80211_FEATURE_DYNAMIC_SMPS)) {
- err = -EINVAL;
- goto out;
- }
- break;
- default:
- err = -EINVAL;
- goto out;
- }
- } else {
- params->smps_mode = NL80211_SMPS_OFF;
- }
-
params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
err = -EOPNOTSUPP;
@@ -8244,11 +8260,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
if (unlikely(!rcu_access_pointer(cfg80211_regdomain)))
return -EINPROGRESS;
- if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE])
- user_reg_hint_type =
- nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]);
- else
- user_reg_hint_type = NL80211_USER_REG_HINT_USER;
+ user_reg_hint_type =
+ nla_get_u32_default(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE],
+ NL80211_USER_REG_HINT_USER);
switch (user_reg_hint_type) {
case NL80211_USER_REG_HINT_USER:
@@ -9180,6 +9194,9 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
lockdep_assert_wiphy(wdev->wiphy);
+ if (!cfg80211_wdev_channel_allowed(wdev, chan))
+ return false;
+
if (!cfg80211_beaconing_iface_active(wdev))
return true;
@@ -9392,7 +9409,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
}
/* ignore disabled channels */
- if (chan->flags & IEEE80211_CHAN_DISABLED)
+ if (chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(wdev, chan))
continue;
request->channels[i] = chan;
@@ -9412,7 +9430,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
chan = &wiphy->bands[band]->channels[j];
- if (chan->flags & IEEE80211_CHAN_DISABLED)
+ if (chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(wdev, chan))
continue;
request->channels[i] = chan;
@@ -9776,9 +9795,11 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
request = kzalloc(size, GFP_KERNEL);
if (!request)
return ERR_PTR(-ENOMEM);
+ request->n_channels = n_channels;
if (n_ssids)
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request +
+ struct_size(request, channels, n_channels);
request->n_ssids = n_ssids;
if (ie_len) {
if (n_ssids)
@@ -10072,6 +10093,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int link_id = nl80211_link_id(info->attrs);
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_chan_def chandef;
enum nl80211_dfs_regions dfs_region;
@@ -10121,7 +10143,20 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
goto unlock;
}
- if (cfg80211_beaconing_iface_active(wdev) || wdev->cac_started) {
+ if (cfg80211_beaconing_iface_active(wdev)) {
+ /* During MLO other link(s) can beacon, only the current link
+ * can not already beacon
+ */
+ if (wdev->valid_links &&
+ !wdev->links[link_id].ap.beacon_interval) {
+ /* nothing */
+ } else {
+ err = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ if (wdev->links[link_id].cac_started) {
err = -EBUSY;
goto unlock;
}
@@ -10141,12 +10176,26 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (WARN_ON(!cac_time_ms))
cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
- err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms);
+ err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms,
+ link_id);
if (!err) {
- wdev->links[0].ap.chandef = chandef;
- wdev->cac_started = true;
- wdev->cac_start_time = jiffies;
- wdev->cac_time_ms = cac_time_ms;
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ wdev->links[0].ap.chandef = chandef;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ wdev->u.ibss.chandef = chandef;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ wdev->u.mesh.chandef = chandef;
+ break;
+ default:
+ break;
+ }
+ wdev->links[link_id].cac_started = true;
+ wdev->links[link_id].cac_start_time = jiffies;
+ wdev->links[link_id].cac_time_ms = cac_time_ms;
}
unlock:
wiphy_unlock(wiphy);
@@ -10494,17 +10543,21 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
NL80211_BSS_CHAIN_SIGNAL))
goto nla_put_failure;
- switch (rdev->wiphy.signal_type) {
- case CFG80211_SIGNAL_TYPE_MBM:
- if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
- goto nla_put_failure;
- break;
- case CFG80211_SIGNAL_TYPE_UNSPEC:
- if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal))
- goto nla_put_failure;
- break;
- default:
- break;
+ if (intbss->bss_source != BSS_SOURCE_STA_PROFILE) {
+ switch (rdev->wiphy.signal_type) {
+ case CFG80211_SIGNAL_TYPE_MBM:
+ if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM,
+ res->signal))
+ goto nla_put_failure;
+ break;
+ case CFG80211_SIGNAL_TYPE_UNSPEC:
+ if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC,
+ res->signal))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
}
switch (wdev->iftype) {
@@ -11028,11 +11081,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
}
- if (info->attrs[NL80211_ATTR_SAE_PWE])
- settings->sae_pwe =
- nla_get_u8(info->attrs[NL80211_ATTR_SAE_PWE]);
- else
- settings->sae_pwe = NL80211_SAE_PWE_UNSPECIFIED;
+ settings->sae_pwe =
+ nla_get_u8_default(info->attrs[NL80211_ATTR_SAE_PWE],
+ NL80211_SAE_PWE_UNSPECIFIED);
return 0;
}
@@ -12288,10 +12339,8 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
return -EPERM;
- if (!info->attrs[NL80211_ATTR_REASON_CODE])
- reason = WLAN_REASON_DEAUTH_LEAVING;
- else
- reason = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
+ reason = nla_get_u16_default(info->attrs[NL80211_ATTR_REASON_CODE],
+ WLAN_REASON_DEAUTH_LEAVING);
if (reason == 0)
return -EINVAL;
@@ -12413,7 +12462,7 @@ static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_MAC]) {
pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
} else if (info->attrs[NL80211_ATTR_SSID]) {
- /* SSID based pmksa flush suppported only for FILS,
+ /* SSID based pmksa flush supported only for FILS,
* OWE/SAE OFFLOAD cases
*/
if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
@@ -13637,10 +13686,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]);
memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]),
ETH_ALEN);
- if (tb[NL80211_WOWLAN_TCP_SRC_PORT])
- port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]);
- else
- port = 0;
+ port = nla_get_u16_default(tb[NL80211_WOWLAN_TCP_SRC_PORT], 0);
#ifdef CONFIG_INET
/* allocate a socket and port for it and use it */
err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM,
@@ -13851,11 +13897,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
pat_len < wowlan->pattern_min_len)
goto error;
- if (!pat_tb[NL80211_PKTPAT_OFFSET])
- pkt_offset = 0;
- else
- pkt_offset = nla_get_u32(
- pat_tb[NL80211_PKTPAT_OFFSET]);
+ pkt_offset =
+ nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET],
+ 0);
if (pkt_offset > wowlan->max_pkt_offset)
goto error;
new_triggers.patterns[i].pkt_offset = pkt_offset;
@@ -14028,8 +14072,6 @@ void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce)
for (i = 0; i < coalesce->n_rules; i++) {
rule = &coalesce->rules[i];
- if (!rule)
- continue;
for (j = 0; j < rule->n_patterns; j++)
kfree(rule->patterns[j].mask);
kfree(rule->patterns);
@@ -14101,10 +14143,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
pat_len < coalesce->pattern_min_len)
return -EINVAL;
- if (!pat_tb[NL80211_PKTPAT_OFFSET])
- pkt_offset = 0;
- else
- pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]);
+ pkt_offset = nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET],
+ 0);
if (pkt_offset > coalesce->max_pkt_offset)
return -EINVAL;
new_rule->patterns[i].pkt_offset = pkt_offset;
@@ -15465,7 +15505,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
if (tsid >= IEEE80211_FIRST_TSPEC_TSID) {
/* TODO: handle 802.11 TSPEC/admission control
* need more attributes for that (e.g. BA session requirement);
- * change the WMM adminssion test above to allow both then
+ * change the WMM admission test above to allow both then
*/
return -EINVAL;
}
@@ -16498,10 +16538,10 @@ nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info)
SELECTOR(__sel, NETDEV_UP_NOTMX, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_NO_WIPHY_MTX) \
- SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO, \
+ SELECTOR(__sel, NETDEV_UP_NOTMX_MLO, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_NO_WIPHY_MTX | \
- NL80211_FLAG_MLO_UNSUPPORTED) \
+ NL80211_FLAG_MLO_VALID_LINK_ID) \
SELECTOR(__sel, NETDEV_UP_CLEAR, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_CLEAR_SKB) \
@@ -17396,7 +17436,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NO_WIPHY_MTX |
- NL80211_FLAG_MLO_UNSUPPORTED),
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
@@ -17953,10 +17993,8 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
genlmsg_end(msg, hdr);
- rcu_read_lock();
genlmsg_multicast_allns(&nl80211_fam, msg, 0,
- NL80211_MCGRP_REGULATORY, GFP_ATOMIC);
- rcu_read_unlock();
+ NL80211_MCGRP_REGULATORY);
return;
@@ -18689,10 +18727,8 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
genlmsg_end(msg, hdr);
- rcu_read_lock();
genlmsg_multicast_allns(&nl80211_fam, msg, 0,
- NL80211_MCGRP_REGULATORY, GFP_ATOMIC);
- rcu_read_unlock();
+ NL80211_MCGRP_REGULATORY);
return;
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index ae2e1a896461..326faea38ca3 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -18,7 +18,7 @@
#include <linux/export.h>
#include <net/cfg80211.h>
#include <net/ieee80211_radiotap.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* function prototypes and related defs are in include/net/cfg80211.h */
@@ -200,7 +200,7 @@ static void find_ns(struct ieee80211_radiotap_iterator *iterator,
* present fields. @this_arg can be changed by the caller (eg,
* incremented to move inside a compound argument like
* IEEE80211_RADIOTAP_CHANNEL). The args pointed to are in
- * little-endian format whatever the endianess of your CPU.
+ * little-endian format whatever the endianness of your CPU.
*
* Alignment Gotcha:
* You must take care when dereferencing iterator.this_arg
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ec3f4aa1c807..adb6105bbb7d 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -445,11 +445,12 @@ rdev_libertas_set_mesh_channel(struct cfg80211_registered_device *rdev,
static inline int
rdev_set_monitor_channel(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef)
{
int ret;
- trace_rdev_set_monitor_channel(&rdev->wiphy, chandef);
- ret = rdev->ops->set_monitor_channel(&rdev->wiphy, chandef);
+ trace_rdev_set_monitor_channel(&rdev->wiphy, dev, chandef);
+ ret = rdev->ops->set_monitor_channel(&rdev->wiphy, dev, chandef);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1200,26 +1201,27 @@ static inline int
rdev_start_radar_detection(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms)
+ u32 cac_time_ms, int link_id)
{
int ret = -EOPNOTSUPP;
trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef,
- cac_time_ms);
+ cac_time_ms, link_id);
if (rdev->ops->start_radar_detection)
ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev,
- chandef, cac_time_ms);
+ chandef, cac_time_ms,
+ link_id);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline void
rdev_end_cac(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
- trace_rdev_end_cac(&rdev->wiphy, dev);
+ trace_rdev_end_cac(&rdev->wiphy, dev, link_id);
if (rdev->ops->end_cac)
- rdev->ops->end_cac(&rdev->wiphy, dev);
+ rdev->ops->end_cac(&rdev->wiphy, dev, link_id);
trace_rdev_return_void(&rdev->wiphy);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4a27f3823e25..1df65a5a44f7 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1147,7 +1147,7 @@ static const struct ieee80211_regdomain *reg_get_regdomain(struct wiphy *wiphy)
/*
* Follow the driver's regulatory domain, if present, unless a country
- * IE has been processed or a user wants to help complaince further
+ * IE has been processed or a user wants to help compliance further
*/
if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
lr->initiator != NL80211_REGDOM_SET_BY_USER &&
@@ -4229,6 +4229,8 @@ EXPORT_SYMBOL(regulatory_pre_cac_allowed);
static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev;
+ unsigned int link_id;
+
/* If we finished CAC or received radar, we should end any
* CAC running on the same channels.
* the check !cfg80211_chandef_dfs_usable contain 2 options:
@@ -4241,16 +4243,17 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
struct cfg80211_chan_def *chandef;
- if (!wdev->cac_started)
- continue;
+ for_each_valid_link(wdev, link_id) {
+ if (!wdev->links[link_id].cac_started)
+ continue;
- /* FIXME: radar detection is tied to link 0 for now */
- chandef = wdev_chandef(wdev, 0);
- if (!chandef)
- continue;
+ chandef = wdev_chandef(wdev, link_id);
+ if (!chandef)
+ continue;
- if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
- rdev_end_cac(rdev, wdev->netdev);
+ if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
+ rdev_end_cac(rdev, wdev->netdev, link_id);
+ }
}
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 64eeed82d43d..1c6fd45aa809 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -956,7 +956,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
struct ieee80211_channel *chan =
ieee80211_get_channel(&rdev->wiphy, ap->center_freq);
- if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan))
continue;
for (i = 0; i < rdev_req->n_channels; i++) {
@@ -1910,6 +1911,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
known->pub.bssid_index = new->pub.bssid_index;
known->pub.use_for &= new->pub.use_for;
known->pub.cannot_use_reasons = new->pub.cannot_use_reasons;
+ known->bss_source = new->bss_source;
return true;
}
@@ -2008,10 +2010,10 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
return found;
free_ies:
- ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
+ ies = (void *)rcu_access_pointer(tmp->pub.beacon_ies);
if (ies)
kfree_rcu(ies, rcu_head);
- ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
+ ies = (void *)rcu_access_pointer(tmp->pub.proberesp_ies);
if (ies)
kfree_rcu(ies, rcu_head);
@@ -2149,11 +2151,7 @@ struct cfg80211_inform_single_bss_data {
const u8 *ie;
size_t ielen;
- enum {
- BSS_SOURCE_DIRECT = 0,
- BSS_SOURCE_MBSSID,
- BSS_SOURCE_STA_PROFILE,
- } bss_source;
+ enum bss_source_type bss_source;
/* Set if reporting bss_source != BSS_SOURCE_DIRECT */
struct cfg80211_bss *source_bss;
u8 max_bssid_indicator;
@@ -2268,6 +2266,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
IEEE80211_MAX_CHAINS);
tmp.pub.use_for = data->use_for;
tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
+ tmp.bss_source = data->bss_source;
switch (data->bss_source) {
case BSS_SOURCE_MBSSID:
@@ -2907,6 +2906,9 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
struct element *reporter_rnr = NULL;
struct ieee80211_multi_link_elem *ml_elem;
struct cfg80211_mle *mle;
+ const struct element *ssid_elem;
+ const u8 *ssid = NULL;
+ size_t ssid_len = 0;
u16 control;
u8 ml_common_len;
u8 *new_ie = NULL;
@@ -2961,6 +2963,13 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
bss_change_count,
gfp);
+ ssid_elem = cfg80211_find_elem(WLAN_EID_SSID, tx_data->ie,
+ tx_data->ielen);
+ if (ssid_elem) {
+ ssid = ssid_elem->data;
+ ssid_len = ssid_elem->datalen;
+ }
+
for (i = 0; i < ARRAY_SIZE(mle->sta_prof) && mle->sta_prof[i]; i++) {
const struct ieee80211_neighbor_ap_info *ap_info;
enum nl80211_band band;
@@ -3042,6 +3051,27 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
freq = ieee80211_channel_to_freq_khz(ap_info->channel, band);
data.channel = ieee80211_get_channel_khz(wiphy, freq);
+ /* Skip if RNR element specifies an unsupported channel */
+ if (!data.channel)
+ continue;
+
+ /* Skip if BSS entry generated from MBSSID or DIRECT source
+ * frame data available already.
+ */
+ bss = cfg80211_get_bss(wiphy, data.channel, data.bssid, ssid,
+ ssid_len, IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ if (bss) {
+ struct cfg80211_internal_bss *ibss = bss_from_pub(bss);
+
+ if (data.capability == bss->capability &&
+ ibss->bss_source != BSS_SOURCE_STA_PROFILE) {
+ cfg80211_put_bss(wiphy, bss);
+ continue;
+ }
+ cfg80211_put_bss(wiphy, bss);
+ }
+
if (use_for == NL80211_BSS_USE_FOR_MLD_LINK &&
!(wiphy->flags & WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY)) {
use_for = 0;
@@ -3467,8 +3497,8 @@ int cfg80211_wext_siwscan(struct net_device *dev,
n_channels = ieee80211_get_num_supported_channels(wiphy);
}
- creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
- n_channels * sizeof(void *),
+ creq = kzalloc(struct_size(creq, channels, n_channels) +
+ sizeof(struct cfg80211_ssid),
GFP_ATOMIC);
if (!creq)
return -ENOMEM;
@@ -3476,7 +3506,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
creq->wiphy = wiphy;
creq->wdev = dev->ieee80211_ptr;
/* SSIDs come after channels */
- creq->ssids = (void *)&creq->channels[n_channels];
+ creq->ssids = (void *)creq + struct_size(creq, channels, n_channels);
creq->n_channels = n_channels;
creq->n_ssids = 1;
creq->scan_start = jiffies;
@@ -3490,9 +3520,12 @@ int cfg80211_wext_siwscan(struct net_device *dev,
continue;
for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
+ struct ieee80211_channel *chan;
+
/* ignore disabled channels */
- if (wiphy->bands[band]->channels[j].flags &
- IEEE80211_CHAN_DISABLED)
+ chan = &wiphy->bands[band]->channels[j];
+ if (chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(creq->wdev, chan))
continue;
/* If we have a wireless request structure and the
@@ -3569,7 +3602,6 @@ int cfg80211_wext_siwscan(struct net_device *dev,
kfree(creq);
return err;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwscan);
static char *ieee80211_scan_add_ies(struct iw_request_info *info,
const struct cfg80211_bss_ies *ies,
@@ -3941,5 +3973,4 @@ int cfg80211_wext_giwscan(struct net_device *dev,
return res;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwscan);
#endif
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d9d7bf8bb5c1..268171600087 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -83,6 +83,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
if (!request)
return -ENOMEM;
+ request->n_channels = n_channels;
if (wdev->conn->params.channel) {
enum nl80211_band band = wdev->conn->params.channel->band;
struct ieee80211_supported_band *sband =
@@ -115,7 +116,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
n_channels = i;
}
request->n_channels = n_channels;
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request +
+ struct_size(request, channels, n_channels);
request->n_ssids = 1;
memcpy(request->ssids[0].ssid, wdev->conn->params.ssid,
diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c
index 74bbee25085f..7b97b731993c 100644
--- a/net/wireless/tests/chan.c
+++ b/net/wireless/tests/chan.c
@@ -7,7 +7,7 @@
#include <net/cfg80211.h>
#include <kunit/test.h>
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
static struct ieee80211_channel chan_6ghz_1 = {
.band = NL80211_BAND_6GHZ,
diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c
index 9f458be71659..e12f620b5f42 100644
--- a/net/wireless/tests/scan.c
+++ b/net/wireless/tests/scan.c
@@ -14,7 +14,7 @@
/* mac80211 helpers for element building */
#include "../../mac80211/ieee80211_i.h"
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
struct test_elem {
u8 id;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5c26f065bd68..d5c9bb614fa6 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -805,9 +805,22 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa,
TP_ARGS(wiphy, netdev)
);
-DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
- TP_ARGS(wiphy, netdev)
+TRACE_EVENT(rdev_end_cac,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, netdev, link_id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(unsigned int, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->link_id = link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id)
);
DECLARE_EVENT_CLASS(station_add_change,
@@ -1305,19 +1318,21 @@ TRACE_EVENT(rdev_libertas_set_mesh_channel,
);
TRACE_EVENT(rdev_set_monitor_channel,
- TP_PROTO(struct wiphy *wiphy,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_chan_def *chandef),
- TP_ARGS(wiphy, chandef),
+ TP_ARGS(wiphy, netdev, chandef),
TP_STRUCT__entry(
WIPHY_ENTRY
+ NETDEV_ENTRY
CHAN_DEF_ENTRY
),
TP_fast_assign(
WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
- WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
);
TRACE_EVENT(rdev_auth,
@@ -2652,24 +2667,26 @@ TRACE_EVENT(rdev_external_auth,
TRACE_EVENT(rdev_start_radar_detection,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms),
- TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
+ u32 cac_time_ms, int link_id),
+ TP_ARGS(wiphy, netdev, chandef, cac_time_ms, link_id),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(u32, cac_time_ms)
+ __field(int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->cac_time_ms = cac_time_ms;
+ __entry->link_id = link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", cac_time_ms=%u",
+ ", cac_time_ms=%u, link_id=%d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->cac_time_ms)
+ __entry->cac_time_ms, __entry->link_id)
);
TRACE_EVENT(rdev_set_mcast_rate,
@@ -3483,18 +3500,21 @@ TRACE_EVENT(cfg80211_radar_event,
);
TRACE_EVENT(cfg80211_cac_event,
- TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt),
- TP_ARGS(netdev, evt),
+ TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt,
+ unsigned int link_id),
+ TP_ARGS(netdev, evt, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
__field(enum nl80211_radar_event, evt)
+ __field(unsigned int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
__entry->evt = evt;
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", event: %d",
- NETDEV_PR_ARG, __entry->evt)
+ TP_printk(NETDEV_PR_FMT ", event: %d, link_id=%u",
+ NETDEV_PR_ARG, __entry->evt, __entry->link_id)
);
DECLARE_EVENT_CLASS(cfg80211_rx_evt,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 9a7c3adc8a3b..65c8e47246b7 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -743,7 +743,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
return NULL;
/*
- * When reusing framents, copy some data to the head to simplify
+ * When reusing fragments, copy some data to the head to simplify
* ethernet header handling and speed up protocol header processing
* in the stack later.
*/
@@ -998,10 +998,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
* Diffserv Service Classes no update is needed:
* - Standard: DF
* - Low Priority Data: CS1
- * - Multimedia Streaming: AF31, AF32, AF33
* - Multimedia Conferencing: AF41, AF42, AF43
* - Network Control Traffic: CS7
* - Real-Time Interactive: CS4
+ * - Signaling: CS5
*/
switch (dscp >> 2) {
case 10:
@@ -1026,9 +1026,11 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
/* Broadcasting video: CS3 */
ret = 4;
break;
- case 40:
- /* Signaling: CS5 */
- ret = 5;
+ case 26:
+ case 28:
+ case 30:
+ /* Multimedia Streaming: AF31, AF32, AF33 */
+ ret = 4;
break;
case 44:
/* Voice Admit: VA */
@@ -2435,8 +2437,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
if (params->num_different_channels > c->num_different_channels)
continue;
- limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
- GFP_KERNEL);
+ limits = kmemdup_array(c->limits, c->n_limits, sizeof(*limits),
+ GFP_KERNEL);
if (!limits)
return -ENOMEM;
@@ -2841,10 +2843,9 @@ void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id)
break;
}
- wdev->valid_links &= ~BIT(link_id);
-
rdev_del_intf_link(rdev, wdev, link_id);
+ wdev->valid_links &= ~BIT(link_id);
eth_zero_addr(wdev->links[link_id].addr);
}
@@ -2921,3 +2922,32 @@ bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio,
return true;
}
EXPORT_SYMBOL(cfg80211_radio_chandef_valid);
+
+bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+ const struct wiphy_radio *radio;
+ struct cfg80211_chan_def chandef;
+ u32 radio_mask;
+ int i;
+
+ radio_mask = wdev->radio_mask;
+ if (!wiphy->n_radio || radio_mask == BIT(wiphy->n_radio) - 1)
+ return true;
+
+ cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20);
+ for (i = 0; i < wiphy->n_radio; i++) {
+ if (!(radio_mask & BIT(i)))
+ continue;
+
+ radio = &wiphy->radio[i];
+ if (!cfg80211_radio_chandef_valid(radio, &chandef))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(cfg80211_wdev_channel_allowed);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 2371069f3c43..90d5c0592667 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -30,7 +30,6 @@ int cfg80211_wext_giwname(struct net_device *dev,
strcpy(wrqu->name, "IEEE 802.11");
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwname);
int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
union iwreq_data *wrqu, char *extra)
@@ -69,7 +68,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
return ret;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode);
int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
union iwreq_data *wrqu, char *extra)
@@ -105,7 +103,6 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
}
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwmode);
int cfg80211_wext_giwrange(struct net_device *dev,
@@ -220,7 +217,6 @@ int cfg80211_wext_giwrange(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange);
/**
@@ -281,7 +277,6 @@ out:
wiphy_unlock(&rdev->wiphy);
return err;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts);
int cfg80211_wext_giwrts(struct net_device *dev,
struct iw_request_info *info,
@@ -296,7 +291,6 @@ int cfg80211_wext_giwrts(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwrts);
int cfg80211_wext_siwfrag(struct net_device *dev,
struct iw_request_info *info,
@@ -327,7 +321,6 @@ out:
return err;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwfrag);
int cfg80211_wext_giwfrag(struct net_device *dev,
struct iw_request_info *info,
@@ -342,7 +335,6 @@ int cfg80211_wext_giwfrag(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwfrag);
static int cfg80211_wext_siwretry(struct net_device *dev,
struct iw_request_info *info,
@@ -413,7 +405,6 @@ int cfg80211_wext_giwretry(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry);
static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool pairwise,
@@ -830,7 +821,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
ret = -EINVAL;
break;
}
- ret = cfg80211_set_monitor_channel(rdev, &chandef);
+ ret = cfg80211_set_monitor_channel(rdev, dev, &chandef);
break;
case NL80211_IFTYPE_MESH_POINT:
freq = cfg80211_wext_freq(wextfreq);
@@ -1204,7 +1195,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
switch (wrq->flags & IW_POWER_MODE) {
case IW_POWER_ON: /* If not specified */
case IW_POWER_MODE: /* If set all mask */
- case IW_POWER_ALL_R: /* If explicitely state all */
+ case IW_POWER_ALL_R: /* If explicitly state all */
ps = true;
break;
default: /* Otherwise we ignore */
diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index c02eb789e676..8251ca5df8ae 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h
@@ -5,12 +5,6 @@
#include <net/iw_handler.h>
#include <linux/wireless.h>
-#ifdef CONFIG_CFG80211_WEXT_EXPORT
-#define EXPORT_WEXT_HANDLER(h) EXPORT_SYMBOL_GPL(h)
-#else
-#define EXPORT_WEXT_HANDLER(h)
-#endif /* CONFIG_CFG80211_WEXT_EXPORT */
-
int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
struct iw_request_info *info,
struct iw_freq *wextfreq, char *extra);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 838ad6541a17..3bb04b05c5ce 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1159,7 +1159,7 @@ char *iwe_stream_add_event(struct iw_request_info *info, char *stream,
/* Check if it's possible */
if (likely((stream + event_len) < ends)) {
iwe->len = event_len;
- /* Beware of alignement issues on 64 bits */
+ /* Beware of alignment issues on 64 bits */
memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
memcpy(stream + lcp_len, &iwe->u,
event_len - lcp_len);
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
deleted file mode 100644
index b379a0371653..000000000000
--- a/net/wireless/wext-spy.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * This file implement the Wireless Extensions spy API.
- *
- * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
- *
- * (As all part of the Linux kernel, this file is GPL)
- */
-
-#include <linux/wireless.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/export.h>
-#include <net/iw_handler.h>
-#include <net/arp.h>
-#include <net/wext.h>
-
-static inline struct iw_spy_data *get_spydata(struct net_device *dev)
-{
- /* This is the new way */
- if (dev->wireless_data)
- return dev->wireless_data->spy_data;
- return NULL;
-}
-
-int iw_handler_set_spy(struct net_device * dev,
- struct iw_request_info * info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct sockaddr * address = (struct sockaddr *) extra;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- /* Disable spy collection while we copy the addresses.
- * While we copy addresses, any call to wireless_spy_update()
- * will NOP. This is OK, as anyway the addresses are changing. */
- spydata->spy_number = 0;
-
- /* We want to operate without locking, because wireless_spy_update()
- * most likely will happen in the interrupt handler, and therefore
- * have its own locking constraints and needs performance.
- * The rtnl_lock() make sure we don't race with the other iw_handlers.
- * This make sure wireless_spy_update() "see" that the spy list
- * is temporarily disabled. */
- smp_wmb();
-
- /* Are there are addresses to copy? */
- if (wrqu->data.length > 0) {
- int i;
-
- /* Copy addresses */
- for (i = 0; i < wrqu->data.length; i++)
- memcpy(spydata->spy_address[i], address[i].sa_data,
- ETH_ALEN);
- /* Reset stats */
- memset(spydata->spy_stat, 0,
- sizeof(struct iw_quality) * IW_MAX_SPY);
- }
-
- /* Make sure above is updated before re-enabling */
- smp_wmb();
-
- /* Enable addresses */
- spydata->spy_number = wrqu->data.length;
-
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_set_spy);
-
-int iw_handler_get_spy(struct net_device * dev,
- struct iw_request_info * info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct sockaddr * address = (struct sockaddr *) extra;
- int i;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- wrqu->data.length = spydata->spy_number;
-
- /* Copy addresses. */
- for (i = 0; i < spydata->spy_number; i++) {
- memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
- address[i].sa_family = AF_UNIX;
- }
- /* Copy stats to the user buffer (just after). */
- if (spydata->spy_number > 0)
- memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
- spydata->spy_stat,
- sizeof(struct iw_quality) * spydata->spy_number);
- /* Reset updated flags. */
- for (i = 0; i < spydata->spy_number; i++)
- spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_get_spy);
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : set spy threshold
- */
-int iw_handler_set_thrspy(struct net_device * dev,
- struct iw_request_info *info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- /* Just do it */
- spydata->spy_thr_low = threshold->low;
- spydata->spy_thr_high = threshold->high;
-
- /* Clear flag */
- memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
-
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : get spy threshold
- */
-int iw_handler_get_thrspy(struct net_device * dev,
- struct iw_request_info *info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- /* Just do it */
- threshold->low = spydata->spy_thr_low;
- threshold->high = spydata->spy_thr_high;
-
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare and send a Spy Threshold event
- */
-static void iw_send_thrspy_event(struct net_device * dev,
- struct iw_spy_data * spydata,
- unsigned char * address,
- struct iw_quality * wstats)
-{
- union iwreq_data wrqu;
- struct iw_thrspy threshold;
-
- /* Init */
- wrqu.data.length = 1;
- wrqu.data.flags = 0;
- /* Copy address */
- memcpy(threshold.addr.sa_data, address, ETH_ALEN);
- threshold.addr.sa_family = ARPHRD_ETHER;
- /* Copy stats */
- threshold.qual = *wstats;
- /* Copy also thresholds */
- threshold.low = spydata->spy_thr_low;
- threshold.high = spydata->spy_thr_high;
-
- /* Send event to user space */
- wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Call for the driver to update the spy data.
- * For now, the spy data is a simple array. As the size of the array is
- * small, this is good enough. If we wanted to support larger number of
- * spy addresses, we should use something more efficient...
- */
-void wireless_spy_update(struct net_device * dev,
- unsigned char * address,
- struct iw_quality * wstats)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- int i;
- int match = -1;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return;
-
- /* Update all records that match */
- for (i = 0; i < spydata->spy_number; i++)
- if (ether_addr_equal(address, spydata->spy_address[i])) {
- memcpy(&(spydata->spy_stat[i]), wstats,
- sizeof(struct iw_quality));
- match = i;
- }
-
- /* Generate an event if we cross the spy threshold.
- * To avoid event storms, we have a simple hysteresis : we generate
- * event only when we go under the low threshold or above the
- * high threshold. */
- if (match >= 0) {
- if (spydata->spy_thr_under[match]) {
- if (wstats->level > spydata->spy_thr_high.level) {
- spydata->spy_thr_under[match] = 0;
- iw_send_thrspy_event(dev, spydata,
- address, wstats);
- }
- } else {
- if (wstats->level < spydata->spy_thr_low.level) {
- spydata->spy_thr_under[match] = 1;
- iw_send_thrspy_event(dev, spydata,
- address, wstats);
- }
- }
- }
-}
-EXPORT_SYMBOL(wireless_spy_update);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 7e16336044b2..3fa70286c846 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -141,7 +141,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
u64 addr;
int err;
- addr = xp_get_handle(xskb);
+ addr = xp_get_handle(xskb, xskb->pool);
err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
@@ -171,14 +171,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
xskb_list = &xskb->pool->xskb_list;
- list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
goto err;
- list_del(&pos->xskb_list_node);
+ list_del(&pos->list_node);
}
return 0;
@@ -527,34 +527,34 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
{
unsigned long flags;
int ret;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(pool->cq, addr);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_submit_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
-static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_cancel_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
static u32 xsk_get_num_desc(struct sk_buff *skb)
@@ -571,7 +571,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}
- xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
+ xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -587,7 +587,7 @@ static void xsk_consume_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
@@ -675,6 +675,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
len = desc->len;
if (!skb) {
+ first_frag = true;
+
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
tr = dev->needed_tailroom;
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
@@ -685,12 +687,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
+ if (unlikely(err))
goto free_err;
- }
-
- first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -758,6 +756,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return skb;
free_err:
+ if (first_frag && skb)
+ kfree_skb(skb);
+
if (err == -EOVERFLOW) {
/* Drop the packet */
xsk_set_destructor_arg(xs->skb);
@@ -765,7 +766,7 @@ free_err:
xskq_cons_release(xs->tx);
} else {
/* Let application retry */
- xsk_cq_cancel_locked(xs, 1);
+ xsk_cq_cancel_locked(xs->pool, 1);
}
return ERR_PTR(err);
@@ -802,7 +803,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (xsk_cq_reserve_addr_locked(xs, desc.addr))
+ if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr))
goto out;
skb = xsk_build_skb(xs, &desc);
@@ -1320,14 +1321,6 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
-struct xdp_umem_reg_v2 {
- __u64 addr; /* Start of packet data area */
- __u64 len; /* Length of packet data area */
- __u32 chunk_size;
- __u32 headroom;
- __u32 flags;
-};
-
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1371,10 +1364,19 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(struct xdp_umem_reg_v2))
- mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
- mr_size = sizeof(struct xdp_umem_reg_v2);
+ mr_size = sizeof(struct xdp_umem_reg_v1);
+
+ BUILD_BUG_ON(sizeof(struct xdp_umem_reg_v1) >= sizeof(struct xdp_umem_reg));
+
+ /* Make sure the last field of the struct doesn't have
+ * uninitialized padding. All padding has to be explicit
+ * and has to be set to zero by the userspace to make
+ * struct xdp_umem_reg extensible in the future.
+ */
+ BUILD_BUG_ON(offsetof(struct xdp_umem_reg, tx_metadata_len) +
+ sizeof_field(struct xdp_umem_reg, tx_metadata_len) !=
+ sizeof(struct xdp_umem_reg));
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index c0e0204b9630..1f7975b49657 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -101,8 +101,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb = &pool->heads[i];
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
- INIT_LIST_HEAD(&xskb->free_list_node);
- INIT_LIST_HEAD(&xskb->xskb_list_node);
+ INIT_LIST_HEAD(&xskb->list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -211,6 +210,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (dev_get_min_mp_channel_count(netdev)) {
+ err = -EBUSY;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
@@ -225,6 +229,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_xsk;
}
pool->umem->zc = true;
+ pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs;
return 0;
err_unreg_xsk:
@@ -382,10 +387,9 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
return;
}
- if (!refcount_dec_and_test(&dma_map->users))
- return;
+ if (refcount_dec_and_test(&dma_map->users))
+ __xp_dma_unmap(dma_map, attrs);
- __xp_dma_unmap(dma_map, attrs);
kvfree(pool->dma_pages);
pool->dma_pages = NULL;
pool->dma_pages_cnt = 0;
@@ -412,8 +416,10 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
for (i = 0; i < pool->heads_cnt; i++) {
struct xdp_buff_xsk *xskb = &pool->heads[i];
+ u64 orig_addr;
- xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+ orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom;
+ xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr);
}
}
@@ -496,6 +502,22 @@ static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
return *addr < pool->addrs_cnt;
}
+static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xdp_buff_xsk *xskb;
+
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+ }
+
+ return xskb;
+}
+
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb;
@@ -521,14 +543,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
break;
}
- if (pool->unaligned) {
- xskb = pool->free_heads[--pool->free_heads_cnt];
- xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages)
- xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
- } else {
- xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
- }
+ xskb = xp_get_xskb(pool, addr);
xskq_cons_release(pool->fq);
return xskb;
@@ -545,8 +560,8 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
} else {
pool->free_list_cnt--;
xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
- free_list_node);
- list_del_init(&xskb->free_list_node);
+ list_node);
+ list_del_init(&xskb->list_node);
}
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -586,14 +601,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd
continue;
}
- if (pool->unaligned) {
- xskb = pool->free_heads[--pool->free_heads_cnt];
- xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages)
- xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
- } else {
- xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
- }
+ xskb = xp_get_xskb(pool, addr);
*xdp = &xskb->xdp;
xdp++;
@@ -612,8 +620,8 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
i = nb_entries;
while (i--) {
- xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
- list_del_init(&xskb->free_list_node);
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
+ list_del_init(&xskb->list_node);
*xdp = &xskb->xdp;
xdp++;
@@ -623,20 +631,31 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
return nb_entries;
}
-u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ u32 max)
{
- u32 nb_entries1 = 0, nb_entries2;
+ int i;
- if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
+ for (i = 0; i < max; i++) {
struct xdp_buff *buff;
- /* Slow path */
buff = xp_alloc(pool);
- if (buff)
- *xdp = buff;
- return !!buff;
+ if (unlikely(!buff))
+ return i;
+ *xdp = buff;
+ xdp++;
}
+ return max;
+}
+
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 nb_entries1 = 0, nb_entries2;
+
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
+ return xp_alloc_slow(pool, xdp, max);
+
if (unlikely(pool->free_list_cnt)) {
nb_entries1 = xp_alloc_reused(pool, xdp, max);
if (nb_entries1 == max)
@@ -656,19 +675,27 @@ EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
+ u32 req_count, avail_count;
+
if (pool->free_list_cnt >= count)
return true;
- return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+
+ req_count = count - pool->free_list_cnt;
+ avail_count = xskq_cons_nb_entries(pool->fq, req_count);
+ if (!avail_count)
+ pool->fq->queue_empty_descs++;
+
+ return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);
void xp_free(struct xdp_buff_xsk *xskb)
{
- if (!list_empty(&xskb->free_list_node))
+ if (!list_empty(&xskb->list_node))
return;
xskb->pool->free_list_cnt++;
- list_add(&xskb->free_list_node, &xskb->pool->free_list);
+ list_add(&xskb->list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6f2d1621c992..46d87e961ad6 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -260,7 +260,7 @@ u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
nr_frags = 0;
} else {
nr_frags++;
- if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+ if (nr_frags == pool->xdp_zc_max_segs) {
nr_frags = 0;
break;
}
@@ -306,11 +306,6 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
return entries >= max ? max : entries;
}
-static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
-{
- return xskq_cons_nb_entries(q, cnt) >= cnt;
-}
-
static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
if (q->cached_prod == q->cached_cons)
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index e1c526f97ce3..afa457506274 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -224,7 +224,7 @@ static long xsk_map_delete_elem(struct bpf_map *map, void *key)
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock __rcu **map_entry;
struct xdp_sock *old_xs;
- int k = *(u32 *)key;
+ u32 k = *(u32 *)key;
if (k >= map->max_entries)
return -EINVAL;
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 91357ccaf4af..5b9ee63e30b6 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -132,6 +132,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+ [XFRMA_SA_PCPU] = { .type = NLA_U32 },
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -282,9 +283,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_MTIMER_THRESH:
case XFRMA_SA_DIR:
case XFRMA_NAT_KEEPALIVE_INTERVAL:
+ case XFRMA_SA_PCPU:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -439,7 +441,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 9a44d363ba62..b33c4591e09a 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -269,6 +269,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
+ struct xfrm_dst_lookup_params params;
+
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
saddr = &x->props.saddr;
daddr = &x->id.daddr;
@@ -277,9 +279,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
daddr = &x->props.saddr;
}
- dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
- x->props.family,
- xfrm_smark_get(0, x));
+ memset(&params, 0, sizeof(params));
+ params.net = net;
+ params.saddr = saddr;
+ params.daddr = daddr;
+ params.mark = xfrm_smark_get(0, x);
+ dst = __xfrm_dst_lookup(x->props.family, &params);
if (IS_ERR(dst))
return (is_packet_offload) ? -EINVAL : 0;
@@ -328,12 +333,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
/* User explicitly requested packet offload mode and configured
* policy in addition to the XFRM state. So be civil to users,
* and return an error instead of taking fallback path.
- *
- * This WARN_ON() can be seen as a documentation for driver
- * authors to do not return -EOPNOTSUPP in packet offload mode.
*/
- WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
- if (err != -EOPNOTSUPP || is_packet_offload) {
+ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) {
NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state");
return err;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 749e7eea99e4..841a60a6fbfe 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+ x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index e50e4bf993fa..98f1e2b67c76 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -769,7 +769,7 @@ static int xfrmi_dev_init(struct net_device *dev)
if (err)
return err;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= XFRMI_FEATURES;
dev->hw_features |= XFRMI_FEATURES;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c56c61b0c12e..4408c11c0835 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,6 +45,7 @@
#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
+#include <net/inet_dscp.h>
#include "xfrm_hash.h"
@@ -109,7 +110,11 @@ struct xfrm_pol_inexact_node {
* 4. saddr:any list from saddr tree
*
* This result set then needs to be searched for the policy with
- * the lowest priority. If two results have same prio, youngest one wins.
+ * the lowest priority. If two candidates have the same priority, the
+ * struct xfrm_policy pos member with the lower number is used.
+ *
+ * This replicates previous single-list-search algorithm which would
+ * return first matching policy in the (ordered-by-priority) list.
*/
struct xfrm_pol_inexact_key {
@@ -196,8 +201,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net,
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
@@ -267,10 +270,8 @@ static const struct xfrm_if_cb *xfrm_if_get_cb(void)
return rcu_dereference(xfrm_if_cb);
}
-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- int family, u32 mark)
+struct dst_entry *__xfrm_dst_lookup(int family,
+ const struct xfrm_dst_lookup_params *params)
{
const struct xfrm_policy_afinfo *afinfo;
struct dst_entry *dst;
@@ -279,7 +280,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
if (unlikely(afinfo == NULL))
return ERR_PTR(-EAFNOSUPPORT);
- dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
+ dst = afinfo->dst_lookup(params);
rcu_read_unlock();
@@ -288,11 +289,12 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
EXPORT_SYMBOL(__xfrm_dst_lookup);
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
- int tos, int oif,
+ dscp_t dscp, int oif,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
int family, u32 mark)
{
+ struct xfrm_dst_lookup_params params;
struct net *net = xs_net(x);
xfrm_address_t *saddr = &x->props.saddr;
xfrm_address_t *daddr = &x->id.daddr;
@@ -307,7 +309,29 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
daddr = x->coaddr;
}
- dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
+ params.net = net;
+ params.saddr = saddr;
+ params.daddr = daddr;
+ params.dscp = dscp;
+ params.oif = oif;
+ params.mark = mark;
+ params.ipproto = x->id.proto;
+ if (x->encap) {
+ switch (x->encap->encap_type) {
+ case UDP_ENCAP_ESPINUDP:
+ params.ipproto = IPPROTO_UDP;
+ params.uli.ports.sport = x->encap->encap_sport;
+ params.uli.ports.dport = x->encap->encap_dport;
+ break;
+ case TCP_ENCAP_ESPINTCP:
+ params.ipproto = IPPROTO_TCP;
+ params.uli.ports.sport = x->encap->encap_sport;
+ params.uli.ports.dport = x->encap->encap_dport;
+ break;
+ }
+ }
+
+ dst = __xfrm_dst_lookup(family, &params);
if (!IS_ERR(dst)) {
if (prev_saddr != saddr)
@@ -410,7 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
- INIT_HLIST_NODE(&policy->bydst_inexact_list);
+ INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -452,6 +476,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ struct net *net = xp_net(policy);
+ struct xfrm_state *x;
+
xfrm_dev_policy_delete(policy);
write_lock_bh(&policy->lock);
@@ -467,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
+ /* XXX: Flush state cache */
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+ hlist_del_init_rcu(&x->state_cache);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
xfrm_pol_put(policy);
}
@@ -1228,26 +1262,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
return ERR_PTR(-EEXIST);
}
- chain = &net->xfrm.policy_inexact[dir];
- xfrm_policy_insert_inexact_list(chain, policy);
-
if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);
return delpol;
}
+static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
+{
+ int dir;
+
+ if (policy->walk.dead)
+ return true;
+
+ dir = xfrm_policy_id2dir(policy->index);
+ return dir >= XFRM_POLICY_MAX;
+}
+
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
xfrm.policy_hthresh.work);
- unsigned int hmask;
struct xfrm_policy *pol;
struct xfrm_policy *policy;
struct hlist_head *chain;
- struct hlist_head *odst;
struct hlist_node *newpos;
- int i;
int dir;
unsigned seq;
u8 lbits4, rbits4, lbits6, rbits6;
@@ -1274,13 +1313,10 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX)
- continue;
-
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
if (policy->family == AF_INET) {
dbits = rbits4;
@@ -1311,23 +1347,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
goto out_unlock;
}
- /* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(policy, n,
- &net->xfrm.policy_inexact[dir],
- bydst_inexact_list) {
- hlist_del_rcu(&policy->bydst);
- hlist_del_init(&policy->bydst_inexact_list);
- }
-
- hmask = net->xfrm.policy_bydst[dir].hmask;
- odst = net->xfrm.policy_bydst[dir].table;
- for (i = hmask; i >= 0; i--) {
- hlist_for_each_entry_safe(policy, n, odst + i, bydst)
- hlist_del_rcu(&policy->bydst);
- }
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
/* dir out => dst = remote, src = local */
net->xfrm.policy_bydst[dir].dbits4 = rbits4;
@@ -1345,14 +1365,13 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
- dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX) {
- /* skip socket policies */
- continue;
- }
+
+ hlist_del_rcu(&policy->bydst);
+
newpos = NULL;
+ dir = xfrm_policy_id2dir(policy->index);
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
@@ -1519,42 +1538,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = {
.automatic_shrinking = true,
};
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy)
-{
- struct xfrm_policy *pol, *delpol = NULL;
- struct hlist_node *newpos = NULL;
- int i = 0;
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if (pol->type == policy->type &&
- pol->if_id == policy->if_id &&
- !selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(&policy->mark, pol) &&
- xfrm_sec_ctx_match(pol->security, policy->security) &&
- !WARN_ON(delpol)) {
- delpol = pol;
- if (policy->priority > pol->priority)
- continue;
- } else if (policy->priority >= pol->priority) {
- newpos = &pol->bydst_inexact_list;
- continue;
- }
- if (delpol)
- break;
- }
-
- if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
- hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
- else
- hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- pol->pos = i;
- i++;
- }
-}
-
static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
@@ -2294,10 +2277,52 @@ out:
return pol;
}
+static u32 xfrm_gen_pos_slow(struct net *net)
+{
+ struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* oldest entry is last in list */
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ if (!xfrm_policy_is_dead_or_sk(policy))
+ policy->pos = ++i;
+ }
+
+ return i;
+}
+
+static u32 xfrm_gen_pos(struct net *net)
+{
+ const struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* most recently added policy is at the head of the list */
+ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+ if (xfrm_policy_is_dead_or_sk(policy))
+ continue;
+
+ if (policy->pos == UINT_MAX)
+ return xfrm_gen_pos_slow(net);
+
+ i = policy->pos + 1;
+ break;
+ }
+
+ return i;
+}
+
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ case XFRM_POLICY_FWD:
+ case XFRM_POLICY_OUT:
+ pol->pos = xfrm_gen_pos(net);
+ break;
+ }
+
list_add(&pol->walk.all, &net->xfrm.policy_all);
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
@@ -2314,7 +2339,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
- hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
@@ -2440,15 +2464,15 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
}
static int
-xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
- xfrm_address_t *remote, unsigned short family, u32 mark)
+xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params)
{
int err;
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EINVAL;
- err = afinfo->get_saddr(net, oif, local, remote, mark);
+ err = afinfo->get_saddr(saddr, params);
rcu_read_unlock();
return err;
}
@@ -2477,9 +2501,14 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
remote = &tmpl->id.daddr;
local = &tmpl->saddr;
if (xfrm_addr_any(local, tmpl->encap_family)) {
- error = xfrm_get_saddr(net, fl->flowi_oif,
- &tmp, remote,
- tmpl->encap_family, 0);
+ struct xfrm_dst_lookup_params params;
+
+ memset(&params, 0, sizeof(params));
+ params.net = net;
+ params.oif = fl->flowi_oif;
+ params.daddr = remote;
+ error = xfrm_get_saddr(tmpl->encap_family, &tmp,
+ &params);
if (error)
goto fail;
local = &tmp;
@@ -2558,10 +2587,10 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
}
-static int xfrm_get_tos(const struct flowi *fl, int family)
+static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
{
if (family == AF_INET)
- return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
+ return inet_dsfield_to_dscp(fl->u.ip4.flowi4_tos);
return 0;
}
@@ -2649,13 +2678,13 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
int header_len = 0;
int nfheader_len = 0;
int trailer_len = 0;
- int tos;
int family = policy->selector.family;
xfrm_address_t saddr, daddr;
+ dscp_t dscp;
xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
- tos = xfrm_get_tos(fl, family);
+ dscp = xfrm_get_dscp(fl, family);
dst_hold(dst);
@@ -2703,8 +2732,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
family = xfrm[i]->props.family;
oif = fl->flowi_oif ? : fl->flowi_l3mdev;
- dst = xfrm_dst_lookup(xfrm[i], tos, oif,
- &saddr, &daddr, family, mark);
+ dst = xfrm_dst_lookup(xfrm[i], dscp, oif, &saddr,
+ &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@@ -3257,6 +3286,7 @@ no_transform:
dst_release(dst);
dst = dst_orig;
}
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&
@@ -4188,7 +4218,6 @@ static int __net_init xfrm_policy_init(struct net *net)
net->xfrm.policy_count[dir] = 0;
net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
- INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
htab = &net->xfrm.policy_bydst[dir];
htab->table = xfrm_hash_alloc(sz);
@@ -4242,8 +4271,6 @@ static void xfrm_policy_fini(struct net *net)
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct xfrm_policy_hash *htab;
- WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
-
htab = &net->xfrm.policy_bydst[dir];
sz = (htab->hmask + 1) * sizeof(struct hlist_head);
WARN_ON(!hlist_empty(htab->table));
@@ -4437,63 +4464,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
#ifdef CONFIG_XFRM_MIGRATE
-static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
- const struct xfrm_selector *sel_tgt)
-{
- if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
- if (sel_tgt->family == sel_cmp->family &&
- xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
- sel_cmp->family) &&
- xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
- sel_cmp->family) &&
- sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
- sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
- return true;
- }
- } else {
- if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
- return true;
- }
- }
- return false;
-}
-
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
u8 dir, u8 type, struct net *net, u32 if_id)
{
- struct xfrm_policy *pol, *ret = NULL;
- struct hlist_head *chain;
- u32 priority = ~0U;
+ struct xfrm_policy *pol;
+ struct flowi fl;
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
- hlist_for_each_entry(pol, chain, bydst) {
- if ((if_id == 0 || pol->if_id == if_id) &&
- xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
- priority = ret->priority;
- break;
- }
- }
- chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if ((pol->priority >= priority) && ret)
- break;
+ memset(&fl, 0, sizeof(fl));
- if ((if_id == 0 || pol->if_id == if_id) &&
- xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
+ fl.flowi_proto = sel->proto;
+
+ switch (sel->family) {
+ case AF_INET:
+ fl.u.ip4.saddr = sel->saddr.a4;
+ fl.u.ip4.daddr = sel->daddr.a4;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
break;
- }
+ fl.u.flowi4_oif = sel->ifindex;
+ fl.u.ip4.fl4_sport = sel->sport;
+ fl.u.ip4.fl4_dport = sel->dport;
+ break;
+ case AF_INET6:
+ fl.u.ip6.saddr = sel->saddr.in6;
+ fl.u.ip6.daddr = sel->daddr.in6;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
+ break;
+ fl.u.flowi6_oif = sel->ifindex;
+ fl.u.ip6.fl4_sport = sel->sport;
+ fl.u.ip6.fl4_dport = sel->dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
}
- xfrm_pol_hold(ret);
+ rcu_read_lock();
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
+ if (IS_ERR_OR_NULL(pol))
+ goto out_unlock;
- return ret;
+ if (!xfrm_pol_hold_rcu(pol))
+ pol = NULL;
+out_unlock:
+ rcu_read_unlock();
+ return pol;
}
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
@@ -4630,9 +4644,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
/* Stage 1 - find policy */
pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
- if (!pol) {
+ if (IS_ERR_OR_NULL(pol)) {
NL_SET_ERR_MSG(extack, "Target policy not found");
- err = -ENOENT;
+ err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
goto out;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 37478d36a8df..67ca7ac955a3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -665,6 +665,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
refcount_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->km.all);
+ INIT_HLIST_NODE(&x->state_cache);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@@ -679,6 +680,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
+ x->pcpu_num = UINT_MAX;
spin_lock_init(&x->lock);
}
return x;
@@ -743,12 +745,18 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
+
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
+ if (!hlist_unhashed(&x->state_cache))
+ hlist_del_rcu(&x->state_cache);
+ if (!hlist_unhashed(&x->state_cache_input))
+ hlist_del_rcu(&x->state_cache_input);
+
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1101,6 +1109,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
return NULL;
}
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family)
+{
+ struct hlist_head *state_cache_input;
+ struct xfrm_state *x = NULL;
+ int cpu = get_cpu();
+
+ state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+ if (x->props.family != family ||
+ x->id.spi != spi ||
+ x->id.proto != proto ||
+ !xfrm_addr_equal(&x->id.daddr, daddr, family))
+ continue;
+
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ goto out;
+ }
+
+ x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+
+ if (x && x->km.state == XFRM_STATE_VALID) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache_input)) {
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ } else {
+ hlist_del_rcu(&x->state_cache_input);
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
+out:
+ rcu_read_unlock();
+ put_cpu();
+ return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -1155,6 +1209,12 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
struct xfrm_state **best, int *acq_in_progress,
int *error)
{
+ /* We need the cpu id just as a lookup key,
+ * we don't require it to be stable.
+ */
+ unsigned int pcpu_id = get_cpu();
+ put_cpu();
+
/* Resolution logic:
* 1. There is a valid state with matching selector. Done.
* 2. Valid state with inappropriate selector. Skip.
@@ -1174,13 +1234,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
&fl->u.__fl_common))
return;
+ if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
+ return;
+
if (!*best ||
+ ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
(*best)->km.dying > x->km.dying ||
((*best)->km.dying == x->km.dying &&
(*best)->curlft.add_time < x->curlft.add_time))
*best = x;
} else if (x->km.state == XFRM_STATE_ACQ) {
- *acq_in_progress = 1;
+ if (!*best || x->pcpu_num == pcpu_id)
+ *acq_in_progress = 1;
} else if (x->km.state == XFRM_STATE_ERROR ||
x->km.state == XFRM_STATE_EXPIRED) {
if ((!x->sel.family ||
@@ -1209,12 +1274,60 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short encap_family = tmpl->encap_family;
unsigned int sequence;
struct km_event c;
+ unsigned int pcpu_id;
+ bool cached = false;
+
+ /* We need the cpu id just as a lookup key,
+ * we don't require it to be stable.
+ */
+ pcpu_id = get_cpu();
+ put_cpu();
to_put = NULL;
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, encap_family,
+ &best, &acquire_in_progress, &error);
+ }
+
+ if (best)
+ goto cached;
+
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, family,
+ &best, &acquire_in_progress, &error);
+ }
+
+cached:
+ cached = true;
+ if (best)
+ goto found;
+ else if (error)
+ best = NULL;
+ else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+ WARN_ON(1);
+
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1282,7 +1395,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
}
found:
- x = best;
+ if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
+ (best && (best->pcpu_num == pcpu_id)))
+ x = best;
+
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
(x0 = __xfrm_state_lookup_all(net, mark, daddr,
@@ -1314,6 +1430,8 @@ found:
xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
memcpy(&x->mark, &pol->mark, sizeof(x->mark));
x->if_id = if_id;
+ if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
+ x->pcpu_num = pcpu_id;
error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
if (error) {
@@ -1352,6 +1470,7 @@ found:
x->km.state = XFRM_STATE_ACQ;
x->dir = XFRM_SA_DIR_OUT;
list_add(&x->km.all, &net->xfrm.state_all);
+ h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
XFRM_STATE_INSERT(bydst, &x->bydst,
net->xfrm.state_bydst + h,
x->xso.type);
@@ -1359,6 +1478,7 @@ found:
XFRM_STATE_INSERT(bysrc, &x->bysrc,
net->xfrm.state_bysrc + h,
x->xso.type);
+ INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1392,6 +1512,11 @@ found:
x = NULL;
error = -ESRCH;
}
+
+ /* Use the already installed 'fallback' while the CPU-specific
+ * SA acquire is handled*/
+ if (best)
+ x = best;
}
out:
if (x) {
@@ -1402,6 +1527,15 @@ out:
} else {
*err = acquire_in_progress ? -EAGAIN : error;
}
+
+ if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+ (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache))
+ hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
@@ -1524,12 +1658,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
unsigned int h;
u32 mark = xnew->mark.v & xnew->mark.m;
u32 if_id = xnew->if_id;
+ u32 cpu_id = xnew->pcpu_num;
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
x->if_id == if_id &&
+ x->pcpu_num == cpu_id &&
(mark & x->mark.m) == x->mark.v &&
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1552,7 +1688,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
static struct xfrm_state *__find_acq_core(struct net *net,
const struct xfrm_mark *m,
unsigned short family, u8 mode,
- u32 reqid, u32 if_id, u8 proto,
+ u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
int create)
@@ -1569,6 +1705,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
x->id.spi != 0 ||
x->id.proto != proto ||
(mark & x->mark.m) != x->mark.v ||
+ x->pcpu_num != pcpu_num ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
!xfrm_addr_equal(&x->props.saddr, saddr, family))
continue;
@@ -1602,6 +1739,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
break;
}
+ x->pcpu_num = pcpu_num;
x->km.state = XFRM_STATE_ACQ;
x->id.proto = proto;
x->props.family = family;
@@ -1630,7 +1768,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
return x;
}
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_add(struct xfrm_state *x)
{
@@ -1656,7 +1794,7 @@ int xfrm_state_add(struct xfrm_state *x)
}
if (use_spi && x->km.seq) {
- x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
+ x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
if (x1 && ((x1->id.proto != x->id.proto) ||
!xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
to_put = x1;
@@ -1666,7 +1804,7 @@ int xfrm_state_add(struct xfrm_state *x)
if (use_spi && !x1)
x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
- x->props.reqid, x->if_id, x->id.proto,
+ x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
&x->id.daddr, &x->props.saddr, 0);
__xfrm_state_bump_genids(x);
@@ -1791,6 +1929,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
+ x->pcpu_num = orig->pcpu_num;
x->if_id = orig->if_id;
x->tfcpad = orig->tfcpad;
x->replay_maxdiff = orig->replay_maxdiff;
@@ -2066,13 +2205,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
- u32 if_id, u8 proto, const xfrm_address_t *daddr,
+ u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
+ x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
+ proto, daddr, saddr, create);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
@@ -2207,7 +2347,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
/* Silly enough, but I'm lazy to build resolution list */
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
unsigned int h = xfrm_seq_hash(net, seq);
struct xfrm_state *x;
@@ -2215,6 +2355,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
if (x->km.seq == seq &&
(mark & x->mark.m) == x->mark.v &&
+ x->pcpu_num == pcpu_num &&
x->km.state == XFRM_STATE_ACQ) {
xfrm_state_hold(x);
return x;
@@ -2224,12 +2365,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
return NULL;
}
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __xfrm_find_acq_byseq(net, mark, seq);
+ x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
}
@@ -2988,6 +3129,11 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
+
+ net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+ if (!net->xfrm.state_cache_input)
+ goto out_state_cache_input;
+
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@@ -2997,6 +3143,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
+out_state_cache_input:
+ xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
@@ -3026,6 +3174,7 @@ void xfrm_state_fini(struct net *net)
xfrm_hash_free(net->xfrm.state_bysrc, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bydst));
xfrm_hash_free(net->xfrm.state_bydst, sz);
+ free_percpu(net->xfrm.state_cache_input);
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 55f039ec3d59..b2876e09328b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -33,7 +33,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#endif
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type,
struct netlink_ext_ack *extack)
@@ -200,7 +200,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
struct netlink_ext_ack *extack)
{
int err;
- u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0;
+ u8 sa_dir = nla_get_u8_default(attrs[XFRMA_SA_DIR], 0);
+ u16 family = p->sel.family;
err = -EINVAL;
switch (p->family) {
@@ -221,7 +222,10 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
}
- switch (p->sel.family) {
+ if (!family && !(p->flags & XFRM_STATE_AF_UNSPEC))
+ family = p->family;
+
+ switch (family) {
case AF_UNSPEC:
break;
@@ -456,6 +460,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
}
}
+ if (!sa_dir && attrs[XFRMA_SA_PCPU]) {
+ NL_SET_ERR_MSG(extack, "SA_PCPU only supported with SA_DIR");
+ err = -EINVAL;
+ goto out;
+ }
+
out:
return err;
}
@@ -763,10 +773,8 @@ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
{
if (attrs[XFRMA_SET_MARK]) {
m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
- if (attrs[XFRMA_SET_MARK_MASK])
- m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]);
- else
- m->m = 0xffffffff;
+ m->m = nla_get_u32_default(attrs[XFRMA_SET_MARK_MASK],
+ 0xffffffff);
} else {
m->v = m->m = 0;
}
@@ -837,6 +845,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
x->nat_keepalive_interval =
nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]);
+ if (attrs[XFRMA_SA_PCPU]) {
+ x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ if (x->pcpu_num >= num_possible_cpus())
+ goto error;
+ }
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -1085,7 +1099,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
if (!nla)
return -EMSGSIZE;
algo = nla_data(nla);
- strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
+ strscpy_pad(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
if (redact_secret && auth->alg_key_len)
memset(algo->alg_key, 0, (auth->alg_key_len + 7) / 8);
@@ -1098,7 +1112,9 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
if (!nla)
return -EMSGSIZE;
ap = nla_data(nla);
- memcpy(ap, auth, sizeof(struct xfrm_algo_auth));
+ strscpy_pad(ap->alg_name, auth->alg_name, sizeof(ap->alg_name));
+ ap->alg_key_len = auth->alg_key_len;
+ ap->alg_trunc_len = auth->alg_trunc_len;
if (redact_secret && auth->alg_key_len)
memset(ap->alg_key, 0, (auth->alg_key_len + 7) / 8);
else
@@ -1290,6 +1306,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
+ if (x->pcpu_num != UINT_MAX) {
+ ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (ret)
+ goto out;
+ }
if (x->dir)
ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -1694,6 +1715,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 mark;
struct xfrm_mark m;
u32 if_id = 0;
+ u32 pcpu_num = UINT_MAX;
p = nlmsg_data(nlh);
err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
@@ -1710,8 +1732,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ if (attrs[XFRMA_SA_PCPU]) {
+ pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ if (pcpu_num >= num_possible_cpus()) {
+ err = -EINVAL;
+ goto out_noput;
+ }
+ }
+
if (p->info.seq) {
- x = xfrm_find_acq_byseq(net, mark, p->info.seq);
+ x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num);
if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
xfrm_state_put(x);
x = NULL;
@@ -1720,7 +1750,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!x)
x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
- if_id, p->info.id.proto, daddr,
+ if_id, pcpu_num, p->info.id.proto, daddr,
&p->info.saddr, 1,
family);
err = -ENOENT;
@@ -2520,7 +2550,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(4) /* XFRM_AE_RTHR */
+ nla_total_size(4) /* XFRM_AE_ETHR */
- + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
+ + nla_total_size(sizeof(x->dir)) /* XFRMA_SA_DIR */
+ + nla_total_size(4); /* XFRMA_SA_PCPU */
}
static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2576,6 +2607,11 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
err = xfrm_if_id_put(skb, x->if_id);
if (err)
goto out_cancel;
+ if (x->pcpu_num != UINT_MAX) {
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (err)
+ goto out_cancel;
+ }
if (x->dir) {
err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -2846,6 +2882,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
xfrm_mark_get(attrs, &mark);
+ if (attrs[XFRMA_SA_PCPU]) {
+ x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ err = -EINVAL;
+ if (x->pcpu_num >= num_possible_cpus())
+ goto free_state;
+ }
+
err = verify_newpolicy_info(&ua->policy, extack);
if (err)
goto free_state;
@@ -3176,6 +3219,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+ [XFRMA_SA_PCPU] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3239,6 +3283,20 @@ static int xfrm_reject_unused_attr(int type, struct nlattr **attrs,
}
}
+ if (attrs[XFRMA_SA_PCPU]) {
+ switch (type) {
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_ALLOCSPI:
+ case XFRM_MSG_ACQUIRE:
+
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid attribute SA_PCPU");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -3342,7 +3400,8 @@ static inline unsigned int xfrm_expire_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
nla_total_size(sizeof(struct xfrm_mark)) +
- nla_total_size(sizeof_field(struct xfrm_state, dir));
+ nla_total_size(sizeof_field(struct xfrm_state, dir)) +
+ nla_total_size(4); /* XFRMA_SA_PCPU */
}
static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3368,6 +3427,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
err = xfrm_if_id_put(skb, x->if_id);
if (err)
return err;
+ if (x->pcpu_num != UINT_MAX) {
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (err)
+ return err;
+ }
if (x->dir) {
err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -3475,6 +3539,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
}
if (x->if_id)
l += nla_total_size(sizeof(x->if_id));
+ if (x->pcpu_num)
+ l += nla_total_size(sizeof(x->pcpu_num));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@@ -3581,6 +3647,7 @@ static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(xfrm_user_sec_ctx_size(x->security))
+ + nla_total_size(4) /* XFRMA_SA_PCPU */
+ userpolicy_type_attrsize();
}
@@ -3617,6 +3684,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
err = xfrm_if_id_put(skb, xp->if_id);
if (!err && xp->xdo.dev)
err = copy_user_offload(&xp->xdo, skb);
+ if (!err && x->pcpu_num != UINT_MAX)
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
if (err) {
nlmsg_cancel(skb, nlh);
return err;