Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan_dev.c | 27
-rw-r--r-- net/9p/client.c | 46
-rw-r--r-- net/9p/trans_fd.c | 4
-rw-r--r-- net/9p/trans_virtio.c | 8
-rw-r--r-- net/Kconfig | 5
-rw-r--r-- net/batman-adv/bat_iv_ogm.c | 1
-rw-r--r-- net/batman-adv/bat_v.c | 23
-rw-r--r-- net/batman-adv/bat_v_elp.c | 3
-rw-r--r-- net/batman-adv/bat_v_ogm.c | 7
-rw-r--r-- net/batman-adv/gateway_common.c | 162
-rw-r--r-- net/batman-adv/gateway_common.h | 7
-rw-r--r-- net/batman-adv/hard-interface.c | 34
-rw-r--r-- net/batman-adv/main.h | 2
-rw-r--r-- net/batman-adv/netlink.c | 18
-rw-r--r-- net/batman-adv/netlink.h | 6
-rw-r--r-- net/batman-adv/routing.h | 4
-rw-r--r-- net/batman-adv/soft-interface.c | 5
-rw-r--r-- net/batman-adv/translation-table.c | 1
-rw-r--r-- net/batman-adv/types.h | 13
-rw-r--r-- net/bluetooth/af_bluetooth.c | 53
-rw-r--r-- net/bluetooth/amp.h | 1
-rw-r--r-- net/bluetooth/bnep/sock.c | 10
-rw-r--r-- net/bluetooth/coredump.c | 3
-rw-r--r-- net/bluetooth/eir.c | 2
-rw-r--r-- net/bluetooth/hci_conn.c | 742
-rw-r--r-- net/bluetooth/hci_core.c | 74
-rw-r--r-- net/bluetooth/hci_debugfs.c | 3
-rw-r--r-- net/bluetooth/hci_event.c | 280
-rw-r--r-- net/bluetooth/hci_request.c | 21
-rw-r--r-- net/bluetooth/hci_sock.c | 77
-rw-r--r-- net/bluetooth/hci_sync.c | 427
-rw-r--r-- net/bluetooth/hidp/sock.c | 10
-rw-r--r-- net/bluetooth/iso.c | 355
-rw-r--r-- net/bluetooth/l2cap_sock.c | 29
-rw-r--r-- net/bluetooth/mgmt.c | 61
-rw-r--r-- net/bluetooth/msft.c | 412
-rw-r--r-- net/bluetooth/rfcomm/sock.c | 13
-rw-r--r-- net/bluetooth/sco.c | 57
-rw-r--r-- net/bpf/test_run.c | 21
-rw-r--r-- net/bridge/br.c | 8
-rw-r--r-- net/bridge/br_forward.c | 1
-rw-r--r-- net/bridge/br_netlink.c | 12
-rw-r--r-- net/bridge/br_private.h | 20
-rw-r--r-- net/bridge/br_switchdev.c | 15
-rw-r--r-- net/bridge/br_vlan_tunnel.c | 15
-rw-r--r-- net/bridge/netfilter/ebtables.c | 3
-rw-r--r-- net/can/bcm.c | 12
-rw-r--r-- net/can/isotp.c | 22
-rw-r--r-- net/can/raw.c | 79
-rw-r--r-- net/ceph/messenger.c | 1
-rw-r--r-- net/ceph/messenger_v2.c | 41
-rw-r--r-- net/ceph/osd_client.c | 20
-rw-r--r-- net/core/bpf_sk_storage.c | 5
-rw-r--r-- net/core/dev.c | 370
-rw-r--r-- net/core/dev_ioctl.c | 187
-rw-r--r-- net/core/dst.c | 2
-rw-r--r-- net/core/filter.c | 21
-rw-r--r-- net/core/flow_dissector.c | 55
-rw-r--r-- net/core/flow_offload.c | 7
-rw-r--r-- net/core/lwt_bpf.c | 7
-rw-r--r-- net/core/net-sysfs.c | 1
-rw-r--r-- net/core/net-traces.c | 2
-rw-r--r-- net/core/netdev-genl.c | 54
-rw-r--r-- net/core/of_net.c | 1
-rw-r--r-- net/core/page_pool.c | 87
-rw-r--r-- net/core/rtnetlink.c | 44
-rw-r--r-- net/core/scm.c | 3
-rw-r--r-- net/core/skbuff.c | 179
-rw-r--r-- net/core/skmsg.c | 18
-rw-r--r-- net/core/sock.c | 136
-rw-r--r-- net/core/sock_map.c | 12
-rw-r--r-- net/core/xdp.c | 4
-rw-r--r-- net/dcb/dcbnl.c | 2
-rw-r--r-- net/dccp/feat.h | 1
-rw-r--r-- net/dccp/ipv4.c | 24
-rw-r--r-- net/dccp/ipv6.c | 20
-rw-r--r-- net/dccp/ipv6.h | 4
-rw-r--r-- net/dccp/output.c | 2
-rw-r--r-- net/dccp/proto.c | 30
-rw-r--r-- net/devlink/Makefile | 3
-rw-r--r-- net/devlink/core.c | 6
-rw-r--r-- net/devlink/dev.c | 79
-rw-r--r-- net/devlink/devl_internal.h | 143
-rw-r--r-- net/devlink/dpipe.c | 917
-rw-r--r-- net/devlink/health.c | 42
-rw-r--r-- net/devlink/leftover.c | 9507
-rw-r--r-- net/devlink/linecard.c | 606
-rw-r--r-- net/devlink/netlink.c | 393
-rw-r--r-- net/devlink/netlink_gen.c | 481
-rw-r--r-- net/devlink/netlink_gen.h | 79
-rw-r--r-- net/devlink/param.c | 865
-rw-r--r-- net/devlink/port.c | 1515
-rw-r--r-- net/devlink/rate.c | 722
-rw-r--r-- net/devlink/region.c | 1260
-rw-r--r-- net/devlink/resource.c | 579
-rw-r--r-- net/devlink/sb.c | 996
-rw-r--r-- net/devlink/trap.c | 1861
-rw-r--r-- net/dsa/port.c | 54
-rw-r--r-- net/dsa/slave.c | 9
-rw-r--r-- net/dsa/tag_qca.c | 8
-rw-r--r-- net/ethtool/channels.c | 2
-rw-r--r-- net/ethtool/coalesce.c | 6
-rw-r--r-- net/ethtool/common.c | 3
-rw-r--r-- net/ethtool/debug.c | 2
-rw-r--r-- net/ethtool/eee.c | 2
-rw-r--r-- net/ethtool/eeprom.c | 9
-rw-r--r-- net/ethtool/features.c | 2
-rw-r--r-- net/ethtool/fec.c | 2
-rw-r--r-- net/ethtool/ioctl.c | 91
-rw-r--r-- net/ethtool/linkinfo.c | 2
-rw-r--r-- net/ethtool/linkmodes.c | 2
-rw-r--r-- net/ethtool/linkstate.c | 2
-rw-r--r-- net/ethtool/mm.c | 2
-rw-r--r-- net/ethtool/module.c | 5
-rw-r--r-- net/ethtool/netlink.c | 96
-rw-r--r-- net/ethtool/netlink.h | 2
-rw-r--r-- net/ethtool/pause.c | 5
-rw-r--r-- net/ethtool/phc_vclocks.c | 2
-rw-r--r-- net/ethtool/plca.c | 4
-rw-r--r-- net/ethtool/privflags.c | 2
-rw-r--r-- net/ethtool/pse-pd.c | 6
-rw-r--r-- net/ethtool/rings.c | 5
-rw-r--r-- net/ethtool/rss.c | 3
-rw-r--r-- net/ethtool/stats.c | 5
-rw-r--r-- net/ethtool/strset.c | 2
-rw-r--r-- net/ethtool/tsinfo.c | 2
-rw-r--r-- net/ethtool/tunnels.c | 73
-rw-r--r-- net/ethtool/wol.c | 5
-rw-r--r-- net/handshake/Makefile | 2
-rw-r--r-- net/handshake/alert.c | 110
-rw-r--r-- net/handshake/handshake.h | 6
-rw-r--r-- net/handshake/tlshd.c | 23
-rw-r--r-- net/handshake/trace.c | 2
-rw-r--r-- net/hsr/hsr_netlink.h | 2
-rw-r--r-- net/ieee802154/nl802154.c | 4
-rw-r--r-- net/ipv4/af_inet.c | 64
-rw-r--r-- net/ipv4/bpf_tcp_ca.c | 2
-rw-r--r-- net/ipv4/cipso_ipv4.c | 4
-rw-r--r-- net/ipv4/datagram.c | 2
-rw-r--r-- net/ipv4/devinet.c | 23
-rw-r--r-- net/ipv4/esp4.c | 2
-rw-r--r-- net/ipv4/igmp.c | 2
-rw-r--r-- net/ipv4/inet_connection_sock.c | 2
-rw-r--r-- net/ipv4/inet_diag.c | 26
-rw-r--r-- net/ipv4/inet_hashtables.c | 83
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 10
-rw-r--r-- net/ipv4/ip_gre.c | 6
-rw-r--r-- net/ipv4/ip_output.c | 28
-rw-r--r-- net/ipv4/ip_sockglue.c | 405
-rw-r--r-- net/ipv4/ip_tunnel_core.c | 2
-rw-r--r-- net/ipv4/ip_vti.c | 4
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c | 19
-rw-r--r-- net/ipv4/nexthop.c | 93
-rw-r--r-- net/ipv4/ping.c | 7
-rw-r--r-- net/ipv4/raw.c | 28
-rw-r--r-- net/ipv4/route.c | 12
-rw-r--r-- net/ipv4/tcp.c | 161
-rw-r--r-- net/ipv4/tcp_fastopen.c | 8
-rw-r--r-- net/ipv4/tcp_input.c | 69
-rw-r--r-- net/ipv4/tcp_ipv4.c | 26
-rw-r--r-- net/ipv4/tcp_metrics.c | 89
-rw-r--r-- net/ipv4/tcp_minisocks.c | 18
-rw-r--r-- net/ipv4/tcp_output.c | 46
-rw-r--r-- net/ipv4/tcp_timer.c | 93
-rw-r--r-- net/ipv4/udp.c | 105
-rw-r--r-- net/ipv4/udp_offload.c | 23
-rw-r--r-- net/ipv4/udp_tunnel_core.c | 2
-rw-r--r-- net/ipv4/xfrm4_policy.c | 11
-rw-r--r-- net/ipv6/Kconfig | 2
-rw-r--r-- net/ipv6/addrconf.c | 107
-rw-r--r-- net/ipv6/af_inet6.c | 22
-rw-r--r-- net/ipv6/anycast.c | 2
-rw-r--r-- net/ipv6/datagram.c | 9
-rw-r--r-- net/ipv6/exthdrs.c | 7
-rw-r--r-- net/ipv6/icmp.c | 11
-rw-r--r-- net/ipv6/ila/ila_main.c | 1
-rw-r--r-- net/ipv6/ila/ila_xlat.c | 1
-rw-r--r-- net/ipv6/inet6_hashtables.c | 69
-rw-r--r-- net/ipv6/ip6_fib.c | 55
-rw-r--r-- net/ipv6/ip6_gre.c | 3
-rw-r--r-- net/ipv6/ip6_output.c | 18
-rw-r--r-- net/ipv6/ip6_vti.c | 4
-rw-r--r-- net/ipv6/ip6mr.c | 2
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 22
-rw-r--r-- net/ipv6/mcast.c | 8
-rw-r--r-- net/ipv6/ndisc.c | 20
-rw-r--r-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 11
-rw-r--r-- net/ipv6/ping.c | 3
-rw-r--r-- net/ipv6/raw.c | 23
-rw-r--r-- net/ipv6/route.c | 30
-rw-r--r-- net/ipv6/rpl_iptunnel.c | 3
-rw-r--r-- net/ipv6/seg6_local.c | 108
-rw-r--r-- net/ipv6/tcp_ipv6.c | 14
-rw-r--r-- net/ipv6/udp.c | 115
-rw-r--r-- net/ipv6/udp_offload.c | 10
-rw-r--r-- net/ipv6/udplite.c | 1
-rw-r--r-- net/ipv6/xfrm6_policy.c | 6
-rw-r--r-- net/key/af_key.c | 5
-rw-r--r-- net/l2tp/l2tp_ip.c | 2
-rw-r--r-- net/l2tp/l2tp_ip6.c | 6
-rw-r--r-- net/llc/af_llc.c | 2
-rw-r--r-- net/llc/llc_conn.c | 60
-rw-r--r-- net/llc/llc_if.c | 2
-rw-r--r-- net/llc/llc_input.c | 3
-rw-r--r-- net/llc/llc_sap.c | 18
-rw-r--r-- net/mac80211/cfg.c | 27
-rw-r--r-- net/mac80211/fils_aead.c | 2
-rw-r--r-- net/mac80211/ieee80211_i.h | 2
-rw-r--r-- net/mac80211/key.c | 2
-rw-r--r-- net/mac80211/mesh.h | 1
-rw-r--r-- net/mac80211/rx.c | 16
-rw-r--r-- net/mac80211/wpa.c | 2
-rw-r--r-- net/mptcp/Makefile | 2
-rw-r--r-- net/mptcp/bpf.c | 15
-rw-r--r-- net/mptcp/ctrl.c | 14
-rw-r--r-- net/mptcp/pm.c | 9
-rw-r--r-- net/mptcp/pm_netlink.c | 33
-rw-r--r-- net/mptcp/protocol.c | 502
-rw-r--r-- net/mptcp/protocol.h | 42
-rw-r--r-- net/mptcp/sched.c | 173
-rw-r--r-- net/mptcp/sockopt.c | 79
-rw-r--r-- net/mptcp/subflow.c | 60
-rw-r--r-- net/ncsi/ncsi-netlink.c | 2
-rw-r--r-- net/ncsi/ncsi-netlink.h | 2
-rw-r--r-- net/netfilter/core.c | 6
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 4
-rw-r--r-- net/netfilter/nf_bpf_link.c | 125
-rw-r--r-- net/netfilter/nf_conntrack_bpf.c | 1
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 22
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_helper.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 8
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c | 10
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 6
-rw-r--r-- net/netfilter/nf_flow_table_offload.c | 22
-rw-r--r-- net/netfilter/nf_nat_core.c | 6
-rw-r--r-- net/netfilter/nf_tables_api.c | 557
-rw-r--r-- net/netfilter/nf_tables_offload.c | 13
-rw-r--r-- net/netfilter/nfnetlink_log.c | 6
-rw-r--r-- net/netfilter/nft_byteorder.c | 14
-rw-r--r-- net/netfilter/nft_cmp.c | 2
-rw-r--r-- net/netfilter/nft_ct.c | 4
-rw-r--r-- net/netfilter/nft_dynset.c | 3
-rw-r--r-- net/netfilter/nft_fib.c | 15
-rw-r--r-- net/netfilter/nft_flow_offload.c | 6
-rw-r--r-- net/netfilter/nft_immediate.c | 35
-rw-r--r-- net/netfilter/nft_lookup.c | 6
-rw-r--r-- net/netfilter/nft_masq.c | 8
-rw-r--r-- net/netfilter/nft_meta.c | 6
-rw-r--r-- net/netfilter/nft_nat.c | 8
-rw-r--r-- net/netfilter/nft_objref.c | 8
-rw-r--r-- net/netfilter/nft_osf.c | 6
-rw-r--r-- net/netfilter/nft_redir.c | 8
-rw-r--r-- net/netfilter/nft_set_hash.c | 88
-rw-r--r-- net/netfilter/nft_set_pipapo.c | 101
-rw-r--r-- net/netfilter/nft_set_rbtree.c | 167
-rw-r--r-- net/netfilter/nft_socket.c | 2
-rw-r--r-- net/netfilter/x_tables.c | 5
-rw-r--r-- net/netfilter/xt_repldata.h | 2
-rw-r--r-- net/netfilter/xt_socket.c | 4
-rw-r--r-- net/netlabel/netlabel_cipso_v4.h | 3
-rw-r--r-- net/netlink/af_netlink.c | 128
-rw-r--r-- net/netlink/af_netlink.h | 26
-rw-r--r-- net/netlink/diag.c | 10
-rw-r--r-- net/netlink/genetlink.c | 125
-rw-r--r-- net/netrom/af_netrom.c | 5
-rw-r--r-- net/nfc/netlink.c | 4
-rw-r--r-- net/openvswitch/actions.c | 42
-rw-r--r-- net/openvswitch/conntrack.c | 83
-rw-r--r-- net/openvswitch/datapath.c | 53
-rw-r--r-- net/openvswitch/drop.h | 41
-rw-r--r-- net/openvswitch/flow_netlink.c | 10
-rw-r--r-- net/openvswitch/meter.c | 10
-rw-r--r-- net/packet/af_packet.c | 34
-rw-r--r-- net/qrtr/af_qrtr.c | 5
-rw-r--r-- net/qrtr/ns.c | 139
-rw-r--r-- net/rds/rdma_transport.h | 1
-rw-r--r-- net/rds/rds.h | 3
-rw-r--r-- net/rds/tcp.h | 1
-rw-r--r-- net/sched/Kconfig | 4
-rw-r--r-- net/sched/act_api.c | 2
-rw-r--r-- net/sched/act_ct.c | 3
-rw-r--r-- net/sched/cls_bpf.c | 99
-rw-r--r-- net/sched/cls_flower.c | 149
-rw-r--r-- net/sched/cls_fw.c | 11
-rw-r--r-- net/sched/cls_matchall.c | 35
-rw-r--r-- net/sched/cls_route.c | 1
-rw-r--r-- net/sched/cls_u32.c | 105
-rw-r--r-- net/sched/em_meta.c | 6
-rw-r--r-- net/sched/sch_api.c | 53
-rw-r--r-- net/sched/sch_drr.c | 11
-rw-r--r-- net/sched/sch_hfsc.c | 14
-rw-r--r-- net/sched/sch_htb.c | 17
-rw-r--r-- net/sched/sch_ingress.c | 61
-rw-r--r-- net/sched/sch_mqprio.c | 14
-rw-r--r-- net/sched/sch_netem.c | 49
-rw-r--r-- net/sched/sch_qfq.c | 30
-rw-r--r-- net/sched/sch_taprio.c | 83
-rw-r--r-- net/sctp/input.c | 2
-rw-r--r-- net/sctp/protocol.c | 5
-rw-r--r-- net/sctp/socket.c | 7
-rw-r--r-- net/smc/af_smc.c | 167
-rw-r--r-- net/smc/smc.h | 7
-rw-r--r-- net/smc/smc_clc.c | 151
-rw-r--r-- net/smc/smc_clc.h | 53
-rw-r--r-- net/smc/smc_core.c | 38
-rw-r--r-- net/smc/smc_core.h | 26
-rw-r--r-- net/smc/smc_ib.h | 1
-rw-r--r-- net/smc/smc_llc.c | 25
-rw-r--r-- net/smc/smc_sysctl.c | 10
-rw-r--r-- net/socket.c | 167
-rw-r--r-- net/sunrpc/rpc_pipe.c | 2
-rw-r--r-- net/sunrpc/svcsock.c | 53
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c | 9
-rw-r--r-- net/sunrpc/xprtsock.c | 45
-rw-r--r-- net/switchdev/switchdev.c | 25
-rw-r--r-- net/tipc/addr.h | 1
-rw-r--r-- net/tipc/bearer.h | 2
-rw-r--r-- net/tipc/core.h | 2
-rw-r--r-- net/tipc/crypto.c | 3
-rw-r--r-- net/tipc/link.c | 10
-rw-r--r-- net/tipc/link.h | 2
-rw-r--r-- net/tipc/name_distr.h | 1
-rw-r--r-- net/tipc/net.h | 1
-rw-r--r-- net/tipc/netlink_compat.c | 4
-rw-r--r-- net/tipc/node.c | 6
-rw-r--r-- net/tipc/socket.c | 2
-rw-r--r-- net/tipc/udp_media.c | 2
-rw-r--r-- net/tls/tls.h | 60
-rw-r--r-- net/tls/tls_device.c | 122
-rw-r--r-- net/tls/tls_device_fallback.c | 62
-rw-r--r-- net/tls/tls_main.c | 277
-rw-r--r-- net/tls/tls_strp.c | 3
-rw-r--r-- net/tls/tls_sw.c | 318
-rw-r--r-- net/unix/af_unix.c | 25
-rw-r--r-- net/unix/scm.c | 3
-rw-r--r-- net/vmw_vsock/virtio_transport_common.c | 104
-rw-r--r-- net/vmw_vsock/vmci_transport.h | 3
-rw-r--r-- net/wireless/core.h | 2
-rw-r--r-- net/wireless/mlme.c | 13
-rw-r--r-- net/wireless/nl80211.c | 13
-rw-r--r-- net/wireless/nl80211.h | 1
-rw-r--r-- net/wireless/ocb.c | 3
-rw-r--r-- net/wireless/pmsr.c | 3
-rw-r--r-- net/wireless/scan.c | 2
-rw-r--r-- net/wireless/util.c | 2
-rw-r--r-- net/xdp/xsk.c | 369
-rw-r--r-- net/xdp/xsk_buff_pool.c | 7
-rw-r--r-- net/xdp/xsk_queue.h | 95
-rw-r--r-- net/xfrm/xfrm_compat.c | 2
-rw-r--r-- net/xfrm/xfrm_device.c | 13
-rw-r--r-- net/xfrm/xfrm_input.c | 22
-rw-r--r-- net/xfrm/xfrm_interface_core.c | 4
-rw-r--r-- net/xfrm/xfrm_policy.c | 2
-rw-r--r-- net/xfrm/xfrm_state.c | 8
-rw-r--r-- net/xfrm/xfrm_user.c | 15
360 files changed, 18730 insertions, 14851 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b90781b9ece6..2a7f1b15714a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -354,6 +354,26 @@ out:
return 0;
}
+static int vlan_hwtstamp_get(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+
+ return generic_hwtstamp_get_lower(real_dev, cfg);
+}
+
+static int vlan_hwtstamp_set(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+
+ if (!net_eq(dev_net(dev), dev_net(real_dev)))
+ return -EOPNOTSUPP;
+
+ return generic_hwtstamp_set_lower(real_dev, cfg, extack);
+}
+
static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
@@ -365,14 +385,9 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
- case SIOCSHWTSTAMP:
- if (!net_eq(dev_net(dev), dev_net(real_dev)))
- break;
- fallthrough;
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCGHWTSTAMP:
if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
break;
@@ -1081,6 +1096,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_fix_features = vlan_dev_fix_features,
.ndo_get_iflink = vlan_dev_get_iflink,
.ndo_fill_forward_path = vlan_dev_fill_forward_path,
+ .ndo_hwtstamp_get = vlan_hwtstamp_get,
+ .ndo_hwtstamp_set = vlan_hwtstamp_set,
};
static void vlan_dev_free(struct net_device *dev)
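Note on the vlan_dev.c hunks above: SIOCGHWTSTAMP/SIOCSHWTSTAMP move out of the generic ioctl path into the new ndo_hwtstamp_get/ndo_hwtstamp_set callbacks, which simply delegate to the real (lower) device; the net_eq() check guards only the set path, matching the old SIOCSHWTSTAMP behaviour. The userspace ABI is unchanged; a minimal sketch of querying a VLAN interface's timestamping config through the long-standing SIOCGHWTSTAMP UAPI ("eth0.100" is a hypothetical interface name):

/* Minimal sketch: query hardware timestamping config on a VLAN device.
 * The ioctl is forwarded to the real (lower) device by the new
 * ndo_hwtstamp_get callback. "eth0.100" is a hypothetical interface.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
	struct hwtstamp_config cfg = { 0 };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	strncpy(ifr.ifr_name, "eth0.100", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;

	if (ioctl(fd, SIOCGHWTSTAMP, &ifr) < 0)
		perror("SIOCGHWTSTAMP");
	else
		printf("tx_type=%d rx_filter=%d\n", cfg.tx_type, cfg.rx_filter);

	close(fd);
	return 0;
}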
diff --git a/net/9p/client.c b/net/9p/client.c
index a3340268ec8d..86bbc7147fc1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -904,7 +904,7 @@ EXPORT_SYMBOL(do_trace_9p_fid_put);
static int p9_client_version(struct p9_client *c)
{
- int err = 0;
+ int err;
struct p9_req_t *req;
char *version = NULL;
int msize;
@@ -975,7 +975,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
struct p9_client *clnt;
char *client_id;
- err = 0;
clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
return ERR_PTR(-ENOMEM);
@@ -1094,7 +1093,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
const char *uname, kuid_t n_uname,
const char *aname)
{
- int err = 0;
+ int err;
struct p9_req_t *req;
struct p9_fid *fid;
struct p9_qid qid;
@@ -1147,7 +1146,6 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
struct p9_req_t *req;
u16 nwqids, count;
- err = 0;
wqids = NULL;
clnt = oldfid->clnt;
if (clone) {
@@ -1224,7 +1222,6 @@ int p9_client_open(struct p9_fid *fid, int mode)
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
- err = 0;
if (fid->mode != -1)
return -EINVAL;
@@ -1262,7 +1259,7 @@ EXPORT_SYMBOL(p9_client_open);
int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
u32 mode, kgid_t gid, struct p9_qid *qid)
{
- int err = 0;
+ int err;
struct p9_client *clnt;
struct p9_req_t *req;
int iounit;
@@ -1314,7 +1311,6 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
fid->fid, name, perm, mode);
- err = 0;
clnt = fid->clnt;
if (fid->mode != -1)
@@ -1350,7 +1346,7 @@ EXPORT_SYMBOL(p9_client_fcreate);
int p9_client_symlink(struct p9_fid *dfid, const char *name,
const char *symtgt, kgid_t gid, struct p9_qid *qid)
{
- int err = 0;
+ int err;
struct p9_client *clnt;
struct p9_req_t *req;
@@ -1402,13 +1398,12 @@ EXPORT_SYMBOL(p9_client_link);
int p9_client_fsync(struct p9_fid *fid, int datasync)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
fid->fid, datasync);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
@@ -1428,7 +1423,7 @@ EXPORT_SYMBOL(p9_client_fsync);
int p9_client_clunk(struct p9_fid *fid)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
int retries = 0;
@@ -1436,7 +1431,6 @@ int p9_client_clunk(struct p9_fid *fid)
again:
p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
fid->fid, retries);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
@@ -1465,12 +1459,11 @@ EXPORT_SYMBOL(p9_client_clunk);
int p9_client_remove(struct p9_fid *fid)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
@@ -1680,7 +1673,6 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
if (!ret)
return ERR_PTR(-ENOMEM);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
@@ -1733,7 +1725,6 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
if (!ret)
return ERR_PTR(-ENOMEM);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
@@ -1812,11 +1803,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
wst->size = p9_client_statsize(wst, clnt->proto_version);
p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
@@ -1851,11 +1841,10 @@ EXPORT_SYMBOL(p9_client_wstat);
int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n",
@@ -1887,7 +1876,6 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
@@ -1921,11 +1909,10 @@ EXPORT_SYMBOL(p9_client_statfs);
int p9_client_rename(struct p9_fid *fid,
struct p9_fid *newdirfid, const char *name)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
@@ -1949,11 +1936,10 @@ EXPORT_SYMBOL(p9_client_rename);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
struct p9_fid *newdirfid, const char *new_name)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = olddirfid->clnt;
p9_debug(P9_DEBUG_9P,
@@ -1986,7 +1972,6 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
struct p9_client *clnt;
struct p9_fid *attr_fid;
- err = 0;
clnt = file_fid->clnt;
attr_fid = p9_fid_create(clnt);
if (!attr_fid) {
@@ -2027,14 +2012,13 @@ EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
u64 attr_size, int flags)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P,
">>> TXATTRCREATE fid %d name %s size %llu flag %d\n",
fid->fid, name, attr_size, flags);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
fid->fid, name, attr_size, flags);
@@ -2063,7 +2047,6 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
fid->fid, offset, count);
- err = 0;
clnt = fid->clnt;
rsize = fid->iounit;
@@ -2122,7 +2105,6 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
@@ -2153,7 +2135,6 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
fid->fid, name, mode, from_kgid(&init_user_ns, gid));
@@ -2182,7 +2163,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
@@ -2214,7 +2194,6 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
@@ -2251,7 +2230,6 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
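The client.c hunks above normalize error-variable initialization: `err = 0;` statements that are dead (err is always assigned before use) are dropped, while functions whose unwind path can return err before any explicit assignment now initialize it at the declaration. A generic sketch of why the latter matters with goto-style unwinding (illustrative only, not a 9p function):

/* Sketch of the pattern being normalized: with goto-style unwinding,
 * "err" must hold 0 on paths that reach the exit label without any
 * explicit assignment, so it is initialized at its declaration.
 */
#include <errno.h>
#include <stdlib.h>

struct req { int id; };

static struct req *get_req(void) { return malloc(sizeof(struct req)); }
static void put_req(struct req *r) { free(r); }

static int do_op(int want_fail)
{
	int err = 0;		/* returned as-is on the success path */
	struct req *req;

	req = get_req();
	if (!req)
		return -ENOMEM;	/* early return, err not involved */

	if (want_fail) {
		err = -EIO;
		goto out;	/* error path sets err explicitly */
	}

	/* success path falls through with err == 0 */
out:
	put_req(req);
	return err;
}

int main(void) { return do_op(0); }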
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 00b684616e8d..c4015f30f9fa 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1019,7 +1019,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
}
}
- err = csocket->ops->connect(csocket,
+ err = READ_ONCE(csocket->ops)->connect(csocket,
(struct sockaddr *)&sin_server,
sizeof(struct sockaddr_in), 0);
if (err < 0) {
@@ -1060,7 +1060,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
return err;
}
- err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+ err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server,
sizeof(struct sockaddr_un) - 1, 0);
if (err < 0) {
pr_err("%s (%d): problem connecting socket: %s: %d\n",
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb781e3..e305071eb7b8 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -384,7 +384,7 @@ static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
void *to = req->rc.sdata + in_hdr_len;
// Fits entirely into the static data? Nothing to do.
- if (req->rc.size < in_hdr_len)
+ if (req->rc.size < in_hdr_len || !pages)
return;
// Really long error message? Tough, truncate the reply. Might get
@@ -428,7 +428,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[4];
- size_t offs;
+ size_t offs = 0;
int need_drop = 0;
int kicked = 0;
@@ -501,8 +501,8 @@ req_retry_pinned:
if (in_pages) {
sgs[out_sgs + in_sgs++] = chan->sg + out + in;
- in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
- in_pages, in_nr_pages, offs, inlen);
+ pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+ in_pages, in_nr_pages, offs, inlen);
}
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
diff --git a/net/Kconfig b/net/Kconfig
index 2fb25b534df5..d532ec33f1fe 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,11 @@ config NET_INGRESS
config NET_EGRESS
bool
+config NET_XGRESS
+ select NET_INGRESS
+ select NET_EGRESS
+ bool
+
config NET_REDIRECT
bool
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 828fb393ee94..74b49c35ddc1 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2516,6 +2516,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
},
.gw = {
.init_sel_class = batadv_iv_init_sel_class,
+ .sel_class_max = BATADV_TQ_MAX_VALUE,
.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
.is_eligible = batadv_iv_gw_is_eligible,
.dump = batadv_iv_gw_dump,
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 54e41fc709c3..ac11f1f08db0 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
@@ -34,7 +35,6 @@
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
#include "gateway_client.h"
-#include "gateway_common.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -512,25 +512,6 @@ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->gw.sel_class, 50);
}
-static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
- char *buff, size_t count)
-{
- u32 old_class, class;
-
- if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
- "B.A.T.M.A.N. V GW selection class",
- &class))
- return -EINVAL;
-
- old_class = atomic_read(&bat_priv->gw.sel_class);
- atomic_set(&bat_priv->gw.sel_class, class);
-
- if (old_class != class)
- batadv_gw_reselect(bat_priv);
-
- return count;
-}
-
/**
* batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW
* @gw_node: the GW to retrieve the metric for
@@ -818,7 +799,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
},
.gw = {
.init_sel_class = batadv_v_init_sel_class,
- .store_sel_class = batadv_v_store_sel_class,
+ .sel_class_max = U32_MAX,
.get_best_gw_node = batadv_v_gw_get_best_gw_node,
.is_eligible = batadv_v_gw_is_eligible,
.dump = batadv_v_gw_dump,
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index acff565849ae..1d704574e6bf 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -505,7 +505,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_elp_packet *elp_packet;
struct batadv_hard_iface *primary_if;
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *ethhdr;
bool res;
int ret = NET_RX_DROP;
@@ -513,6 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
if (!res)
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
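The bat_v_elp.c fix defers reading the Ethernet header until after batadv_check_management_packet() has validated the skb; the old code computed the header pointer before the packet was known to contain one. The ordering, in a standalone sketch (plain C; parse_eth is a hypothetical helper):

/* Illustration of the ordering the fix enforces: parse a header only
 * after the buffer has been validated to contain it.
 */
#include <stdint.h>
#include <stdio.h>

struct eth_hdr {
	uint8_t h_dest[6];
	uint8_t h_source[6];
	uint16_t h_proto;
} __attribute__((packed));

static const struct eth_hdr *parse_eth(const uint8_t *buf, size_t len)
{
	if (len < sizeof(struct eth_hdr))	/* validate first */
		return NULL;
	return (const struct eth_hdr *)buf;	/* then dereference */
}

int main(void)
{
	uint8_t frame[64] = { 0 };
	const struct eth_hdr *eth = parse_eth(frame, sizeof(frame));

	if (eth)
		printf("proto 0x%04x\n", eth->h_proto);
	return 0;
}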
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e710e9afe78f..e503ee0d896b 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -123,8 +123,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ if (hard_iface->if_status != BATADV_IF_ACTIVE) {
+ kfree_skb(skb);
return;
+ }
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
@@ -985,7 +987,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_ogm2_packet *ogm_packet;
- struct ethhdr *ethhdr = eth_hdr(skb);
+ struct ethhdr *ethhdr;
int ogm_offset;
u8 *packet_pos;
int ret = NET_RX_DROP;
@@ -999,6 +1001,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
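Besides the same eth_hdr() ordering fix, bat_v_ogm.c now frees the skb when the outgoing interface is not active; previously the early return leaked it. The general rule, sketched outside the kernel (toy buffer type, not an skb):

/* The leak class fixed above: a function that consumes its buffer must
 * free it on every return path, including early bail-outs.
 */
#include <stdlib.h>

struct buf { char *data; };

static void buf_free(struct buf *b)
{
	if (b) {
		free(b->data);
		free(b);
	}
}

/* Consumes "b": the caller must not touch it afterwards. */
static void send_to_if(struct buf *b, int if_active)
{
	if (!if_active) {
		buf_free(b);	/* the fix: free on the early return, too */
		return;
	}

	/* ... transmit ... */
	buf_free(b);
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	send_to_if(b, 0);	/* no leak even when the interface is down */
	return 0;
}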
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 6a964a773f57..2dd36ef03c84 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -9,124 +9,15 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
-#include <linux/errno.h>
-#include <linux/kstrtox.h>
-#include <linux/limits.h>
-#include <linux/math64.h>
-#include <linux/netdevice.h>
#include <linux/stddef.h>
-#include <linux/string.h>
+#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "gateway_client.h"
-#include "log.h"
#include "tvlv.h"
/**
- * batadv_parse_throughput() - parse supplied string buffer to extract
- * throughput information
- * @net_dev: the soft interface net device
- * @buff: string buffer to parse
- * @description: text shown when throughput string cannot be parsed
- * @throughput: pointer holding the returned throughput information
- *
- * Return: false on parse error and true otherwise.
- */
-bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
- const char *description, u32 *throughput)
-{
- enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
- u64 lthroughput;
- char *tmp_ptr;
- int ret;
-
- if (strlen(buff) > 4) {
- tmp_ptr = buff + strlen(buff) - 4;
-
- if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
- bw_unit_type = BATADV_BW_UNIT_MBIT;
-
- if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
- bw_unit_type == BATADV_BW_UNIT_MBIT)
- *tmp_ptr = '\0';
- }
-
- ret = kstrtou64(buff, 10, &lthroughput);
- if (ret) {
- batadv_err(net_dev,
- "Invalid throughput speed for %s: %s\n",
- description, buff);
- return false;
- }
-
- switch (bw_unit_type) {
- case BATADV_BW_UNIT_MBIT:
- /* prevent overflow */
- if (U64_MAX / 10 < lthroughput) {
- batadv_err(net_dev,
- "Throughput speed for %s too large: %s\n",
- description, buff);
- return false;
- }
-
- lthroughput *= 10;
- break;
- case BATADV_BW_UNIT_KBIT:
- default:
- lthroughput = div_u64(lthroughput, 100);
- break;
- }
-
- if (lthroughput > U32_MAX) {
- batadv_err(net_dev,
- "Throughput speed for %s too large: %s\n",
- description, buff);
- return false;
- }
-
- *throughput = lthroughput;
-
- return true;
-}
-
-/**
- * batadv_parse_gw_bandwidth() - parse supplied string buffer to extract
- * download and upload bandwidth information
- * @net_dev: the soft interface net device
- * @buff: string buffer to parse
- * @down: pointer holding the returned download bandwidth information
- * @up: pointer holding the returned upload bandwidth information
- *
- * Return: false on parse error and true otherwise.
- */
-static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
- u32 *down, u32 *up)
-{
- char *slash_ptr;
- bool ret;
-
- slash_ptr = strchr(buff, '/');
- if (slash_ptr)
- *slash_ptr = 0;
-
- ret = batadv_parse_throughput(net_dev, buff, "download gateway speed",
- down);
- if (!ret)
- return false;
-
- /* we also got some upload info */
- if (slash_ptr) {
- ret = batadv_parse_throughput(net_dev, slash_ptr + 1,
- "upload gateway speed", up);
- if (!ret)
- return false;
- }
-
- return true;
-}
-
-/**
* batadv_gw_tvlv_container_update() - update the gw tvlv container after
* gateway setting change
* @bat_priv: the bat priv with all the soft interface information
@@ -156,57 +47,6 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
}
/**
- * batadv_gw_bandwidth_set() - Parse and set download/upload gateway bandwidth
- * from supplied string buffer
- * @net_dev: netdev struct of the soft interface
- * @buff: the buffer containing the user data
- * @count: number of bytes in the buffer
- *
- * Return: 'count' on success or a negative error code in case of failure
- */
-ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
- size_t count)
-{
- struct batadv_priv *bat_priv = netdev_priv(net_dev);
- u32 down_curr;
- u32 up_curr;
- u32 down_new = 0;
- u32 up_new = 0;
- bool ret;
-
- down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down);
- up_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_up);
-
- ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new);
- if (!ret)
- return -EINVAL;
-
- if (!down_new)
- down_new = 1;
-
- if (!up_new)
- up_new = down_new / 5;
-
- if (!up_new)
- up_new = 1;
-
- if (down_curr == down_new && up_curr == up_new)
- return count;
-
- batadv_gw_reselect(bat_priv);
- batadv_info(net_dev,
- "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n",
- down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10,
- down_new / 10, down_new % 10, up_new / 10, up_new % 10);
-
- atomic_set(&bat_priv->gw.bandwidth_down, down_new);
- atomic_set(&bat_priv->gw.bandwidth_up, up_new);
- batadv_gw_tvlv_container_update(bat_priv);
-
- return count;
-}
-
-/**
* batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
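gateway_common.c loses its string parsers entirely: with the sysfs-era store path gone, bandwidth reaches the kernel pre-parsed as u32 netlink attributes, so batadv_parse_throughput() and batadv_gw_bandwidth_set() have no remaining callers. For reference, a standalone re-creation of the removed parsing rules (suffix handling and the 100 kbit/s unit conversion; error handling simplified relative to the original):

/* Standalone re-creation of the removed batadv_parse_throughput()
 * logic: "NNmbit" -> NN * 10, otherwise the value is taken as kbit
 * and divided by 100, yielding 100 kbit/s units.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static int parse_throughput(const char *s, uint32_t *out)
{
	uint64_t val;
	size_t len = strlen(s);
	int mbit = 0;
	char buf[32];

	if (len == 0 || len >= sizeof(buf))
		return -1;
	memcpy(buf, s, len + 1);

	if (len > 4) {
		char *unit = buf + len - 4;

		if (!strcasecmp(unit, "mbit"))
			mbit = 1;
		if (mbit || !strcasecmp(unit, "kbit"))
			*unit = '\0';	/* strip the unit suffix */
	}

	val = strtoull(buf, NULL, 10);
	if (mbit)
		val *= 10;		/* 1 Mbit = 10 * 100 kbit */
	else
		val /= 100;		/* kbit -> 100 kbit units */

	if (val > UINT32_MAX)
		return -1;
	*out = (uint32_t)val;
	return 0;
}

int main(void)
{
	uint32_t t;

	if (!parse_throughput("10mbit", &t))
		printf("10mbit -> %u (x100 kbit/s)\n", t);	/* 100 */
	return 0;
}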
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 87c37f907261..5d097d6a1dd9 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -9,9 +9,6 @@
#include "main.h"
-#include <linux/netdevice.h>
-#include <linux/types.h>
-
/**
* enum batadv_bandwidth_units - bandwidth unit types
*/
@@ -27,12 +24,8 @@ enum batadv_bandwidth_units {
#define BATADV_GW_MODE_CLIENT_NAME "client"
#define BATADV_GW_MODE_SERVER_NAME "server"
-ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
- size_t count);
void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv);
void batadv_gw_init(struct batadv_priv *bat_priv);
void batadv_gw_free(struct batadv_priv *bat_priv);
-bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
- const char *description, u32 *throughput);
#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 41c1ad33d009..96a412beab2d 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/gfp.h>
@@ -630,7 +631,19 @@ out:
*/
void batadv_update_min_mtu(struct net_device *soft_iface)
{
- soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ int limit_mtu;
+ int mtu;
+
+ mtu = batadv_hardif_min_mtu(soft_iface);
+
+ if (bat_priv->mtu_set_by_user)
+ limit_mtu = bat_priv->mtu_set_by_user;
+ else
+ limit_mtu = ETH_DATA_LEN;
+
+ mtu = min(mtu, limit_mtu);
+ dev_set_mtu(soft_iface, mtu);
/* Check if the local translate table should be cleaned up to match a
* new (and smaller) MTU.
@@ -699,9 +712,14 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
struct batadv_priv *bat_priv;
__be16 ethertype = htons(ETH_P_BATMAN);
int max_header_len = batadv_max_header_len();
+ unsigned int required_mtu;
+ unsigned int hardif_mtu;
int ret;
- if (hard_iface->net_dev->mtu < ETH_MIN_MTU + max_header_len)
+ hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu);
+ required_mtu = READ_ONCE(soft_iface->mtu) + max_header_len;
+
+ if (hardif_mtu < ETH_MIN_MTU + max_header_len)
return -EINVAL;
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
@@ -734,18 +752,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->net_dev->name);
if (atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
+ hardif_mtu < required_mtu)
batadv_info(hard_iface->soft_iface,
"The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n",
- hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + max_header_len);
+ hard_iface->net_dev->name, hardif_mtu,
+ required_mtu);
if (!atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
+ hardif_mtu < required_mtu)
batadv_info(hard_iface->soft_iface,
"The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n",
- hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + max_header_len);
+ hard_iface->net_dev->name, hardif_mtu,
+ required_mtu);
if (batadv_hardif_is_iface_up(hard_iface))
batadv_hardif_activate_interface(hard_iface);
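batadv_update_min_mtu() previously wrote the hard-interface minimum straight into soft_iface->mtu; it now clamps against the user-configured MTU (or ETH_DATA_LEN when none was set) and applies the result via dev_set_mtu(). The selection rule in isolation (standalone C, not kernel code):

/* The clamping rule introduced above: the soft interface MTU becomes
 * the smaller of what the hard interfaces can carry and what the user
 * configured (default limit: ETH_DATA_LEN).
 */
#include <stdio.h>

#define ETH_DATA_LEN 1500

static int min_int(int a, int b) { return a < b ? a : b; }

/* mtu_set_by_user == 0 means the user never set an MTU */
static int effective_mtu(int hardif_min_mtu, int mtu_set_by_user)
{
	int limit = mtu_set_by_user ? mtu_set_by_user : ETH_DATA_LEN;

	return min_int(hardif_min_mtu, limit);
}

int main(void)
{
	printf("%d\n", effective_mtu(1532, 0));    /* 1500 */
	printf("%d\n", effective_mtu(1532, 1400)); /* 1400 */
	printf("%d\n", effective_mtu(1280, 0));    /* 1280 */
	return 0;
}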
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 156ed39eded1..10007c5894a1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.1"
+#define BATADV_SOURCE_VERSION "2023.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ad5714f737be..0c64d81a7761 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -377,7 +377,7 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
+static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
{
struct sk_buff *msg;
int ret;
@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+
+ rtnl_lock();
batadv_update_min_mtu(bat_priv->soft_iface);
+ rtnl_unlock();
}
if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
@@ -548,15 +551,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
* algorithm in use implements the GW API
*/
- u32 sel_class_max = 0xffffffffu;
+ u32 sel_class_max = bat_priv->algo_ops->gw.sel_class_max;
u32 sel_class;
attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
sel_class = nla_get_u32(attr);
- if (!bat_priv->algo_ops->gw.store_sel_class)
- sel_class_max = BATADV_TQ_MAX_VALUE;
-
if (sel_class >= 1 && sel_class <= sel_class_max) {
atomic_set(&bat_priv->gw.sel_class, sel_class);
batadv_gw_reselect(bat_priv);
@@ -858,8 +858,8 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface)
{
struct sk_buff *msg;
int ret;
@@ -1073,8 +1073,8 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
- struct batadv_softif_vlan *vlan)
+static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan)
{
struct sk_buff *msg;
int ret;
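With store_sel_class gone, the netlink handler validates BATADV_ATTR_GW_SEL_CLASS with a plain range check against the algorithm's new sel_class_max field (BATADV_TQ_MAX_VALUE, i.e. 255, for B.A.T.M.A.N. IV; U32_MAX for B.A.T.M.A.N. V). A standalone sketch of the data-member-instead-of-callback shape:

/* The range check that replaces the per-algorithm store callback:
 * one data member instead of a function pointer. Limits mirror the
 * patch; standalone sketch, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct gw_ops {
	uint32_t sel_class_max;	/* was: ssize_t (*store_sel_class)(...) */
};

static const struct gw_ops batman_iv = { .sel_class_max = 255 };
static const struct gw_ops batman_v  = { .sel_class_max = UINT32_MAX };

static int set_sel_class(const struct gw_ops *ops, uint32_t sel_class)
{
	if (sel_class < 1 || sel_class > ops->sel_class_max)
		return -1;	/* rejected, as in batadv_netlink_set_mesh() */
	/* accepted: store the class and reselect the gateway */
	return 0;
}

int main(void)
{
	printf("%d\n", set_sel_class(&batman_iv, 300));	/* -1: > 255 */
	printf("%d\n", set_sel_class(&batman_v, 300));	/* 0: allowed */
	return 0;
}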
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 48102cc7490c..876d2806a67d 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,12 +21,6 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
u8 result, u32 test_time, u64 total_bytes,
u32 cookie);
-int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
-int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface);
-int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
- struct batadv_softif_vlan *vlan);
-
extern struct genl_family batadv_netlink_family;
#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 5f387786e9a7..afd15b3879f1 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,10 +27,6 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
-int batadv_recv_tt_query(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
-int batadv_recv_roam_adv(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d3fdf82282af..1bf1232a4f75 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -153,11 +153,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+
/* check ranges */
- if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
+ if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
+ bat_priv->mtu_set_by_user = new_mtu;
return 0;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 36ca31252a73..b95c36765d04 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -774,7 +774,6 @@ check_roaming:
if (roamed_back) {
batadv_tt_global_free(bat_priv, tt_global,
"Roaming canceled");
- tt_global = NULL;
} else {
/* The global entry has to be marked as ROAMING and
* has to be kept for consistency purpose
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ca9449ec9836..17d5ea1d8e84 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1547,6 +1547,12 @@ struct batadv_priv {
struct net_device *soft_iface;
/**
+ * @mtu_set_by_user: MTU was set once by user
+ * protected by rtnl_lock
+ */
+ int mtu_set_by_user;
+
+ /**
* @bat_counters: mesh internal traffic statistic counters (see
* batadv_counters)
*/
@@ -2191,11 +2197,10 @@ struct batadv_algo_gw_ops {
void (*init_sel_class)(struct batadv_priv *bat_priv);
/**
- * @store_sel_class: parse and stores a new GW selection class
- * (optional)
+ * @sel_class_max: maximum allowed GW selection class
*/
- ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
- size_t count);
+ u32 sel_class_max;
+
/**
* @get_best_gw_node: select the best GW from the list of available
* nodes (optional)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1c3c7ff5c3c6..336a76165454 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -140,6 +140,35 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto,
return err;
}
+struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
+ struct proto *prot, int proto, gfp_t prio, int kern)
+{
+ struct sock *sk;
+
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk);
+ INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+ sock_reset_flag(sk, SOCK_ZAPPED);
+
+ sk->sk_protocol = proto;
+ sk->sk_state = BT_OPEN;
+
+ /* Init peer information so it can be properly monitored */
+ if (!kern) {
+ spin_lock(&sk->sk_peer_lock);
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL(bt_sock_alloc);
+
void bt_sock_link(struct bt_sock_list *l, struct sock *sk)
{
write_lock(&l->lock);
@@ -158,6 +187,9 @@ EXPORT_SYMBOL(bt_sock_unlink);
void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
BT_DBG("parent %p, sk %p", parent, sk);
sock_hold(sk);
@@ -170,6 +202,19 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ /* Copy credentials from parent since for incoming connections the
+ * socket is allocated by the kernel.
+ */
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
+ sk->sk_peer_pid = get_pid(parent->sk_peer_pid);
+ sk->sk_peer_cred = get_cred(parent->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
+
if (bh)
bh_unlock_sock(sk);
else
@@ -288,8 +333,12 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
&msg->msg_namelen);
- if (bt_sk(sk)->skb_put_cmsg)
- bt_sk(sk)->skb_put_cmsg(skb, msg, sk);
+ if (test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags)) {
+ u8 pkt_status = hci_skb_pkt_status(skb);
+
+ put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
+ sizeof(pkt_status), &pkt_status);
+ }
}
skb_free_datagram(sk, skb);
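bt_accept_enqueue() above now copies the listening socket's peer pid/cred into the kernel-allocated child: references to the parent's values are taken and published under sk_peer_lock, and the old references are dropped only after unlocking. The same swap pattern with a toy refcounted type (not the kernel's pid/cred API):

/* The swap pattern bt_accept_enqueue() uses: take a reference to the
 * parent's credentials, publish it under the child's lock, and drop
 * the old reference only after unlocking.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct cred { int refs; };

static struct cred *cred_get(struct cred *c) { if (c) c->refs++; return c; }

static void cred_put(struct cred *c)
{
	if (c && --c->refs == 0)
		free(c);
}

struct sk { pthread_mutex_t lock; struct cred *peer_cred; };

static void copy_peer_cred(struct sk *child, const struct sk *parent)
{
	struct cred *old;

	pthread_mutex_lock(&child->lock);
	old = child->peer_cred;
	child->peer_cred = cred_get(parent->peer_cred);
	pthread_mutex_unlock(&child->lock);

	cred_put(old);			/* dropped outside the lock */
}

int main(void)
{
	struct sk parent = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct sk child = { PTHREAD_MUTEX_INITIALIZER, NULL };

	parent.peer_cred = calloc(1, sizeof(struct cred));
	parent.peer_cred->refs = 1;

	copy_peer_cred(&child, &parent);
	printf("refs=%d\n", parent.peer_cred->refs);	/* 2 */

	cred_put(child.peer_cred);
	cred_put(parent.peer_cred);
	return 0;
}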
diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h
index 832764dfbfb3..97c87abd129f 100644
--- a/net/bluetooth/amp.h
+++ b/net/bluetooth/amp.h
@@ -28,7 +28,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
int phylink_gen_key(struct hci_conn *hcon, u8 *data, u8 *len, u8 *type);
-void amp_read_loc_info(struct hci_dev *hdev, struct amp_mgr *mgr);
void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle);
void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr);
void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 57d509d77cb4..00d47bcf4d7d 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -205,21 +205,13 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern);
+ sk = bt_sock_alloc(net, sock, &bnep_proto, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
sock->ops = &bnep_sock_ops;
-
sock->state = SS_UNCONNECTED;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&bnep_sk_list, sk);
return 0;
}
diff --git a/net/bluetooth/coredump.c b/net/bluetooth/coredump.c
index d2d2624ec708..ec97a4bab1c9 100644
--- a/net/bluetooth/coredump.c
+++ b/net/bluetooth/coredump.c
@@ -100,8 +100,7 @@ void hci_devcd_reset(struct hci_dev *hdev)
/* Call with hci_dev_lock only. */
static void hci_devcd_free(struct hci_dev *hdev)
{
- if (hdev->dump.head)
- vfree(hdev->dump.head);
+ vfree(hdev->dump.head);
hci_devcd_reset(hdev);
}
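The coredump.c hunk drops a redundant NULL check: the kernel's vfree(), like kfree() and userspace free(), is a defined no-op on NULL, so callers can free unconditionally. Illustrated with the libc equivalent:

/* Why the "if (ptr)" guard could go: freeing NULL is a defined no-op,
 * so the check adds nothing.
 */
#include <stdlib.h>

int main(void)
{
	char *p = NULL;

	free(p);	/* defined no-op on NULL; no check needed */
	return 0;
}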
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 8a85f6cdfbc1..9214189279e8 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -33,7 +33,7 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
size_t complete_len;
/* no space left for name (+ NULL + type + len) */
- if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
+ if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
return ad_len;
/* use complete name if present and fits */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 056f9516e46d..9d5057cef30a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -118,7 +118,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
*/
params->explicit_connect = false;
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
switch (params->auto_connect) {
case HCI_AUTO_CONN_EXPLICIT:
@@ -127,10 +127,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
return;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&params->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(params, &hdev->pend_le_reports);
break;
default:
break;
@@ -178,57 +178,6 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hci_conn_put(conn);
}
-static void le_scan_cleanup(struct work_struct *work)
-{
- struct hci_conn *conn = container_of(work, struct hci_conn,
- le_scan_cleanup);
- struct hci_dev *hdev = conn->hdev;
- struct hci_conn *c = NULL;
-
- BT_DBG("%s hcon %p", hdev->name, conn);
-
- hci_dev_lock(hdev);
-
- /* Check that the hci_conn is still around */
- rcu_read_lock();
- list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) {
- if (c == conn)
- break;
- }
- rcu_read_unlock();
-
- if (c == conn) {
- hci_connect_le_scan_cleanup(conn, 0x00);
- hci_conn_cleanup(conn);
- }
-
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
- hci_conn_put(conn);
-}
-
-static void hci_connect_le_scan_remove(struct hci_conn *conn)
-{
- BT_DBG("%s hcon %p", conn->hdev->name, conn);
-
- /* We can't call hci_conn_del/hci_conn_cleanup here since that
- * could deadlock with another hci_conn_del() call that's holding
- * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work).
- * Instead, grab temporary extra references to the hci_dev and
- * hci_conn and perform the necessary cleanup in a separate work
- * callback.
- */
-
- hci_dev_hold(conn->hdev);
- hci_conn_get(conn);
-
- /* Even though we hold a reference to the hdev, many other
- * things might get cleaned up meanwhile, including the hdev's
- * own workqueue, so we can't use that for scheduling.
- */
- schedule_work(&conn->le_scan_cleanup);
-}
-
static void hci_acl_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -679,13 +628,6 @@ static void hci_conn_timeout(struct work_struct *work)
if (refcnt > 0)
return;
- /* LE connections in scanning state need special handling */
- if (conn->state == BT_CONNECT && conn->type == LE_LINK &&
- test_bit(HCI_CONN_SCANNING, &conn->flags)) {
- hci_connect_le_scan_remove(conn);
- return;
- }
-
hci_abort_conn(conn, hci_proto_disconn_ind(conn));
}
@@ -791,7 +733,9 @@ struct iso_list_data {
u16 sync_handle;
};
int count;
- struct iso_cig_params pdu;
+ bool big_term;
+ bool pa_sync_term;
+ bool big_sync_term;
};
static void bis_list(struct hci_conn *conn, void *data)
@@ -809,17 +753,6 @@ static void bis_list(struct hci_conn *conn, void *data)
d->count++;
}
-static void find_bis(struct hci_conn *conn, void *data)
-{
- struct iso_list_data *d = data;
-
- /* Ignore unicast */
- if (bacmp(&conn->dst, BDADDR_ANY))
- return;
-
- d->count++;
-}
-
static int terminate_big_sync(struct hci_dev *hdev, void *data)
{
struct iso_list_data *d = data;
@@ -828,11 +761,8 @@ static int terminate_big_sync(struct hci_dev *hdev, void *data)
hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);
- /* Check if ISO connection is a BIS and terminate BIG if there are
- * no other connections using it.
- */
- hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
- if (d->count)
+ /* Only terminate BIG if it has been created */
+ if (!d->big_term)
return 0;
return hci_le_terminate_big_sync(hdev, d->big,
@@ -844,19 +774,21 @@ static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
kfree(data);
}
-static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
+static int hci_le_terminate_big(struct hci_dev *hdev, struct hci_conn *conn)
{
struct iso_list_data *d;
int ret;
- bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
+ bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", conn->iso_qos.bcast.big,
+ conn->iso_qos.bcast.bis);
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
- d->big = big;
- d->bis = bis;
+ d->big = conn->iso_qos.bcast.big;
+ d->bis = conn->iso_qos.bcast.bis;
+ d->big_term = test_and_clear_bit(HCI_CONN_BIG_CREATED, &conn->flags);
ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d,
terminate_big_destroy);
@@ -873,31 +805,30 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", d->big,
d->sync_handle);
- /* Check if ISO connection is a BIS and terminate BIG if there are
- * no other connections using it.
- */
- hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
- if (d->count)
- return 0;
+ if (d->big_sync_term)
+ hci_le_big_terminate_sync(hdev, d->big);
- hci_le_big_terminate_sync(hdev, d->big);
+ if (d->pa_sync_term)
+ return hci_le_pa_terminate_sync(hdev, d->sync_handle);
- return hci_le_pa_terminate_sync(hdev, d->sync_handle);
+ return 0;
}
-static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
+static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn)
{
struct iso_list_data *d;
int ret;
- bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
+ bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle);
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->big = big;
- d->sync_handle = sync_handle;
+ d->sync_handle = conn->sync_handle;
+ d->pa_sync_term = test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags);
+ d->big_sync_term = test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags);
ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
terminate_big_destroy);
@@ -916,6 +847,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
static void bis_cleanup(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
+ struct hci_conn *bis;
bt_dev_dbg(hdev, "conn %p", conn);
@@ -923,17 +855,29 @@ static void bis_cleanup(struct hci_conn *conn)
if (!test_and_clear_bit(HCI_CONN_PER_ADV, &conn->flags))
return;
- hci_le_terminate_big(hdev, conn->iso_qos.bcast.big,
- conn->iso_qos.bcast.bis);
+ /* Check if ISO connection is a BIS and terminate advertising
+ * set and BIG if there are no other connections using it.
+ */
+ bis = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big);
+ if (bis)
+ return;
+
+ hci_le_terminate_big(hdev, conn);
} else {
+ bis = hci_conn_hash_lookup_big_any_dst(hdev,
+ conn->iso_qos.bcast.big);
+
+ if (bis)
+ return;
+
hci_le_big_terminate(hdev, conn->iso_qos.bcast.big,
- conn->sync_handle);
+ conn);
}
}
static int remove_cig_sync(struct hci_dev *hdev, void *data)
{
- u8 handle = PTR_ERR(data);
+ u8 handle = PTR_UINT(data);
return hci_le_remove_cig_sync(hdev, handle);
}
@@ -942,7 +886,8 @@ static int hci_le_remove_cig(struct hci_dev *hdev, u8 handle)
{
bt_dev_dbg(hdev, "handle 0x%2.2x", handle);
- return hci_cmd_sync_queue(hdev, remove_cig_sync, ERR_PTR(handle), NULL);
+ return hci_cmd_sync_queue(hdev, remove_cig_sync, UINT_PTR(handle),
+ NULL);
}
static void find_cis(struct hci_conn *conn, void *data)
@@ -983,6 +928,25 @@ static void cis_cleanup(struct hci_conn *conn)
hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
}
+static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+ u16 handle = HCI_CONN_HANDLE_MAX + 1;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ /* Find the first unused handle */
+ if (handle == 0xffff || c->handle != handle)
+ break;
+ handle++;
+ }
+ rcu_read_unlock();
+
+ return handle;
+}
+
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role)
{
@@ -996,7 +960,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
- conn->handle = HCI_CONN_HANDLE_UNSET;
+ conn->handle = hci_conn_hash_alloc_unset(hdev);
conn->hdev = hdev;
conn->type = type;
conn->role = role;
@@ -1059,7 +1023,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
- INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup);
atomic_set(&conn->refcnt, 0);
@@ -1081,6 +1044,29 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
return conn;
}
+static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
+{
+ if (!reason)
+ reason = HCI_ERROR_REMOTE_USER_TERM;
+
+ /* Due to race, SCO/ISO conn might be not established yet at this point,
+ * and nothing else will clean it up. In other cases it is done via HCI
+ * events.
+ */
+ switch (conn->type) {
+ case SCO_LINK:
+ case ESCO_LINK:
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ hci_conn_failed(conn, reason);
+ break;
+ case ISO_LINK:
+ if (conn->state != BT_CONNECTED &&
+ !test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ hci_conn_failed(conn, reason);
+ break;
+ }
+}
+
static void hci_conn_unlink(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -1103,14 +1089,7 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!test_bit(HCI_UP, &hdev->flags))
continue;
- /* Due to race, SCO connection might be not established
- * yet at this point. Delete it now, otherwise it is
- * possible for it to be stuck and can't be deleted.
- */
- if ((child->type == SCO_LINK ||
- child->type == ESCO_LINK) &&
- child->handle == HCI_CONN_HANDLE_UNSET)
- hci_conn_del(child);
+ hci_conn_cleanup_child(child, conn->abort_reason);
}
return;
@@ -1273,9 +1252,41 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
hci_conn_del(conn);
}
+/* This function requires the caller holds hdev->lock */
+u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "hcon %p handle 0x%4.4x", conn, handle);
+
+ if (conn->handle == handle)
+ return 0;
+
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ handle, HCI_CONN_HANDLE_MAX);
+ return HCI_ERROR_INVALID_PARAMETERS;
+ }
+
+ /* If abort_reason has been sent it means the connection is being
+ * aborted and the handle shall not be changed.
+ */
+ if (conn->abort_reason)
+ return conn->abort_reason;
+
+ conn->handle = handle;
+
+ return 0;
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return;
bt_dev_dbg(hdev, "err %d", err);
@@ -1300,10 +1311,18 @@ done:
static int hci_connect_le_sync(struct hci_dev *hdev, void *data)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return 0;
bt_dev_dbg(hdev, "conn %p", conn);
+ clear_bit(HCI_CONN_SCANNING, &conn->flags);
+ conn->state = BT_CONNECT;
+
return hci_le_create_conn_sync(hdev, conn);
}
@@ -1373,10 +1392,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
- conn->state = BT_CONNECT;
- clear_bit(HCI_CONN_SCANNING, &conn->flags);
-
- err = hci_cmd_sync_queue(hdev, hci_connect_le_sync, conn,
+ err = hci_cmd_sync_queue(hdev, hci_connect_le_sync,
+ UINT_PTR(conn->handle),
create_le_conn_complete);
if (err) {
hci_conn_del(conn);
@@ -1426,8 +1443,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
params->auto_connect == HCI_AUTO_CONN_REPORT ||
params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
}
params->explicit_connect = true;
@@ -1440,25 +1457,23 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
{
- struct iso_list_data data;
+ struct hci_conn *conn;
+ u8 big;
/* Allocate a BIG if not set */
if (qos->bcast.big == BT_ISO_QOS_BIG_UNSET) {
- for (data.big = 0x00; data.big < 0xef; data.big++) {
- data.count = 0;
- data.bis = 0xff;
+ for (big = 0x00; big < 0xef; big++) {
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
- BT_BOUND, &data);
- if (!data.count)
+ conn = hci_conn_hash_lookup_big(hdev, big);
+ if (!conn)
break;
}
- if (data.big == 0xef)
+ if (big == 0xef)
return -EADDRNOTAVAIL;
/* Update BIG */
- qos->bcast.big = data.big;
+ qos->bcast.big = big;
}
return 0;
@@ -1466,28 +1481,27 @@ static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
{
- struct iso_list_data data;
+ struct hci_conn *conn;
+ u8 bis;
/* Allocate BIS if not set */
if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) {
/* Find an unused adv set to advertise BIS, skip instance 0x00
* since it is reserved as general purpose set.
*/
- for (data.bis = 0x01; data.bis < hdev->le_num_of_adv_sets;
- data.bis++) {
- data.count = 0;
+ for (bis = 0x01; bis < hdev->le_num_of_adv_sets;
+ bis++) {
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
- BT_BOUND, &data);
- if (!data.count)
+ conn = hci_conn_hash_lookup_bis(hdev, BDADDR_ANY, bis);
+ if (!conn)
break;
}
- if (data.bis == hdev->le_num_of_adv_sets)
+ if (bis == hdev->le_num_of_adv_sets)
return -EADDRNOTAVAIL;
/* Update BIS */
- qos->bcast.bis = data.bis;
+ qos->bcast.bis = bis;
}
return 0;
@@ -1495,10 +1509,10 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
/* This function requires the caller holds hdev->lock */
static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
- struct bt_iso_qos *qos)
+ struct bt_iso_qos *qos, __u8 base_len,
+ __u8 *base)
{
struct hci_conn *conn;
- struct iso_list_data data;
int err;
/* Let's make sure that LE is enabled. */
@@ -1516,24 +1530,26 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
if (err)
return ERR_PTR(err);
- data.big = qos->bcast.big;
- data.bis = qos->bcast.bis;
- data.count = 0;
-
- /* Check if there is already a matching BIG/BIS */
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, BT_BOUND, &data);
- if (data.count)
+ /* Check if the LE Create BIG command has already been sent */
+ conn = hci_conn_hash_lookup_per_adv_bis(hdev, dst, qos->bcast.big,
+ qos->bcast.bis);
+ if (conn)
return ERR_PTR(-EADDRINUSE);
- conn = hci_conn_hash_lookup_bis(hdev, dst, qos->bcast.big, qos->bcast.bis);
- if (conn)
+ /* Check BIS settings against other bound BISes, since all
+ * BISes in a BIG must have the same value for all parameters
+ */
+ conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big);
+
+ if (conn && (memcmp(qos, &conn->iso_qos, sizeof(*qos)) ||
+ base_len != conn->le_per_adv_data_len ||
+ memcmp(conn->le_per_adv_data, base, base_len)))
return ERR_PTR(-EADDRINUSE);
conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
if (!conn)
return ERR_PTR(-ENOMEM);
- set_bit(HCI_CONN_PER_ADV, &conn->flags);
conn->state = BT_CONNECT;
hci_conn_hold(conn);
@@ -1684,7 +1700,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
if (!link) {
hci_conn_drop(acl);
hci_conn_drop(sco);
- return NULL;
+ return ERR_PTR(-ENOLINK);
}
sco->setting = setting;
@@ -1707,52 +1723,25 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
return sco;
}
-static void cis_add(struct iso_list_data *d, struct bt_iso_qos *qos)
-{
- struct hci_cis_params *cis = &d->pdu.cis[d->pdu.cp.num_cis];
-
- cis->cis_id = qos->ucast.cis;
- cis->c_sdu = cpu_to_le16(qos->ucast.out.sdu);
- cis->p_sdu = cpu_to_le16(qos->ucast.in.sdu);
- cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy : qos->ucast.in.phy;
- cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy : qos->ucast.out.phy;
- cis->c_rtn = qos->ucast.out.rtn;
- cis->p_rtn = qos->ucast.in.rtn;
-
- d->pdu.cp.num_cis++;
-}
-
-static void cis_list(struct hci_conn *conn, void *data)
-{
- struct iso_list_data *d = data;
-
- /* Skip if broadcast/ANY address */
- if (!bacmp(&conn->dst, BDADDR_ANY))
- return;
-
- if (d->cig != conn->iso_qos.ucast.cig || d->cis == BT_ISO_QOS_CIS_UNSET ||
- d->cis != conn->iso_qos.ucast.cis)
- return;
-
- d->count++;
-
- if (d->pdu.cp.cig_id == BT_ISO_QOS_CIG_UNSET ||
- d->count >= ARRAY_SIZE(d->pdu.cis))
- return;
-
- cis_add(d, &conn->iso_qos);
-}
-
static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_create_big cp;
+ struct iso_list_data data;
memset(&cp, 0, sizeof(cp));
+ data.big = qos->bcast.big;
+ data.bis = qos->bcast.bis;
+ data.count = 0;
+
+ /* Create a BIS for each bound connection */
+ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
+ BT_BOUND, &data);
+
cp.handle = qos->bcast.big;
cp.adv_handle = qos->bcast.bis;
- cp.num_bis = 0x01;
+ cp.num_bis = data.count;
hci_cpu_to_le24(qos->bcast.out.interval, cp.bis.sdu_interval);
cp.bis.sdu = cpu_to_le16(qos->bcast.out.sdu);
cp.bis.latency = cpu_to_le16(qos->bcast.out.latency);
@@ -1766,25 +1755,62 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp);
}
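
The rewritten hci_le_create_big() no longer hard-codes cp.num_bis = 0x01: hci_conn_hash_list_state() invokes the bis_list() callback once per bound ISO connection and the callback bumps data.count. A rough userspace rendition of that visitor-style walk (illustrative names, not kernel API):

#include <stdio.h>

struct walk_data { int big; int count; };

static void count_cb(int conn_big, void *data)
{
	struct walk_data *d = data;

	if (conn_big == d->big)
		d->count++;
}

static void walk(const int *conns, int n, void (*cb)(int, void *), void *data)
{
	for (int i = 0; i < n; i++)
		cb(conns[i], data);
}

int main(void)
{
	int bigs[] = { 1, 1, 2 };	/* BIG ids of bound connections */
	struct walk_data d = { .big = 1, .count = 0 };

	walk(bigs, 3, count_cb, &d);
	printf("num_bis = %d\n", d.count);	/* 2 */
	return 0;
}
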
-static void set_cig_params_complete(struct hci_dev *hdev, void *data, int err)
+static int set_cig_params_sync(struct hci_dev *hdev, void *data)
{
- struct iso_cig_params *pdu = data;
+ u8 cig_id = PTR_UINT(data);
+ struct hci_conn *conn;
+ struct bt_iso_qos *qos;
+ struct iso_cig_params pdu;
+ u8 cis_id;
- bt_dev_dbg(hdev, "");
+ conn = hci_conn_hash_lookup_cig(hdev, cig_id);
+ if (!conn)
+ return 0;
- if (err)
- bt_dev_err(hdev, "Unable to set CIG parameters: %d", err);
+ memset(&pdu, 0, sizeof(pdu));
- kfree(pdu);
-}
+ qos = &conn->iso_qos;
+ pdu.cp.cig_id = cig_id;
+ hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval);
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval);
+ pdu.cp.sca = qos->ucast.sca;
+ pdu.cp.packing = qos->ucast.packing;
+ pdu.cp.framing = qos->ucast.framing;
+ pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
+ pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
+
+ /* Reprogram all CIS(s) with the same CIG, valid ranges are:
+ * num_cis: 0x00 to 0x1F
+ * cis_id: 0x00 to 0xEF
+ */
+ for (cis_id = 0x00; cis_id < 0xf0 &&
+ pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) {
+ struct hci_cis_params *cis;
-static int set_cig_params_sync(struct hci_dev *hdev, void *data)
-{
- struct iso_cig_params *pdu = data;
- u32 plen;
+ conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id);
+ if (!conn)
+ continue;
+
+ qos = &conn->iso_qos;
+
+ cis = &pdu.cis[pdu.cp.num_cis++];
+ cis->cis_id = cis_id;
+ cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu);
+ cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu);
+ cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy :
+ qos->ucast.in.phy;
+ cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy :
+ qos->ucast.out.phy;
+ cis->c_rtn = qos->ucast.out.rtn;
+ cis->p_rtn = qos->ucast.in.rtn;
+ }
- plen = sizeof(pdu->cp) + pdu->cp.num_cis * sizeof(pdu->cis[0]);
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, plen, pdu,
+ if (!pdu.cp.num_cis)
+ return 0;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS,
+ sizeof(pdu.cp) +
+ pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu,
HCI_CMD_TIMEOUT);
}
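
The command length passed to __hci_cmd_sync_status() above is the fixed header plus only the CIS entries actually filled in. A small sketch of that variable-length sizing; the struct layout is illustrative, not the real packed HCI wire format:

#include <stdint.h>
#include <stdio.h>

struct cis_params { uint8_t cis_id, c_rtn, p_rtn; };

struct cig_pdu {
	struct { uint8_t cig_id, num_cis; } cp;
	struct cis_params cis[0x1f];	/* spec caps a CIG at 0x1F CISes */
};

int main(void)
{
	struct cig_pdu pdu = { .cp = { .cig_id = 1, .num_cis = 3 } };
	size_t plen = sizeof(pdu.cp) + pdu.cp.num_cis * sizeof(pdu.cis[0]);

	/* Header plus only the entries actually in use */
	printf("%zu bytes on the wire\n", plen);
	return 0;
}
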
@@ -1792,7 +1818,6 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct iso_list_data data;
- struct iso_cig_params *pdu;
memset(&data, 0, sizeof(data));
@@ -1819,59 +1844,32 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
qos->ucast.cig = data.cig;
}
- data.pdu.cp.cig_id = qos->ucast.cig;
- hci_cpu_to_le24(qos->ucast.out.interval, data.pdu.cp.c_interval);
- hci_cpu_to_le24(qos->ucast.in.interval, data.pdu.cp.p_interval);
- data.pdu.cp.sca = qos->ucast.sca;
- data.pdu.cp.packing = qos->ucast.packing;
- data.pdu.cp.framing = qos->ucast.framing;
- data.pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
- data.pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
-
if (qos->ucast.cis != BT_ISO_QOS_CIS_UNSET) {
- data.count = 0;
- data.cig = qos->ucast.cig;
- data.cis = qos->ucast.cis;
-
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
- &data);
- if (data.count)
+ if (hci_conn_hash_lookup_cis(hdev, NULL, 0, qos->ucast.cig,
+ qos->ucast.cis))
return false;
-
- cis_add(&data, qos);
+ goto done;
}
- /* Reprogram all CIS(s) with the same CIG */
- for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0x11;
+ /* Allocate first available CIS if not set */
+ for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0xf0;
data.cis++) {
- data.count = 0;
-
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
- &data);
- if (data.count)
- continue;
-
- /* Allocate a CIS if not set */
- if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET) {
+ if (!hci_conn_hash_lookup_cis(hdev, NULL, 0, data.cig,
+ data.cis)) {
/* Update CIS */
qos->ucast.cis = data.cis;
- cis_add(&data, qos);
+ break;
}
}
- if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis)
+ if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET)
return false;
- pdu = kmemdup(&data.pdu, sizeof(*pdu), GFP_KERNEL);
- if (!pdu)
+done:
+ if (hci_cmd_sync_queue(hdev, set_cig_params_sync,
+ UINT_PTR(qos->ucast.cig), NULL) < 0)
return false;
- if (hci_cmd_sync_queue(hdev, set_cig_params_sync, pdu,
- set_cig_params_complete) < 0) {
- kfree(pdu);
- return false;
- }
-
return true;
}
@@ -1888,6 +1886,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-ENOMEM);
cis->cleanup = cis_cleanup;
cis->dst_type = dst_type;
+ cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET;
+ cis->iso_qos.ucast.cis = BT_ISO_QOS_CIS_UNSET;
}
if (cis->state == BT_CONNECTED)
@@ -1931,6 +1931,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EINVAL);
}
+ hci_conn_hold(cis);
+
cis->iso_qos = *qos;
cis->state = BT_BOUND;
@@ -1969,59 +1971,47 @@ bool hci_iso_setup_path(struct hci_conn *conn)
return true;
}
-static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+int hci_conn_check_create_cis(struct hci_conn *conn)
{
- return hci_le_create_cis_sync(hdev, data);
-}
+ if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY))
+ return -EINVAL;
-int hci_le_create_cis(struct hci_conn *conn)
-{
- struct hci_conn *cis;
- struct hci_link *link, *t;
- struct hci_dev *hdev = conn->hdev;
- int err;
+ if (!conn->parent || conn->parent->state != BT_CONNECTED ||
+ conn->state != BT_CONNECT || HCI_CONN_HANDLE_UNSET(conn->handle))
+ return 1;
- bt_dev_dbg(hdev, "hcon %p", conn);
+ return 0;
+}
- switch (conn->type) {
- case LE_LINK:
- if (conn->state != BT_CONNECTED || list_empty(&conn->link_list))
- return -EINVAL;
+static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_le_create_cis_sync(hdev);
+}
- cis = NULL;
+int hci_le_create_cis_pending(struct hci_dev *hdev)
+{
+ struct hci_conn *conn;
+ bool pending = false;
- /* hci_conn_link uses list_add_tail_rcu so the list is in
- * the same order as the connections are requested.
- */
- list_for_each_entry_safe(link, t, &conn->link_list, list) {
- if (link->conn->state == BT_BOUND) {
- err = hci_le_create_cis(link->conn);
- if (err)
- return err;
+ rcu_read_lock();
- cis = link->conn;
- }
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) {
+ rcu_read_unlock();
+ return -EBUSY;
}
- return cis ? 0 : -EINVAL;
- case ISO_LINK:
- cis = conn;
- break;
- default:
- return -EINVAL;
+ if (!hci_conn_check_create_cis(conn))
+ pending = true;
}
- if (cis->state == BT_CONNECT)
+ rcu_read_unlock();
+
+ if (!pending)
return 0;
/* Queue Create CIS */
- err = hci_cmd_sync_queue(hdev, hci_create_cis_sync, cis, NULL);
- if (err)
- return err;
-
- cis->state = BT_CONNECT;
-
- return 0;
+ return hci_cmd_sync_queue(hdev, hci_create_cis_sync, NULL, NULL);
}
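
hci_le_create_cis_pending() scans for an in-flight Create CIS first (returning -EBUSY so the completion path re-queues later) and otherwise queues a single batched command covering everything ready. The control flow in a standalone sketch, assuming a zero return from the readiness check means "ready", as with hci_conn_check_create_cis():

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

struct entry { bool in_flight; bool ready; };

static int queue_pending(const struct entry *e, size_t n, int (*queue)(void))
{
	bool pending = false;

	for (size_t i = 0; i < n; i++) {
		if (e[i].in_flight)
			return -EBUSY;	/* a completion will re-run this */
		if (e[i].ready)
			pending = true;
	}

	if (!pending)
		return 0;

	return queue();	/* one batched command covers all ready entries */
}

static int do_queue(void)
{
	return 0;	/* stand-in for hci_cmd_sync_queue() */
}

int main(void)
{
	struct entry e[] = { { .ready = true }, { 0 } };

	return queue_pending(e, 2, do_queue);
}
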
static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
@@ -2051,16 +2041,6 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
qos->latency = conn->le_conn_latency;
}
-static void hci_bind_bis(struct hci_conn *conn,
- struct bt_iso_qos *qos)
-{
- /* Update LINK PHYs according to QoS preference */
- conn->le_tx_phy = qos->bcast.out.phy;
- conn->le_tx_phy = qos->bcast.out.phy;
- conn->iso_qos = *qos;
- conn->state = BT_BOUND;
-}
-
static int create_big_sync(struct hci_dev *hdev, void *data)
{
struct hci_conn *conn = data;
@@ -2140,7 +2120,8 @@ int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
}
-int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
+int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
struct _packed {
@@ -2156,6 +2137,9 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
if (err)
return err;
+ if (hcon)
+ hcon->iso_qos.bcast.big = qos->bcast.big;
+
memset(&pdu, 0, sizeof(pdu));
pdu.cp.handle = qos->bcast.big;
pdu.cp.sync_handle = cpu_to_le16(sync_handle);
@@ -2183,27 +2167,80 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
}
}
-struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
- __u8 base_len, __u8 *base)
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
{
struct hci_conn *conn;
- int err;
+ __u8 eir[HCI_MAX_PER_AD_LENGTH];
+
+ if (base_len && base)
+ base_len = eir_append_service_data(eir, 0, 0x1851,
+ base, base_len);
/* We need hci_conn object using the BDADDR_ANY as dst */
- conn = hci_add_bis(hdev, dst, qos);
+ conn = hci_add_bis(hdev, dst, qos, base_len, eir);
if (IS_ERR(conn))
return conn;
- hci_bind_bis(conn, qos);
+ /* Update LINK PHYs according to QoS preference */
+ conn->le_tx_phy = qos->bcast.out.phy;
+ conn->le_rx_phy = qos->bcast.in.phy;
/* Add Basic Announcement into Periodic Adv Data if BASE is set */
if (base_len && base) {
- base_len = eir_append_service_data(conn->le_per_adv_data, 0,
- 0x1851, base, base_len);
+ memcpy(conn->le_per_adv_data, eir, sizeof(eir));
conn->le_per_adv_data_len = base_len;
}
+ hci_iso_qos_setup(hdev, conn, &qos->bcast.out,
+ conn->le_tx_phy ? conn->le_tx_phy :
+ hdev->le_tx_def_phys);
+
+ conn->iso_qos = *qos;
+ conn->state = BT_BOUND;
+
+ return conn;
+}
+
+static void bis_mark_per_adv(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Skip if not broadcast/ANY address */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ if (d->big != conn->iso_qos.bcast.big ||
+ d->bis == BT_ISO_QOS_BIS_UNSET ||
+ d->bis != conn->iso_qos.bcast.bis)
+ return;
+
+ set_bit(HCI_CONN_PER_ADV, &conn->flags);
+}
+
+struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
+{
+ struct hci_conn *conn;
+ int err;
+ struct iso_list_data data;
+
+ conn = hci_bind_bis(hdev, dst, qos, base_len, base);
+ if (IS_ERR(conn))
+ return conn;
+
+ data.big = qos->bcast.big;
+ data.bis = qos->bcast.bis;
+
+ /* Set HCI_CONN_PER_ADV for all bound connections, to mark that
+ * the start periodic advertising and create BIG commands have
+ * been queued
+ */
+ hci_conn_hash_list_state(hdev, bis_mark_per_adv, ISO_LINK,
+ BT_BOUND, &data);
+
/* Queue start periodic advertising and create BIG */
err = hci_cmd_sync_queue(hdev, create_big_sync, conn,
create_big_complete);
@@ -2212,10 +2249,6 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(err);
}
- hci_iso_qos_setup(hdev, conn, &qos->bcast.out,
- conn->le_tx_phy ? conn->le_tx_phy :
- hdev->le_tx_def_phys);
-
return conn;
}
@@ -2254,14 +2287,15 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
if (!link) {
hci_conn_drop(le);
hci_conn_drop(cis);
- return NULL;
+ return ERR_PTR(-ENOLINK);
}
- /* If LE is already connected and CIS handle is already set proceed to
- * Create CIS immediately.
- */
- if (le->state == BT_CONNECTED && cis->handle != HCI_CONN_HANDLE_UNSET)
- hci_le_create_cis(cis);
+ /* Link takes the refcount */
+ hci_conn_drop(cis);
+
+ cis->state = BT_CONNECT;
+
+ hci_le_create_cis_pending(hdev);
return cis;
}
@@ -2848,81 +2882,49 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
return phys;
}
-int hci_abort_conn(struct hci_conn *conn, u8 reason)
+static int abort_conn_sync(struct hci_dev *hdev, void *data)
{
- int r = 0;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
return 0;
- switch (conn->state) {
- case BT_CONNECTED:
- case BT_CONFIG:
- if (conn->type == AMP_LINK) {
- struct hci_cp_disconn_phy_link cp;
+ return hci_abort_conn_sync(hdev, conn, conn->abort_reason);
+}
- cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
- cp.reason = reason;
- r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp);
- } else {
- struct hci_cp_disconnect dc;
+int hci_abort_conn(struct hci_conn *conn, u8 reason)
+{
+ struct hci_dev *hdev = conn->hdev;
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = reason;
- r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT,
- sizeof(dc), &dc);
- }
+ /* If abort_reason has already been set it means the connection is
+ * already being aborted so don't attempt to overwrite it.
+ */
+ if (conn->abort_reason)
+ return 0;
- conn->state = BT_DISCONN;
+ bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason);
- break;
- case BT_CONNECT:
- if (conn->type == LE_LINK) {
- if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- break;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
- } else if (conn->type == ACL_LINK) {
- if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2)
- break;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_CREATE_CONN_CANCEL,
- 6, &conn->dst);
- }
- break;
- case BT_CONNECT2:
- if (conn->type == ACL_LINK) {
- struct hci_cp_reject_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
- rej.reason = reason;
-
- r = hci_send_cmd(conn->hdev,
- HCI_OP_REJECT_CONN_REQ,
- sizeof(rej), &rej);
- } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
- struct hci_cp_reject_sync_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
-
- /* SCO rejection has its own limited set of
- * allowed error values (0x0D-0x0F) which isn't
- * compatible with most values passed to this
- * function. To be safe hard-code one of the
- * values that's suitable for SCO.
- */
- rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
+ conn->abort_reason = reason;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_REJECT_SYNC_CONN_REQ,
- sizeof(rej), &rej);
+ /* If the connection is pending, check the command opcode: it might be
+ * blocking hci_cmd_sync_work while waiting for its respective event, so
+ * hci_cmd_sync_cancel needs to be called to cancel it.
+ *
+ * hci_connect_le serializes the connection attempts so only one
+ * connection can be in BT_CONNECT at time.
+ */
+ if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) {
+ switch (hci_skb_event(hdev->sent_cmd)) {
+ case HCI_EV_LE_CONN_COMPLETE:
+ case HCI_EV_LE_ENHANCED_CONN_COMPLETE:
+ case HCI_EVT_LE_CIS_ESTABLISHED:
+ hci_cmd_sync_cancel(hdev, -ECANCELED);
+ break;
}
- break;
- default:
- conn->state = BT_CLOSED;
- break;
}
- return r;
+ return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
+ NULL);
}
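
The recurring pattern in this patch is to queue deferred work with the connection handle packed into the void * cookie (UINT_PTR()) and to re-resolve it in the callback (PTR_UINT() plus a hash lookup), so a connection freed in the meantime is simply skipped instead of dereferenced. A userspace sketch of that round trip; the macros and lookup table below only mirror the kernel helpers in spirit:

#include <stdint.h>
#include <stdio.h>

#define UINT_PTR(_handle)	((void *)(uintptr_t)(_handle))
#define PTR_UINT(_p)		((uintptr_t)(_p))

struct conn { uint16_t handle; int alive; };

static struct conn table[] = { { 0x0f00, 1 }, { 0x0f01, 0 } };

static struct conn *lookup(uint16_t handle)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].alive && table[i].handle == handle)
			return &table[i];
	return NULL;	/* conn deleted while the work sat in the queue */
}

/* The deferred callback re-resolves the handle instead of trusting a
 * possibly dangling pointer.
 */
static void work_cb(void *data)
{
	struct conn *c = lookup(PTR_UINT(data));

	if (!c)
		return;	/* nothing to do: the connection is gone */
	printf("still alive: 0x%04x\n", c->handle);
}

int main(void)
{
	work_cb(UINT_PTR(0x0f00));	/* found */
	work_cb(UINT_PTR(0x0f01));	/* quietly skipped */
	return 0;
}
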
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 48917c68358d..a5992f1b3c9b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1074,9 +1074,9 @@ void hci_uuids_clear(struct hci_dev *hdev)
void hci_link_keys_clear(struct hci_dev *hdev)
{
- struct link_key *key;
+ struct link_key *key, *tmp;
- list_for_each_entry(key, &hdev->link_keys, list) {
+ list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) {
list_del_rcu(&key->list);
kfree_rcu(key, rcu);
}
@@ -1084,9 +1084,9 @@ void hci_link_keys_clear(struct hci_dev *hdev)
void hci_smp_ltks_clear(struct hci_dev *hdev)
{
- struct smp_ltk *k;
+ struct smp_ltk *k, *tmp;
- list_for_each_entry(k, &hdev->long_term_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@@ -1094,9 +1094,9 @@ void hci_smp_ltks_clear(struct hci_dev *hdev)
void hci_smp_irks_clear(struct hci_dev *hdev)
{
- struct smp_irk *k;
+ struct smp_irk *k, *tmp;
- list_for_each_entry(k, &hdev->identity_resolving_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@@ -1104,9 +1104,9 @@ void hci_smp_irks_clear(struct hci_dev *hdev)
void hci_blocked_keys_clear(struct hci_dev *hdev)
{
- struct blocked_key *b;
+ struct blocked_key *b, *tmp;
- list_for_each_entry(b, &hdev->blocked_keys, list) {
+ list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) {
list_del_rcu(&b->list);
kfree_rcu(b, rcu);
}
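
These hunks switch the key-clearing loops to list_for_each_entry_safe() because the loop body unlinks and frees the current entry, so the next pointer must be cached before the entry can be reclaimed. The same idea in a minimal singly-linked sketch (not the kernel list macros):

#include <stdlib.h>

struct node { struct node *next; };

static void clear_safe(struct node **head)
{
	struct node *n = *head, *tmp;

	while (n) {
		tmp = n->next;	/* cached before free(), like the _safe macro */
		free(n);
		n = tmp;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = calloc(1, sizeof(*head));

	head->next = calloc(1, sizeof(*head));
	clear_safe(&head);
	return 0;
}
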
@@ -1949,15 +1949,15 @@ int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE:
- bt_dev_dbg(hdev, "%s add monitor %d status %d", hdev->name,
+ bt_dev_dbg(hdev, "add monitor %d status %d",
monitor->handle, status);
/* Message was not forwarded to controller - not an error */
break;
case HCI_ADV_MONITOR_EXT_MSFT:
status = msft_add_monitor_pattern(hdev, monitor);
- bt_dev_dbg(hdev, "%s add monitor %d msft status %d", hdev->name,
- monitor->handle, status);
+ bt_dev_dbg(hdev, "add monitor %d msft status %d",
+ handle, status);
break;
}
@@ -1972,17 +1972,19 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
struct adv_monitor *monitor)
{
int status = 0;
+ int handle;
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
- bt_dev_dbg(hdev, "%s remove monitor %d status %d", hdev->name,
+ bt_dev_dbg(hdev, "remove monitor %d status %d",
monitor->handle, status);
goto free_monitor;
case HCI_ADV_MONITOR_EXT_MSFT:
+ handle = monitor->handle;
status = msft_remove_monitor(hdev, monitor);
- bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
- hdev->name, monitor->handle, status);
+ bt_dev_dbg(hdev, "remove monitor %d msft status %d",
+ handle, status);
break;
}
@@ -2249,22 +2251,46 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
return NULL;
}
-/* This function requires the caller holds hdev->lock */
+/* This function requires the caller holds hdev->lock or rcu_read_lock */
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
bdaddr_t *addr, u8 addr_type)
{
struct hci_conn_params *param;
- list_for_each_entry(param, list, action) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(param, list, action) {
if (bacmp(&param->addr, addr) == 0 &&
- param->addr_type == addr_type)
+ param->addr_type == addr_type) {
+ rcu_read_unlock();
return param;
+ }
}
+ rcu_read_unlock();
+
return NULL;
}
/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_del_init(struct hci_conn_params *param)
+{
+ if (list_empty(&param->action))
+ return;
+
+ list_del_rcu(&param->action);
+ synchronize_rcu();
+ INIT_LIST_HEAD(&param->action);
+}
+
+/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_add(struct hci_conn_params *param,
+ struct list_head *list)
+{
+ list_add_rcu(&param->action, list);
+}
+
+/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
{
@@ -2297,14 +2323,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
return params;
}
-static void hci_conn_params_free(struct hci_conn_params *params)
+void hci_conn_params_free(struct hci_conn_params *params)
{
+ hci_pend_le_list_del_init(params);
+
if (params->conn) {
hci_conn_drop(params->conn);
hci_conn_put(params->conn);
}
- list_del(&params->action);
list_del(&params->list);
kfree(params);
}
@@ -2342,8 +2369,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
continue;
}
- list_del(&params->list);
- kfree(params);
+ hci_conn_params_free(params);
}
BT_DBG("All LE disabled connection parameters were removed");
@@ -2410,6 +2436,9 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return NOTIFY_DONE;
+ /* To avoid a potential race with hci_unregister_dev. */
+ hci_dev_hold(hdev);
+
if (action == PM_SUSPEND_PREPARE)
ret = hci_suspend_dev(hdev);
else if (action == PM_POST_SUSPEND)
@@ -2419,6 +2448,7 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
action, ret);
+ hci_dev_put(hdev);
return NOTIFY_DONE;
}
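
The hold/put pair added above pins hdev for the duration of the notifier so a concurrent hci_unregister_dev() cannot free it mid-callback. A compact sketch of that guard with a C11 atomic standing in for the kernel refcount (all names illustrative):

#include <stdatomic.h>
#include <stdlib.h>

struct dev { atomic_int ref; };

static struct dev *dev_hold(struct dev *d)
{
	atomic_fetch_add(&d->ref, 1);
	return d;
}

static void dev_put(struct dev *d)
{
	/* Dropping the last reference frees the object */
	if (atomic_fetch_sub(&d->ref, 1) == 1)
		free(d);
}

static int notifier(struct dev *d)
{
	dev_hold(d);	/* avoid racing with unregister, as in the hunk */
	/* ... suspend/resume work would run here ... */
	dev_put(d);
	return 0;
}

int main(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	atomic_init(&d->ref, 1);
	notifier(d);
	dev_put(d);
	return 0;
}
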
@@ -3865,7 +3895,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
if (conn) {
/* Send to upper protocol */
- bt_cb(skb)->sco.pkt_status = flags & 0x03;
+ hci_skb_pkt_status(skb) = flags & 0x03;
sco_recv_scodata(conn, skb);
return;
} else {
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index ec0df2f9188e..6b7741f6e95b 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -22,6 +22,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/kstrtox.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -1152,7 +1153,7 @@ static ssize_t force_no_mitm_write(struct file *file,
return -EFAULT;
buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
+ if (kstrtobool(buf, &enable))
return -EINVAL;
if (enable == hci_dev_test_flag(hdev, HCI_FORCE_NO_MITM))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 95816a938cea..35f251041eeb 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1564,7 +1564,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data,
params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type);
if (params)
- params->privacy_mode = cp->mode;
+ WRITE_ONCE(params->privacy_mode, cp->mode);
hci_dev_unlock(hdev);
@@ -1639,7 +1639,7 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data,
hci_dev_set_flag(hdev, HCI_LE_ADV);
- if (adv)
+ if (adv && !adv->periodic)
adv->enabled = true;
conn = hci_lookup_le_connect(hdev);
@@ -1747,7 +1747,7 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
{
struct discovery_state *d = &hdev->discovery;
- if (len > HCI_MAX_AD_LENGTH)
+ if (len > max_adv_len(hdev))
return;
bacpy(&d->last_adv_addr, bdaddr);
@@ -2784,6 +2784,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
hci_enable_advertising(hdev);
}
+ /* Inform sockets conn is gone before we delete it */
+ hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED);
+
goto done;
}
@@ -2804,8 +2807,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
default:
@@ -3170,19 +3173,15 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
goto unlock;
}
if (!status) {
- conn->handle = __le16_to_cpu(ev->handle);
- if (conn->handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
- conn->handle, HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
+ status = hci_conn_set_handle(conn, __le16_to_cpu(ev->handle));
+ if (status)
goto done;
- }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3423,8 +3422,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
hci_update_passive_scan(hdev);
break;
@@ -3800,6 +3799,22 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
return rp->status;
}
+static void hci_unbound_cis_failed(struct hci_dev *hdev, u8 cig, u8 status)
+{
+ struct hci_conn *conn, *tmp;
+
+ lockdep_assert_held(&hdev->lock);
+
+ list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
+ if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY) ||
+ conn->state == BT_OPEN || conn->iso_qos.ucast.cig != cig)
+ continue;
+
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ hci_conn_failed(conn, status);
+ }
+}
+
static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3807,6 +3822,7 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct hci_cp_le_set_cig_params *cp;
struct hci_conn *conn;
u8 status = rp->status;
+ bool pending = false;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
@@ -3820,12 +3836,15 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 4, Part E page 2554
+ *
+ * If the Status return parameter is non-zero, then the state of the CIG
+ * and its CIS configurations shall not be changed by the command. If
+ * the CIG did not already exist, it shall not be created.
+ */
if (status) {
- while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) {
- conn->state = BT_CLOSED;
- hci_connect_cfm(conn, status);
- hci_conn_del(conn);
- }
+ /* Keep current configuration, fail only the unbound CIS */
+ hci_unbound_cis_failed(hdev, rp->cig_id, status);
goto unlock;
}
@@ -3845,17 +3864,17 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
if (conn->state != BT_BOUND && conn->state != BT_CONNECT)
continue;
- conn->handle = __le16_to_cpu(rp->handle[i]);
-
- bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn,
- conn->handle, conn->parent);
+ if (hci_conn_set_handle(conn, __le16_to_cpu(rp->handle[i])))
+ continue;
- /* Create CIS if LE is already connected */
- if (conn->parent && conn->parent->state == BT_CONNECTED)
- hci_le_create_cis(conn);
+ if (conn->state == BT_CONNECT)
+ pending = true;
}
unlock:
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
return rp->status;
@@ -3935,24 +3954,47 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_ev_status *rp = data;
- __u8 *sent;
+ struct hci_cp_le_set_per_adv_enable *cp;
+ struct adv_info *adv = NULL, *n;
+ u8 per_adv_cnt = 0;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
if (rp->status)
return rp->status;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
- if (!sent)
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
+ if (!cp)
return rp->status;
hci_dev_lock(hdev);
- if (*sent)
+ adv = hci_find_adv_instance(hdev, cp->handle);
+
+ if (cp->enable) {
hci_dev_set_flag(hdev, HCI_LE_PER_ADV);
- else
+
+ if (adv)
+ adv->enabled = true;
+ } else {
+ /* If just one instance was disabled, check if any other
+ * instance is still enabled before clearing HCI_LE_PER_ADV.
+ * The current periodic adv instance will be marked as
+ * disabled once extended advertising is also disabled.
+ */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list) {
+ if (adv->periodic && adv->enabled)
+ per_adv_cnt++;
+ }
+
+ if (per_adv_cnt > 1)
+ goto unlock;
+
hci_dev_clear_flag(hdev, HCI_LE_PER_ADV);
+ }
+unlock:
hci_dev_unlock(hdev);
return rp->status;
@@ -4221,6 +4263,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data,
static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
{
struct hci_cp_le_create_cis *cp;
+ bool pending = false;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", status);
@@ -4243,12 +4286,18 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
conn = hci_conn_hash_lookup_handle(hdev, handle);
if (conn) {
+ if (test_and_clear_bit(HCI_CONN_CREATE_CIS,
+ &conn->flags))
+ pending = true;
conn->state = BT_CLOSED;
hci_connect_cfm(conn, status);
hci_conn_del(conn);
}
}
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -4996,18 +5045,15 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
goto unlock;
}
switch (status) {
case 0x00:
- conn->handle = __le16_to_cpu(ev->handle);
- if (conn->handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
- conn->handle, HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
+ status = hci_conn_set_handle(conn, __le16_to_cpu(ev->handle));
+ if (status) {
conn->state = BT_CLOSED;
break;
}
@@ -5860,7 +5906,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
goto unlock;
}
@@ -5962,7 +6008,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
conn->dst_type);
if (params) {
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
if (params->conn) {
hci_conn_drop(params->conn);
hci_conn_put(params->conn);
@@ -6213,8 +6259,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
return;
}
- if (!ext_adv && len > HCI_MAX_AD_LENGTH) {
- bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes");
+ if (len > max_adv_len(hdev)) {
+ bt_dev_err_ratelimited(hdev,
+ "adv larger than maximum supported");
return;
}
@@ -6279,7 +6326,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved,
type);
- if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
+ if (!ext_adv && conn && type == LE_ADV_IND &&
+ len <= max_adv_len(hdev)) {
/* Store report for later inclusion by
* mgmt_device_connected
*/
@@ -6420,7 +6468,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
info->length + 1))
break;
- if (info->length <= HCI_MAX_AD_LENGTH) {
+ if (info->length <= max_adv_len(hdev)) {
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, NULL, 0, rssi,
@@ -6533,19 +6581,56 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_pa_sync_established *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
+ struct hci_conn *bis;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- if (ev->status)
- return;
-
hci_dev_lock(hdev);
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags);
- if (!(mask & HCI_LM_ACCEPT))
+ if (!(mask & HCI_LM_ACCEPT)) {
hci_le_pa_term_sync(hdev, ev->handle);
+ goto unlock;
+ }
+
+ if (!(flags & HCI_PROTO_DEFER))
+ goto unlock;
+
+ /* Add connection to indicate the PA sync event */
+ bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
+
+ if (!bis)
+ goto unlock;
+
+ if (ev->status)
+ set_bit(HCI_CONN_PA_SYNC_FAILED, &bis->flags);
+ else
+ set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+
+ /* Notify the ISO layer of the connection */
+ hci_connect_cfm(bis, ev->status);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_per_adv_report *ev = data;
+ int mask = hdev->link_mode;
+ __u8 flags = 0;
+
+ bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
+
+ hci_dev_lock(hdev);
+
+ mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
+ if (!(mask & HCI_LM_ACCEPT))
+ hci_le_pa_term_sync(hdev, ev->sync_handle);
hci_dev_unlock(hdev);
}
@@ -6787,6 +6872,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_evt_le_cis_established *ev = data;
struct hci_conn *conn;
struct bt_iso_qos *qos;
+ bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6810,6 +6896,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos = &conn->iso_qos;
+ pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
+
/* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
qos->ucast.out.interval = qos->ucast.in.interval;
@@ -6851,10 +6939,14 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
+ conn->state = BT_CLOSED;
hci_connect_cfm(conn, ev->status);
hci_conn_del(conn);
unlock:
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -6933,6 +7025,7 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_create_big_complete *ev = data;
struct hci_conn *conn;
+ __u8 i = 0;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -6941,33 +7034,46 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
return;
hci_dev_lock(hdev);
+ rcu_read_lock();
- conn = hci_conn_hash_lookup_big(hdev, ev->handle);
- if (!conn)
- goto unlock;
+ /* Connect all BISes that are bound to the BIG */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (bacmp(&conn->dst, BDADDR_ANY) ||
+ conn->type != ISO_LINK ||
+ conn->iso_qos.bcast.big != ev->handle)
+ continue;
- if (conn->type != ISO_LINK) {
- bt_dev_err(hdev,
- "Invalid connection link type handle 0x%2.2x",
- ev->handle);
- goto unlock;
- }
+ if (hci_conn_set_handle(conn,
+ __le16_to_cpu(ev->bis_handle[i++])))
+ continue;
- if (ev->num_bis)
- conn->handle = __le16_to_cpu(ev->bis_handle[0]);
+ if (!ev->status) {
+ conn->state = BT_CONNECTED;
+ set_bit(HCI_CONN_BIG_CREATED, &conn->flags);
+ rcu_read_unlock();
+ hci_debugfs_create_conn(conn);
+ hci_conn_add_sysfs(conn);
+ hci_iso_setup_path(conn);
+ rcu_read_lock();
+ continue;
+ }
- if (!ev->status) {
- conn->state = BT_CONNECTED;
- hci_debugfs_create_conn(conn);
- hci_conn_add_sysfs(conn);
- hci_iso_setup_path(conn);
- goto unlock;
+ hci_connect_cfm(conn, ev->status);
+ rcu_read_unlock();
+ hci_conn_del(conn);
+ rcu_read_lock();
}
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+ if (!ev->status && !i)
+ /* If no BISes have been connected for the BIG,
+ * terminate. This is in case all bound connections
+ * have been closed before the BIG creation
+ * has completed.
+ */
+ hci_le_terminate_big_sync(hdev, ev->handle,
+ HCI_ERROR_LOCAL_HOST_TERM);
-unlock:
+ rcu_read_unlock();
hci_dev_unlock(hdev);
}
@@ -6976,6 +7082,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_big_sync_estabilished *ev = data;
struct hci_conn *bis;
+ struct hci_conn *pa_sync;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6984,11 +7091,17 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
flex_array_size(ev, bis, ev->num_bis)))
return;
- if (ev->status)
- return;
-
hci_dev_lock(hdev);
+ if (!ev->status) {
+ pa_sync = hci_conn_hash_lookup_pa_sync(hdev, ev->handle);
+ if (pa_sync)
+ /* Also mark the BIG sync established event on the
+ * associated PA sync hcon
+ */
+ set_bit(HCI_CONN_BIG_SYNC, &pa_sync->flags);
+ }
+
for (i = 0; i < ev->num_bis; i++) {
u16 handle = le16_to_cpu(ev->bis[i]);
__le32 interval;
@@ -7002,6 +7115,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->handle = handle;
}
+ if (ev->status != 0x42)
+ /* Mark PA sync as established */
+ set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+
bis->iso_qos.bcast.big = ev->handle;
memset(&interval, 0, sizeof(interval));
memcpy(&interval, ev->latency, sizeof(ev->latency));
@@ -7010,9 +7127,25 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.bcast.in.latency = le16_to_cpu(ev->interval) * 125 / 100;
bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu);
- hci_iso_setup_path(bis);
+ if (!ev->status) {
+ set_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_iso_setup_path(bis);
+ }
}
+ /* In case BIG sync failed, notify each failed connection to
+ * the user after all hci connections have been added
+ */
+ if (ev->status)
+ for (i = 0; i < ev->num_bis; i++) {
+ u16 handle = le16_to_cpu(ev->bis[i]);
+
+ bis = hci_conn_hash_lookup_handle(hdev, handle);
+
+ set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags);
+ hci_connect_cfm(bis, ev->status);
+ }
+
hci_dev_unlock(hdev);
}
@@ -7098,6 +7231,11 @@ static const struct hci_le_ev {
HCI_LE_EV(HCI_EV_LE_PA_SYNC_ESTABLISHED,
hci_le_pa_sync_estabilished_evt,
sizeof(struct hci_ev_le_pa_sync_established)),
+ /* [0x0f = HCI_EV_LE_PER_ADV_REPORT] */
+ HCI_LE_EV_VL(HCI_EV_LE_PER_ADV_REPORT,
+ hci_le_per_adv_report_evt,
+ sizeof(struct hci_ev_le_per_adv_report),
+ HCI_MAX_EVENT_SIZE),
/* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */
HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt,
sizeof(struct hci_evt_le_ext_adv_set_term)),
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f7e006a36382..6e023b0104b0 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -629,27 +629,6 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
}
}
-/* Returns true if an le connection is in the scanning state */
-static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type == LE_LINK && c->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &c->flags)) {
- rcu_read_unlock();
- return true;
- }
- }
-
- rcu_read_unlock();
-
- return false;
-}
-
static void set_random_addr(struct hci_request *req, bdaddr_t *rpa);
static int hci_update_random_address(struct hci_request *req,
bool require_privacy, bool use_rpa,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d249d839819..5e4f718073b7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -264,6 +264,53 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb_copy);
}
+static void hci_sock_copy_creds(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_creds *creds;
+
+ if (!sk || WARN_ON(!skb))
+ return;
+
+ creds = &bt_cb(skb)->creds;
+
+ /* Check if peer credentials are set */
+ if (!sk->sk_peer_pid) {
+ /* Check if parent peer credentials are set */
+ if (bt_sk(sk)->parent && bt_sk(sk)->parent->sk_peer_pid)
+ sk = bt_sk(sk)->parent;
+ else
+ return;
+ }
+
+ /* Check if scm_creds already set */
+ if (creds->pid == pid_vnr(sk->sk_peer_pid))
+ return;
+
+ memset(creds, 0, sizeof(*creds));
+
+ creds->pid = pid_vnr(sk->sk_peer_pid);
+ if (sk->sk_peer_cred) {
+ creds->uid = sk->sk_peer_cred->uid;
+ creds->gid = sk->sk_peer_cred->gid;
+ }
+}
+
+static struct sk_buff *hci_skb_clone(struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+
+ if (!skb)
+ return NULL;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+
+ hci_sock_copy_creds(skb->sk, nskb);
+
+ return nskb;
+}
+
/* Send frame to sockets with specific channel */
static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
int flag, struct sock *skip_sk)
@@ -289,7 +336,7 @@ static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
if (hci_pi(sk)->channel != channel)
continue;
- nskb = skb_clone(skb, GFP_ATOMIC);
+ nskb = hci_skb_clone(skb);
if (!nskb)
continue;
@@ -356,6 +403,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
if (!skb_copy)
return;
+ hci_sock_copy_creds(skb->sk, skb_copy);
+
/* Put header before the data */
hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE);
hdr->opcode = opcode;
@@ -531,10 +580,12 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
return NULL;
}
- skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC);
+ skb = bt_skb_alloc(14 + TASK_COMM_LEN, GFP_ATOMIC);
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0;
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
@@ -580,6 +631,8 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk)
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
__net_timestamp(skb);
@@ -606,6 +659,8 @@ static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index,
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
put_unaligned_le16(opcode, skb_put(skb, 2));
@@ -638,6 +693,8 @@ send_monitor_note(struct sock *sk, const char *fmt, ...)
if (!skb)
return;
+ hci_sock_copy_creds(sk, skb);
+
va_start(args, fmt);
vsprintf(skb_put(skb, len), fmt, args);
*(u8 *)skb_put(skb, 1) = 0;
@@ -1494,6 +1551,7 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
+ struct scm_cookie scm;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1538,11 +1596,16 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
break;
}
+ memset(&scm, 0, sizeof(scm));
+ scm.creds = bt_cb(skb)->creds;
+
skb_free_datagram(sk, skb);
if (flags & MSG_TRUNC)
copied = skblen;
+ scm_recv(sock, msg, &scm, flags);
+
return err ? : copied;
}
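
With the changes above, a socket that enables SO_PASSCRED receives the originating process credentials as SCM_CREDENTIALS ancillary data on each read. A userspace sketch of the receiving side; the fd setup is assumed and error handling is trimmed:

#define _GNU_SOURCE	/* for struct ucred */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static void read_creds(int fd)
{
	char data[1024];
	union {
		char buf[CMSG_SPACE(sizeof(struct ucred))];
		struct cmsghdr align;
	} ctrl;
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl.buf, .msg_controllen = sizeof(ctrl.buf),
	};
	struct cmsghdr *cmsg;
	int one = 1;

	/* scm_recv() only emits the cmsg when SO_PASSCRED is enabled */
	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));

	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		struct ucred uc;

		if (cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_CREDENTIALS)
			continue;

		memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
		printf("pid %d uid %d gid %d\n", (int)uc.pid, (int)uc.uid,
		       (int)uc.gid);
	}
}

int main(void)
{
	read_creds(0);	/* placeholder fd; a real monitor passes its socket */
	return 0;
}
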
@@ -2143,18 +2206,12 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &hci_sock_ops;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern);
+ sk = bt_sock_alloc(net, sock, &hci_sk_proto, protocol, GFP_ATOMIC,
+ kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
-
sock->state = SS_UNCONNECTED;
- sk->sk_state = BT_OPEN;
sk->sk_destruct = hci_sock_destruct;
bt_sock_link(&hci_sk_list, sk);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8561616abbe5..9b93653c6197 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3,6 +3,7 @@
* BlueZ - Bluetooth protocol stack for Linux
*
* Copyright (C) 2021 Intel Corporation
+ * Copyright 2023 NXP
*/
#include <linux/property.h>
@@ -1319,9 +1320,11 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance)
static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
+ struct adv_info *adv = NULL;
/* If periodic advertising already disabled there is nothing to do. */
- if (!hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->periodic || !adv->enabled)
return 0;
memset(&cp, 0, sizeof(cp));
@@ -1386,9 +1389,11 @@ static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
+ struct adv_info *adv = NULL;
/* If periodic advertising already enabled there is nothing to do. */
- if (hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ adv = hci_find_adv_instance(hdev, instance);
+ if (adv && adv->periodic && adv->enabled)
return 0;
memset(&cp, 0, sizeof(cp));
@@ -1458,22 +1463,19 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
sync_interval);
if (IS_ERR(adv))
return PTR_ERR(adv);
+ adv->pending = false;
added = true;
}
}
- /* Only start advertising if instance 0 or if a dedicated instance has
- * been added.
- */
- if (!adv || added) {
- err = hci_start_ext_adv_sync(hdev, instance);
- if (err < 0)
- goto fail;
+ /* Start advertising */
+ err = hci_start_ext_adv_sync(hdev, instance);
+ if (err < 0)
+ goto fail;
- err = hci_adv_bcast_annoucement(hdev, adv);
- if (err < 0)
- goto fail;
- }
+ err = hci_adv_bcast_annoucement(hdev, adv);
+ if (err < 0)
+ goto fail;
err = hci_set_per_adv_params_sync(hdev, instance, min_interval,
max_interval);
@@ -2160,15 +2162,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev,
return 0;
}
+struct conn_params {
+ bdaddr_t addr;
+ u8 addr_type;
+ hci_conn_flags_t flags;
+ u8 privacy_mode;
+};
+
/* Adds connection to resolve list if needed.
* Setting params to NULL programs local hdev->irk
*/
static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
- struct hci_conn_params *params)
+ struct conn_params *params)
{
struct hci_cp_le_add_to_resolv_list cp;
struct smp_irk *irk;
struct bdaddr_list_with_irk *entry;
+ struct hci_conn_params *p;
if (!use_ll_privacy(hdev))
return 0;
@@ -2203,6 +2213,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
/* Default privacy mode is always Network */
params->privacy_mode = HCI_NETWORK_PRIVACY;
+ rcu_read_lock();
+ p = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &params->addr, params->addr_type);
+ if (!p)
+ p = hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &params->addr, params->addr_type);
+ if (p)
+ WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY);
+ rcu_read_unlock();
+
done:
if (hci_dev_test_flag(hdev, HCI_PRIVACY))
memcpy(cp.local_irk, hdev->irk, 16);
@@ -2215,7 +2235,7 @@ done:
/* Set Device Privacy Mode. */
static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
- struct hci_conn_params *params)
+ struct conn_params *params)
{
struct hci_cp_le_set_privacy_mode cp;
struct smp_irk *irk;
@@ -2240,6 +2260,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
bacpy(&cp.bdaddr, &irk->bdaddr);
cp.mode = HCI_DEVICE_PRIVACY;
+ /* Note: params->privacy_mode is not updated since it is a copy */
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
@@ -2249,7 +2271,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
* properly set the privacy mode.
*/
static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
- struct hci_conn_params *params,
+ struct conn_params *params,
u8 *num_entries)
{
struct hci_cp_le_add_to_accept_list cp;
@@ -2447,6 +2469,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk);
}
+static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
+{
+ struct hci_conn_params *params;
+ struct conn_params *p;
+ size_t i;
+
+ rcu_read_lock();
+
+ i = 0;
+ list_for_each_entry_rcu(params, list, action)
+ ++i;
+ *n = i;
+
+ rcu_read_unlock();
+
+ p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ rcu_read_lock();
+
+ i = 0;
+ list_for_each_entry_rcu(params, list, action) {
+ /* Racing adds are handled in next scan update */
+ if (i >= *n)
+ break;
+
+ /* No hdev->lock, but: addr, addr_type are immutable.
+ * privacy_mode is only written by us or in
+ * hci_cc_le_set_privacy_mode that we wait for.
+ * We should be idempotent so MGMT updating flags
+ * while we are processing is OK.
+ */
+ bacpy(&p[i].addr, &params->addr);
+ p[i].addr_type = params->addr_type;
+ p[i].flags = READ_ONCE(params->flags);
+ p[i].privacy_mode = READ_ONCE(params->privacy_mode);
+ ++i;
+ }
+
+ rcu_read_unlock();
+
+ *n = i;
+ return p;
+}
+
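
conn_params_copy() counts under the RCU read lock, allocates outside it, copies under the lock again, and finally clamps the count in case the list shrank between the two passes. A userspace sketch of that snapshot pattern with a pthread rwlock standing in for RCU (illustrative only):

#include <pthread.h>
#include <stdlib.h>

struct param { struct param *next; int flags; };

static struct param *head;
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

static struct param *snapshot(size_t *n)
{
	struct param *p, *copy;
	size_t i = 0;

	pthread_rwlock_rdlock(&lock);
	for (p = head; p; p = p->next)
		i++;
	pthread_rwlock_unlock(&lock);
	*n = i;

	copy = calloc(i, sizeof(*copy));
	if (!copy)
		return NULL;

	pthread_rwlock_rdlock(&lock);
	i = 0;
	for (p = head; p && i < *n; p = p->next)
		copy[i++].flags = p->flags;	/* racing adds wait for next pass */
	pthread_rwlock_unlock(&lock);

	*n = i;	/* clamp in case entries were removed in between */
	return copy;
}

int main(void)
{
	struct param b = { .flags = 2 }, a = { .next = &b, .flags = 1 };
	size_t n;

	head = &a;
	free(snapshot(&n));
	return 0;
}
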
/* Device must not be scanning when updating the accept list.
*
* Update is done using the following sequence:
@@ -2466,11 +2534,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
*/
static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
{
- struct hci_conn_params *params;
+ struct conn_params *params;
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
u8 filter_policy;
+ size_t i, n;
int err;
/* Pause advertising if resolving list can be used as controllers
@@ -2504,6 +2573,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
continue;
+ /* Pointers not dereferenced, no locks needed */
pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
&b->bdaddr,
b->bdaddr_type);
@@ -2532,23 +2602,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
* available accept list entries in the controller, then
* just abort and return filter policy value to not use the
* accept list.
+ *
+ * The list and params may be mutated while we wait for events,
+ * so make a copy and iterate it.
*/
- list_for_each_entry(params, &hdev->pend_le_conns, action) {
- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
- if (err)
+
+ params = conn_params_copy(&hdev->pend_le_conns, &n);
+ if (!params) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < n; ++i) {
+ err = hci_le_add_accept_list_sync(hdev, &params[i],
+ &num_entries);
+ if (err) {
+ kvfree(params);
goto done;
+ }
}
+ kvfree(params);
+
/* After adding all new pending connections, walk through
* the list of pending reports and also add these to the
* accept list if there is still space. Abort if space runs out.
*/
- list_for_each_entry(params, &hdev->pend_le_reports, action) {
- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
- if (err)
+
+ params = conn_params_copy(&hdev->pend_le_reports, &n);
+ if (!params) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < n; ++i) {
+ err = hci_le_add_accept_list_sync(hdev, &params[i],
+ &num_entries);
+ if (err) {
+ kvfree(params);
goto done;
+ }
}
+ kvfree(params);
+
/* Use the allowlist unless the following conditions are all true:
* - We are not currently suspending
* - There are 1 or more ADV monitors registered and it's not offloaded
@@ -2575,27 +2672,6 @@ done:
return filter_policy;
}
-/* Returns true if an le connection is in the scanning state */
-static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type == LE_LINK && c->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &c->flags)) {
- rcu_read_unlock();
- return true;
- }
- }
-
- rcu_read_unlock();
-
- return false;
-}
-
static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
u16 interval, u16 window,
u8 own_addr_type, u8 filter_policy)
@@ -4038,10 +4114,13 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
}
if (bis_capable(hdev)) {
+ events[1] |= 0x20; /* LE PA Sync Established */
+ events[1] |= 0x40; /* LE PA Report */
events[3] |= 0x04; /* LE Create BIG Complete */
events[3] |= 0x08; /* LE Terminate BIG Complete */
events[3] |= 0x10; /* LE BIG Sync Established */
events[3] |= 0x20; /* LE BIG Sync Loss */
+ events[4] |= 0x02; /* LE BIG Info Advertising Report */
}
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK,
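
The bis_capable() additions follow the LE event mask layout from the Core spec: subevent code N occupies bit N-1 of the 8-byte mask, i.e. byte (N-1)/8, bit (N-1)%8, which is why the BIGInfo Advertising Report (subevent 0x22) lands in events[4] as 0x02. A small sketch of that mapping (hypothetical helper, not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	/* Map a 1-based LE subevent code to its event mask byte and bit. */
	static void le_event_mask_pos(unsigned int code, unsigned int *byte,
				      uint8_t *bit)
	{
		*byte = (code - 1) / 8;
		*bit = 1u << ((code - 1) % 8);
	}

	int main(void)
	{
		unsigned int byte;
		uint8_t bit;

		le_event_mask_pos(0x22, &byte, &bit); /* LE BIGInfo Adv Report */
		printf("events[%u] |= 0x%02x\n", byte, bit); /* events[4] |= 0x02 */
		return 0;
	}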
@@ -4589,7 +4668,10 @@ static const struct {
"advertised, but not supported."),
HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT,
"HCI LE Set Random Private Address Timeout command is "
- "advertised, but not supported.")
+ "advertised, but not supported."),
+ HCI_QUIRK_BROKEN(LE_CODED,
+ "HCI LE Coded PHY feature bit is set, "
+ "but its usage is not supported.")
};
/* This function handles hdev setup stage:
@@ -4837,12 +4919,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
struct hci_conn_params *p;
list_for_each_entry(p, &hdev->le_conn_params, list) {
+ hci_pend_le_list_del_init(p);
if (p->conn) {
hci_conn_drop(p->conn);
hci_conn_put(p->conn);
p->conn = NULL;
}
- list_del_init(&p->action);
}
BT_DBG("All LE pending actions cleared");
@@ -5174,26 +5256,64 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
}
static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
- struct hci_conn *conn)
+ struct hci_conn *conn, u8 reason)
{
+ /* Return the reason if scanning, since the connection will likely
+ * be cleaned up directly.
+ */
if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- return 0;
+ return reason;
- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ if (conn->role == HCI_ROLE_SLAVE ||
+ test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL,
0, NULL, HCI_CMD_TIMEOUT);
}
-static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn)
+static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ u8 reason)
{
if (conn->type == LE_LINK)
- return hci_le_connect_cancel_sync(hdev, conn);
+ return hci_le_connect_cancel_sync(hdev, conn, reason);
+
+ if (conn->type == ISO_LINK) {
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 1857:
+ *
+ * If this command is issued for a CIS on the Central and the
+ * CIS is successfully terminated before being established,
+ * then an HCI_LE_CIS_Established event shall also be sent for
+ * this CIS with the Status Operation Cancelled by Host (0x44).
+ */
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ return hci_disconnect_sync(hdev, conn, reason);
+
+ /* A CIS for which no Create CIS was sent has nothing to cancel */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return HCI_ERROR_LOCAL_HOST_TERM;
+
+ /* There is no way to cancel a BIS without terminating the BIG
+ * which is done later on connection cleanup.
+ */
+ return 0;
+ }
if (hdev->hci_ver < BLUETOOTH_VER_1_2)
return 0;
+ /* Wait for HCI_EV_CONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
+ * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
+ * used when suspending or powering off, where we don't want to wait
+ * for the peer's response.
+ */
+ if (reason != HCI_ERROR_REMOTE_POWER_OFF)
+ return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN_CANCEL,
+ 6, &conn->dst,
+ HCI_EV_CONN_COMPLETE,
+ HCI_CMD_TIMEOUT, NULL);
+
return __hci_cmd_sync_status(hdev, HCI_OP_CREATE_CONN_CANCEL,
6, &conn->dst, HCI_CMD_TIMEOUT);
}
@@ -5217,11 +5337,27 @@ static int hci_reject_sco_sync(struct hci_dev *hdev, struct hci_conn *conn,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int hci_le_reject_cis_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ u8 reason)
+{
+ struct hci_cp_le_reject_cis cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.reason = reason;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_REJECT_CIS,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_reject_conn_req cp;
+ if (conn->type == ISO_LINK)
+ return hci_le_reject_cis_sync(hdev, conn, reason);
+
if (conn->type == SCO_LINK || conn->type == ESCO_LINK)
return hci_reject_sco_sync(hdev, conn, reason);
@@ -5235,43 +5371,94 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
- int err;
+ int err = 0;
+ u16 handle = conn->handle;
+ struct hci_conn *c;
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
- return hci_disconnect_sync(hdev, conn, reason);
+ err = hci_disconnect_sync(hdev, conn, reason);
+ break;
case BT_CONNECT:
- err = hci_connect_cancel_sync(hdev, conn);
- /* Cleanup hci_conn object if it cannot be cancelled as it
- * likelly means the controller and host stack are out of sync.
- */
- if (err) {
- hci_dev_lock(hdev);
- hci_conn_failed(conn, err);
- hci_dev_unlock(hdev);
- }
- return err;
+ err = hci_connect_cancel_sync(hdev, conn, reason);
+ break;
case BT_CONNECT2:
- return hci_reject_conn_sync(hdev, conn, reason);
+ err = hci_reject_conn_sync(hdev, conn, reason);
+ break;
+ case BT_OPEN:
+ hci_dev_lock(hdev);
+
+ /* Clean up BIS or PA sync connections */
+ if (test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags) ||
+ test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags)) {
+ hci_conn_failed(conn, reason);
+ } else if (test_bit(HCI_CONN_PA_SYNC, &conn->flags) ||
+ test_bit(HCI_CONN_BIG_SYNC, &conn->flags)) {
+ conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ }
+
+ hci_dev_unlock(hdev);
+ return 0;
+ case BT_BOUND:
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, reason);
+ hci_dev_unlock(hdev);
+ return 0;
default:
+ hci_dev_lock(hdev);
conn->state = BT_CLOSED;
- break;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ hci_dev_unlock(hdev);
+ return 0;
}
- return 0;
+ hci_dev_lock(hdev);
+
+ /* Check that the connection hasn't been cleaned up while waiting
+ * for commands to complete.
+ */
+ c = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!c || c != conn) {
+ err = 0;
+ goto unlock;
+ }
+
+ /* Clean up the hci_conn object if it cannot be cancelled, as that
+ * likely means the controller and host stack are out of sync, or
+ * in the LE case it was still scanning so it can be cleaned up
+ * safely.
+ */
+ hci_conn_failed(conn, reason);
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
}
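
Since the cancel and reject paths above block waiting for controller events, hci_abort_conn_sync() deliberately re-looks the connection up by handle before touching it again. A rough sketch of that lookup-after-sleep idiom, with hypothetical types and a plain array for the connection hash:

	#include <stdio.h>

	struct conn { unsigned int handle; int state; };

	#define MAX_CONNS 4
	static struct conn *table[MAX_CONNS];

	static struct conn *lookup_handle(unsigned int handle)
	{
		for (int i = 0; i < MAX_CONNS; i++)
			if (table[i] && table[i]->handle == handle)
				return table[i];
		return NULL;
	}

	static int abort_conn(struct conn *conn)
	{
		unsigned int handle = conn->handle; /* keep the key, not the pointer */
		struct conn *c;

		/* ... blocking command here; the table may change meanwhile ... */

		c = lookup_handle(handle);
		if (!c || c != conn) /* gone or replaced: nothing left to do */
			return 0;

		conn->state = 0; /* still the same object, safe to touch */
		return 0;
	}

	int main(void)
	{
		struct conn a = { 7, 1 };

		table[0] = &a;
		abort_conn(&a);
		printf("state %d\n", a.state);
		return 0;
	}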
static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
{
- struct hci_conn *conn, *tmp;
- int err;
+ struct list_head *head = &hdev->conn_hash.list;
+ struct hci_conn *conn;
- list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
- err = hci_abort_conn_sync(hdev, conn, reason);
- if (err)
- return err;
+ rcu_read_lock();
+ while ((conn = list_first_or_null_rcu(head, struct hci_conn, list))) {
+ /* Make sure the connection is not freed while unlocking */
+ conn = hci_conn_get(conn);
+ rcu_read_unlock();
+ /* Disregard possible errors since hci_conn_del shall have been
+ * called even if an error occurred: errors cause
+ * hci_conn_failed to be called, which calls hci_conn_del
+ * internally.
+ */
+ hci_abort_conn_sync(hdev, conn, reason);
+ hci_conn_put(conn);
+ rcu_read_lock();
}
+ rcu_read_unlock();
return 0;
}
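
The rewritten drain loop pins each connection with hci_conn_get() before dropping the RCU read lock, so the blocking abort can sleep without an iterator pointing into a list that may change. A simplified single-threaded analogue, with a mutex in place of RCU and an explicit refcount:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct node { struct node *next; int refs; int id; };

	static struct node *head;
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static void node_put(struct node *n)
	{
		if (--n->refs == 0)
			free(n);
	}

	static void drain(void)
	{
		pthread_mutex_lock(&lock);
		while (head) {
			struct node *n = head;

			n->refs++;                   /* hci_conn_get() analogue */
			pthread_mutex_unlock(&lock);
			printf("abort %d\n", n->id); /* blocking work runs unlocked */
			pthread_mutex_lock(&lock);
			head = n->next; /* the abort path unlinks in the real code */
			node_put(n);    /* drop the list's reference */
			node_put(n);    /* drop our temporary reference */
		}
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++) {
			struct node *n = calloc(1, sizeof(*n));

			if (!n)
				return 1;
			n->id = i;
			n->refs = 1;
			n->next = head;
			head = n;
		}
		drain();
		return 0;
	}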
@@ -6158,63 +6345,99 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
done:
if (err == -ETIMEDOUT)
- hci_le_connect_cancel_sync(hdev, conn);
+ hci_le_connect_cancel_sync(hdev, conn, 0x00);
/* Re-enable advertising after the connection attempt is finished. */
hci_resume_advertising_sync(hdev);
return err;
}
-int hci_le_create_cis_sync(struct hci_dev *hdev, struct hci_conn *conn)
+int hci_le_create_cis_sync(struct hci_dev *hdev)
{
struct {
struct hci_cp_le_create_cis cp;
struct hci_cis cis[0x1f];
} cmd;
- u8 cig;
- struct hci_conn *hcon = conn;
+ struct hci_conn *conn;
+ u8 cig = BT_ISO_QOS_CIG_UNSET;
+
+ /* The spec allows only one pending LE Create CIS command at a time. If
+ * the command is pending now, don't do anything. We check for pending
+ * connections after each CIS Established event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2566:
+ *
+ * If the Host issues this command before all the
+ * HCI_LE_CIS_Established events from the previous use of the
+ * command have been generated, the Controller shall return the
+ * error code Command Disallowed (0x0C).
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2567:
+ *
+ * When the Controller receives the HCI_LE_Create_CIS command, the
+ * Controller sends the HCI_Command_Status event to the Host. An
+ * HCI_LE_CIS_Established event will be generated for each CIS when it
+ * is established or if it is disconnected or considered lost before
+ * being established; until all the events are generated, the command
+ * remains pending.
+ */
memset(&cmd, 0, sizeof(cmd));
- cmd.cis[0].acl_handle = cpu_to_le16(conn->parent->handle);
- cmd.cis[0].cis_handle = cpu_to_le16(conn->handle);
- cmd.cp.num_cis++;
- cig = conn->iso_qos.ucast.cig;
hci_dev_lock(hdev);
rcu_read_lock();
+ /* Wait until previous Create CIS has completed */
list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ goto done;
+ }
- if (conn == hcon || conn->type != ISO_LINK ||
- conn->state == BT_CONNECTED ||
- conn->iso_qos.ucast.cig != cig)
+ /* Find CIG with all CIS ready */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_conn *link;
+
+ if (hci_conn_check_create_cis(conn))
continue;
- /* Check if all CIS(s) belonging to a CIG are ready */
- if (!conn->parent || conn->parent->state != BT_CONNECTED ||
- conn->state != BT_CONNECT) {
- cmd.cp.num_cis = 0;
- break;
+ cig = conn->iso_qos.ucast.cig;
+
+ list_for_each_entry_rcu(link, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_cis(link) > 0 &&
+ link->iso_qos.ucast.cig == cig &&
+ link->state != BT_CONNECTED) {
+ cig = BT_ISO_QOS_CIG_UNSET;
+ break;
+ }
}
- /* Group all CIS with state BT_CONNECT since the spec don't
- * allow to send them individually:
- *
- * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
- * page 2566:
- *
- * If the Host issues this command before all the
- * HCI_LE_CIS_Established events from the previous use of the
- * command have been generated, the Controller shall return the
- * error code Command Disallowed (0x0C).
- */
+ if (cig != BT_ISO_QOS_CIG_UNSET)
+ break;
+ }
+
+ if (cig == BT_ISO_QOS_CIG_UNSET)
+ goto done;
+
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+
+ if (hci_conn_check_create_cis(conn) ||
+ conn->iso_qos.ucast.cig != cig)
+ continue;
+
+ set_bit(HCI_CONN_CREATE_CIS, &conn->flags);
cis->acl_handle = cpu_to_le16(conn->parent->handle);
cis->cis_handle = cpu_to_le16(conn->handle);
cmd.cp.num_cis++;
+
+ if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis))
+ break;
}
+done:
rcu_read_unlock();
hci_dev_unlock(hdev);
@@ -6338,7 +6561,7 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
{
- u8 instance = PTR_ERR(data);
+ u8 instance = PTR_UINT(data);
return hci_update_adv_data_sync(hdev, instance);
}
@@ -6346,5 +6569,5 @@ static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
{
return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
- ERR_PTR(instance), NULL);
+ UINT_PTR(instance), NULL);
}
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 369ed92dac99..c93aaeb3a3fa 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -256,21 +256,13 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern);
+ sk = bt_sock_alloc(net, sock, &hidp_proto, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
sock->ops = &hidp_sock_ops;
-
sock->state = SS_UNCONNECTED;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&hidp_sk_list, sk);
return 0;
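
This hunk, and the matching ones below for ISO, L2CAP, RFCOMM and SCO, fold the repeated allocation boilerplate into a shared bt_sock_alloc() helper. Judging by the removed lines, the helper plausibly amounts to the following sketch (not necessarily the verbatim implementation):

	struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
				   struct proto *prot, int proto, gfp_t prio,
				   int kern)
	{
		struct sock *sk;

		sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern);
		if (!sk)
			return NULL;

		sock_init_data(sock, sk);
		INIT_LIST_HEAD(&bt_sk(sk)->accept_q);

		sock_reset_flag(sk, SOCK_ZAPPED);

		sk->sk_protocol = proto;
		sk->sk_state = BT_OPEN;

		return sk;
	}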
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 0e6cc57b3911..16da946f5881 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -48,6 +48,12 @@ static void iso_sock_kill(struct sock *sk);
#define EIR_SERVICE_DATA_LENGTH 4
#define BASE_MAX_LENGTH (HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH)
+/* iso_pinfo flags values */
+enum {
+ BT_SK_BIG_SYNC,
+ BT_SK_PA_SYNC,
+};
+
struct iso_pinfo {
struct bt_sock bt;
bdaddr_t src;
@@ -58,7 +64,7 @@ struct iso_pinfo {
__u8 bc_num_bis;
__u8 bc_bis[ISO_MAX_NUM_BIS];
__u16 sync_handle;
- __u32 flags;
+ unsigned long flags;
struct bt_iso_qos qos;
bool qos_user_set;
__u8 base_len;
@@ -70,6 +76,8 @@ static struct bt_iso_qos default_qos;
static bool check_ucast_qos(struct bt_iso_qos *qos);
static bool check_bcast_qos(struct bt_iso_qos *qos);
+static bool iso_match_sid(struct sock *sk, void *data);
+static void iso_sock_disconn(struct sock *sk);
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
@@ -123,8 +131,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
{
struct iso_conn *conn = hcon->iso_data;
- if (conn)
+ if (conn) {
+ if (!conn->hcon)
+ conn->hcon = hcon;
return conn;
+ }
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
@@ -284,13 +295,24 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
- le_addr_type(iso_pi(sk)->dst_type),
- &iso_pi(sk)->qos, iso_pi(sk)->base_len,
- iso_pi(sk)->base);
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto unlock;
+ /* Just bind if DEFER_SETUP has been set */
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst,
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
+ } else {
+ hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
}
conn = iso_conn_add(hcon);
@@ -300,14 +322,13 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ lock_sock(sk);
err = iso_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
- lock_sock(sk);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -315,13 +336,15 @@ static int iso_connect_bis(struct sock *sk)
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
+ } else if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECT;
} else {
sk->sk_state = BT_CONNECT;
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
release_sock(sk);
- return err;
unlock:
hci_dev_unlock(hdev);
@@ -389,14 +412,13 @@ static int iso_connect_cis(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ lock_sock(sk);
err = iso_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
- lock_sock(sk);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -413,7 +435,6 @@ static int iso_connect_cis(struct sock *sk)
}
release_sock(sk);
- return err;
unlock:
hci_dev_unlock(hdev);
@@ -580,6 +601,15 @@ static void iso_sock_cleanup_listen(struct sock *parent)
iso_sock_kill(sk);
}
+ /* If the listening socket stands for a PA sync connection,
+ * properly disconnect the hcon and socket.
+ */
+ if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon &&
+ test_bit(HCI_CONN_PA_SYNC, &iso_pi(parent)->conn->hcon->flags)) {
+ iso_sock_disconn(parent);
+ return;
+ }
+
parent->sk_state = BT_CLOSED;
sock_set_flag(parent, SOCK_ZAPPED);
}
@@ -601,16 +631,14 @@ static void iso_sock_kill(struct sock *sk)
sock_put(sk);
}
-static void iso_conn_defer_reject(struct hci_conn *conn)
+static void iso_sock_disconn(struct sock *sk)
{
- struct hci_cp_le_reject_cis cp;
-
- BT_DBG("conn %p", conn);
-
- memset(&cp, 0, sizeof(cp));
- cp.handle = cpu_to_le16(conn->handle);
- cp.reason = HCI_ERROR_REJ_BAD_ADDR;
- hci_send_cmd(conn->hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp);
+ sk->sk_state = BT_DISCONN;
+ iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
+ iso_conn_lock(iso_pi(sk)->conn);
+ hci_conn_drop(iso_pi(sk)->conn->hcon);
+ iso_pi(sk)->conn->hcon = NULL;
+ iso_conn_unlock(iso_pi(sk)->conn);
}
static void __iso_sock_close(struct sock *sk)
@@ -622,37 +650,22 @@ static void __iso_sock_close(struct sock *sk)
iso_sock_cleanup_listen(sk);
break;
+ case BT_CONNECT:
case BT_CONNECTED:
case BT_CONFIG:
- if (iso_pi(sk)->conn->hcon) {
- sk->sk_state = BT_DISCONN;
- iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
- iso_conn_lock(iso_pi(sk)->conn);
- hci_conn_drop(iso_pi(sk)->conn->hcon);
- iso_pi(sk)->conn->hcon = NULL;
- iso_conn_unlock(iso_pi(sk)->conn);
- } else {
+ if (iso_pi(sk)->conn->hcon)
+ iso_sock_disconn(sk);
+ else
iso_chan_del(sk, ECONNRESET);
- }
break;
case BT_CONNECT2:
- if (iso_pi(sk)->conn->hcon)
- iso_conn_defer_reject(iso_pi(sk)->conn->hcon);
- iso_chan_del(sk, ECONNRESET);
- break;
- case BT_CONNECT:
- /* In case of DEFER_SETUP the hcon would be bound to CIG which
- * needs to be removed so just call hci_conn_del so the cleanup
- * callback do what is needed.
- */
- if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
- iso_pi(sk)->conn->hcon) {
- hci_conn_del(iso_pi(sk)->conn->hcon);
- iso_pi(sk)->conn->hcon = NULL;
- }
-
- iso_chan_del(sk, ECONNRESET);
+ if (iso_pi(sk)->conn->hcon &&
+ (test_bit(HCI_CONN_PA_SYNC, &iso_pi(sk)->conn->hcon->flags) ||
+ test_bit(HCI_CONN_PA_SYNC_FAILED, &iso_pi(sk)->conn->hcon->flags)))
+ iso_sock_disconn(sk);
+ else
+ iso_chan_del(sk, ECONNRESET);
break;
case BT_DISCONN:
iso_chan_del(sk, ECONNRESET);
@@ -725,21 +738,13 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &iso_proto, kern);
+ sk = bt_sock_alloc(net, sock, &iso_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = iso_sock_destruct;
sk->sk_sndtimeo = ISO_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
/* Set address type as public as default src address is BDADDR_ANY */
iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
@@ -1072,8 +1077,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
- struct iso_conn *conn = iso_pi(sk)->conn;
struct sk_buff *skb, **frag;
+ size_t mtu;
int err;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -1085,11 +1090,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- if (sk->sk_state != BT_CONNECTED)
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_CONNECTED) {
+ release_sock(sk);
return -ENOTCONN;
+ }
- skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
- HCI_ISO_DATA_HDR_SIZE, 0);
+ mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+
+ release_sock(sk);
+
+ skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -1102,8 +1114,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
while (len) {
struct sk_buff *tmp;
- tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
- 0, 0);
+ tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0);
if (IS_ERR(tmp)) {
kfree_skb(skb);
return PTR_ERR(tmp);
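
sendmsg now holds the socket lock only long enough to check the state and snapshot hcon->hdev->iso_mtu into a local, then drops it before the potentially sleeping skb allocation; the later fragments reuse the snapshot instead of dereferencing conn->hcon again. The same snapshot-under-lock shape with a plain mutex:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;
	static int connected = 1;
	static size_t iso_mtu = 251;

	static int send_msg(size_t len)
	{
		size_t mtu;

		pthread_mutex_lock(&sk_lock);
		if (!connected) {
			pthread_mutex_unlock(&sk_lock);
			return -1;
		}
		mtu = iso_mtu; /* snapshot while the state is stable */
		pthread_mutex_unlock(&sk_lock);

		/* blocking allocation/copy happens here, using the snapshot */
		printf("sending %zu bytes in <=%zu-byte fragments\n", len, mtu);
		return 0;
	}

	int main(void)
	{
		return send_msg(1000) ? 1 : 0;
	}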
@@ -1149,6 +1160,29 @@ static void iso_conn_defer_accept(struct hci_conn *conn)
hci_send_cmd(hdev, HCI_OP_LE_ACCEPT_CIS, sizeof(cp), &cp);
}
+static void iso_conn_big_sync(struct sock *sk)
+{
+ int err;
+ struct hci_dev *hdev;
+
+ hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
+ iso_pi(sk)->src_type);
+
+ if (!hdev)
+ return;
+
+ if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
+ err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
+ if (err)
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ }
+}
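
BT_SK_BIG_SYNC acts as a one-shot guard here: test_and_set_bit() atomically marks the sync as requested, so only the first caller actually issues hci_le_big_create_sync(). The same shape in portable C11 atomics:

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_flag big_sync_started = ATOMIC_FLAG_INIT;

	static void maybe_create_sync(void)
	{
		/* test-and-set returns the old value: false only for the first caller */
		if (!atomic_flag_test_and_set(&big_sync_started))
			puts("creating BIG sync");
		else
			puts("sync already requested, skipping");
	}

	int main(void)
	{
		maybe_create_sync();
		maybe_create_sync(); /* second call is a no-op */
		return 0;
	}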
+
static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
@@ -1158,15 +1192,26 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
BT_DBG("sk %p", sk);
if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- lock_sock(sk);
- iso_conn_defer_accept(pi->conn->hcon);
- sk->sk_state = BT_CONFIG;
+ if (pi->conn->hcon &&
+ test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
+ iso_conn_big_sync(sk);
+ sk->sk_state = BT_LISTEN;
+ set_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
+ } else {
+ iso_conn_defer_accept(pi->conn->hcon);
+ sk->sk_state = BT_CONFIG;
+ }
release_sock(sk);
return 0;
case BT_CONNECT:
+ release_sock(sk);
return iso_connect_cis(sk);
+ default:
+ release_sock(sk);
+ break;
}
}
@@ -1193,6 +1238,12 @@ static bool check_io_qos(struct bt_iso_io_qos *qos)
static bool check_ucast_qos(struct bt_iso_qos *qos)
{
+ if (qos->ucast.cig > 0xef && qos->ucast.cig != BT_ISO_QOS_CIG_UNSET)
+ return false;
+
+ if (qos->ucast.cis > 0xef && qos->ucast.cis != BT_ISO_QOS_CIS_UNSET)
+ return false;
+
if (qos->ucast.sca > 0x07)
return false;
@@ -1282,6 +1333,18 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
break;
+ case BT_PKT_STATUS:
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (opt)
+ set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+ else
+ clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+ break;
+
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
sk->sk_state != BT_CONNECT2) {
@@ -1367,6 +1430,12 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
break;
+ case BT_PKT_STATUS:
+ if (put_user(test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags),
+ (int __user *)optval))
+ err = -EFAULT;
+ break;
+
case BT_ISO_QOS:
qos = iso_sock_get_qos(sk);
@@ -1377,7 +1446,8 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_ISO_BASE:
- if (sk->sk_state == BT_CONNECTED) {
+ if (sk->sk_state == BT_CONNECTED &&
+ !bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) {
base_len = iso_pi(sk)->conn->hcon->le_per_adv_data_len;
base = iso_pi(sk)->conn->hcon->le_per_adv_data;
} else {
@@ -1457,7 +1527,7 @@ static int iso_sock_release(struct socket *sock)
iso_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ if (sock_flag(sk, SOCK_LINGER) && READ_ONCE(sk->sk_lingertime) &&
!(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
@@ -1495,11 +1565,17 @@ static bool iso_match_big(struct sock *sk, void *data)
return ev->handle == iso_pi(sk)->qos.bcast.big;
}
+static bool iso_match_pa_sync_flag(struct sock *sk, void *data)
+{
+ return test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
+}
+
static void iso_conn_ready(struct iso_conn *conn)
{
- struct sock *parent;
+ struct sock *parent = NULL;
struct sock *sk = conn->sk;
- struct hci_ev_le_big_sync_estabilished *ev;
+ struct hci_ev_le_big_sync_estabilished *ev = NULL;
+ struct hci_ev_le_pa_sync_established *ev2 = NULL;
struct hci_conn *hcon;
BT_DBG("conn %p", conn);
@@ -1511,15 +1587,32 @@ static void iso_conn_ready(struct iso_conn *conn)
if (!hcon)
return;
- ev = hci_recv_event_data(hcon->hdev,
- HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
- if (ev)
+ if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags) ||
+ test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
+ ev = hci_recv_event_data(hcon->hdev,
+ HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
+
+ /* Get reference to PA sync parent socket, if it exists */
parent = iso_get_sock_listen(&hcon->src,
&hcon->dst,
- iso_match_big, ev);
- else
+ iso_match_pa_sync_flag, NULL);
+ if (!parent && ev)
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_big, ev);
+ } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags) ||
+ test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
+ ev2 = hci_recv_event_data(hcon->hdev,
+ HCI_EV_LE_PA_SYNC_ESTABLISHED);
+ if (ev2)
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_sid, ev2);
+ }
+
+ if (!parent)
parent = iso_get_sock_listen(&hcon->src,
- BDADDR_ANY, NULL, NULL);
+ BDADDR_ANY, NULL, NULL);
if (!parent)
return;
@@ -1536,11 +1629,17 @@ static void iso_conn_ready(struct iso_conn *conn)
iso_sock_init(sk, parent);
bacpy(&iso_pi(sk)->src, &hcon->src);
- iso_pi(sk)->src_type = hcon->src_type;
+
+ /* Convert from HCI to three-value type */
+ if (hcon->src_type == ADDR_LE_DEV_PUBLIC)
+ iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
+ else
+ iso_pi(sk)->src_type = BDADDR_LE_RANDOM;
/* If hcon has no destination address (BDADDR_ANY) it means it
- * was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to
- * initialize using the parent socket destination address.
+ * was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED or
+ * HCI_EV_LE_PA_SYNC_ESTABLISHED so we need to initialize using
+ * the parent socket destination address.
*/
if (!bacmp(&hcon->dst, BDADDR_ANY)) {
bacpy(&hcon->dst, &iso_pi(parent)->dst);
@@ -1548,12 +1647,29 @@ static void iso_conn_ready(struct iso_conn *conn)
hcon->sync_handle = iso_pi(parent)->sync_handle;
}
+ if (ev2 && !ev2->status) {
+ iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle;
+ iso_pi(sk)->qos = iso_pi(parent)->qos;
+ iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis;
+ memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS);
+ }
+
bacpy(&iso_pi(sk)->dst, &hcon->dst);
iso_pi(sk)->dst_type = hcon->dst_type;
+ iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle;
+ memcpy(iso_pi(sk)->base, iso_pi(parent)->base, iso_pi(parent)->base_len);
+ iso_pi(sk)->base_len = iso_pi(parent)->base_len;
hci_conn_hold(hcon);
iso_chan_add(conn, sk, parent);
+ if ((ev && ((struct hci_evt_le_big_sync_estabilished *)ev)->status) ||
+ (ev2 && ev2->status)) {
+ /* Trigger error signal on child socket */
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_error_report(sk);
+ }
+
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
sk->sk_state = BT_CONNECT2;
else
@@ -1580,12 +1696,20 @@ static bool iso_match_sync_handle(struct sock *sk, void *data)
return le16_to_cpu(ev->sync_handle) == iso_pi(sk)->sync_handle;
}
+static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data)
+{
+ struct hci_ev_le_per_adv_report *ev = data;
+
+ return le16_to_cpu(ev->sync_handle) == iso_pi(sk)->sync_handle;
+}
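
iso_get_sock_listen() takes a predicate plus an opaque data pointer, which is how each event source supplies its own matcher (by SID, by BIG handle, by sync handle, or by the PA sync flag). A generic sketch of that callback-matcher shape:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct sock { unsigned int sync_handle; };

	typedef bool (*iso_match_t)(struct sock *sk, void *data);

	static bool match_sync_handle(struct sock *sk, void *data)
	{
		return sk->sync_handle == *(unsigned int *)data;
	}

	static struct sock *find_sock(struct sock *socks, size_t n,
				      iso_match_t match, void *data)
	{
		for (size_t i = 0; i < n; i++)
			if (match(&socks[i], data))
				return &socks[i];
		return NULL;
	}

	int main(void)
	{
		struct sock socks[] = { { 1 }, { 7 }, { 9 } };
		unsigned int want = 7;

		puts(find_sock(socks, 3, match_sync_handle, &want) ?
		     "found" : "not found");
		return 0;
	}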
+
/* ----- ISO interface with lower layer (HCI) ----- */
int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
{
struct hci_ev_le_pa_sync_established *ev1;
struct hci_evt_le_big_info_adv_report *ev2;
+ struct hci_ev_le_per_adv_report *ev3;
struct sock *sk;
int lm = 0;
@@ -1601,12 +1725,15 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
* 2. HCI_EVT_LE_BIG_INFO_ADV_REPORT: When connect_ind is triggered by
* a BIG Info it attempts to check if there is any listening socket with
* the same sync_handle and if so attempts to create a sync.
+ * 3. HCI_EV_LE_PER_ADV_REPORT: When a PA report is received, it is stored
+ * in iso_pi(sk)->base so it can be passed up to the user, in the case of a
+ * broadcast sink.
*/
ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev1) {
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid,
ev1);
- if (sk)
+ if (sk && !ev1->status)
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
goto done;
@@ -1614,25 +1741,43 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT);
if (ev2) {
+ /* Try to get PA sync listening socket, if it exists */
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle, ev2);
+ iso_match_pa_sync_flag, NULL);
+ if (!sk)
+ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
+ iso_match_sync_handle, ev2);
if (sk) {
int err;
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
- err = hci_le_big_create_sync(hdev,
- &iso_pi(sk)->qos,
- iso_pi(sk)->sync_handle,
- iso_pi(sk)->bc_num_bis,
- iso_pi(sk)->bc_bis);
- if (err) {
- bt_dev_err(hdev, "hci_le_big_create_sync: %d",
- err);
- sk = NULL;
+ if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
+ !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
+ err = hci_le_big_create_sync(hdev, NULL,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
+ if (err) {
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ sk = NULL;
+ }
}
}
+ }
+
+ ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
+ if (ev3) {
+ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
+ iso_match_sync_handle_pa_report, ev3);
+
+ if (sk) {
+ memcpy(iso_pi(sk)->base, ev3->data, ev3->length);
+ iso_pi(sk)->base_len = ev3->length;
+ }
} else {
sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
}
@@ -1667,13 +1812,19 @@ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
}
/* Create CIS if pending */
- hci_le_create_cis(hcon);
+ hci_le_create_cis_pending(hcon->hdev);
return;
}
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
- if (!status) {
+ /* Similar to the success case, if HCI_CONN_BIG_SYNC_FAILED or
+ * HCI_CONN_PA_SYNC_FAILED is set, queue the failed connection
+ * into the accept queue of the listening socket and wake up
+ * userspace, to inform the user about the event.
+ */
+ if (!status || test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags) ||
+ test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
struct iso_conn *conn;
conn = iso_conn_add(hcon);
@@ -1748,6 +1899,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (len == skb->len) {
/* Complete frame received */
+ hci_skb_pkt_status(skb) = flags & 0x03;
iso_recv_frame(conn, skb);
return;
}
@@ -1769,6 +1921,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (!conn->rx_skb)
goto drop;
+ hci_skb_pkt_status(conn->rx_skb) = flags & 0x03;
skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
skb->len);
conn->rx_len = len - skb->len;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 947ca580bb9a..3bdfc3f1e73d 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -178,21 +178,6 @@ done:
return err;
}
-static void l2cap_sock_init_pid(struct sock *sk)
-{
- struct l2cap_chan *chan = l2cap_pi(sk)->chan;
-
- /* Only L2CAP_MODE_EXT_FLOWCTL ever need to access the PID in order to
- * group the channels being requested.
- */
- if (chan->mode != L2CAP_MODE_EXT_FLOWCTL)
- return;
-
- spin_lock(&sk->sk_peer_lock);
- sk->sk_peer_pid = get_pid(task_tgid(current));
- spin_unlock(&sk->sk_peer_lock);
-}
-
static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
@@ -268,8 +253,6 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
chan->mode != L2CAP_MODE_EXT_FLOWCTL)
chan->mode = L2CAP_MODE_LE_FLOWCTL;
- l2cap_sock_init_pid(sk);
-
err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid),
&la.l2_bdaddr, la.l2_bdaddr_type);
if (err)
@@ -325,8 +308,6 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
goto done;
}
- l2cap_sock_init_pid(sk);
-
sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
@@ -1858,21 +1839,13 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
struct sock *sk;
struct l2cap_chan *chan;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern);
+ sk = bt_sock_alloc(net, sock, &l2cap_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = l2cap_sock_destruct;
sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f7b2d0971f24..ba2e00646e8e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -944,6 +944,12 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (cis_peripheral_capable(hdev))
settings |= MGMT_SETTING_CIS_PERIPHERAL;
+ if (bis_capable(hdev))
+ settings |= MGMT_SETTING_ISO_BROADCASTER;
+
+ if (sync_recv_capable(hdev))
+ settings |= MGMT_SETTING_ISO_SYNC_RECEIVER;
+
return settings;
}
@@ -1297,15 +1303,15 @@ static void restart_le_actions(struct hci_dev *hdev)
/* Needed for AUTO_OFF case where might not "really"
* have been powered off.
*/
- list_del_init(&p->action);
+ hci_pend_le_list_del_init(p);
switch (p->auto_connect) {
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&p->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(p, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&p->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(p, &hdev->pend_le_reports);
break;
default:
break;
@@ -3580,18 +3586,6 @@ unlock:
return err;
}
-static int abort_conn_sync(struct hci_dev *hdev, void *data)
-{
- struct hci_conn *conn;
- u16 handle = PTR_ERR(data);
-
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- return 0;
-
- return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
-}
-
static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -3642,8 +3636,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
le_addr_type(addr->type));
if (conn->conn_reason == CONN_REASON_PAIR_DEVICE)
- hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle),
- NULL);
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
unlock:
hci_dev_unlock(hdev);
@@ -5169,7 +5162,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- params->flags = current_flags;
+ WRITE_ONCE(params->flags, current_flags);
status = MGMT_STATUS_SUCCESS;
/* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
@@ -5388,9 +5381,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count,
for (i = 0; i < pattern_count; i++) {
offset = patterns[i].offset;
length = patterns[i].length;
- if (offset >= HCI_MAX_AD_LENGTH ||
- length > HCI_MAX_AD_LENGTH ||
- (offset + length) > HCI_MAX_AD_LENGTH)
+ if (offset >= HCI_MAX_EXT_AD_LENGTH ||
+ length > HCI_MAX_EXT_AD_LENGTH ||
+ (offset + length) > HCI_MAX_EXT_AD_LENGTH)
return MGMT_STATUS_INVALID_PARAMS;
p = kmalloc(sizeof(*p), GFP_KERNEL);
@@ -7285,7 +7278,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- memcpy(&rp.addr, &cp->addr.bdaddr, sizeof(rp.addr));
+ memcpy(&rp.addr, &cp->addr, sizeof(rp.addr));
status = mgmt_status(err);
if (status == MGMT_STATUS_SUCCESS) {
@@ -7580,7 +7573,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
if (params->auto_connect == auto_connect)
return 0;
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
switch (auto_connect) {
case HCI_AUTO_CONN_DISABLED:
@@ -7589,18 +7582,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
* connect to device, keep connecting.
*/
if (params->explicit_connect)
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
if (params->explicit_connect)
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
else
- list_add(&params->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(params, &hdev->pend_le_reports);
break;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
if (!is_connected(hdev, addr, addr_type))
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
}
@@ -7823,9 +7816,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- list_del(&params->action);
- list_del(&params->list);
- kfree(params);
+ hci_conn_params_free(params);
device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
} else {
@@ -7856,9 +7847,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
continue;
}
- list_del(&p->action);
- list_del(&p->list);
- kfree(p);
+ hci_conn_params_free(p);
}
bt_dev_dbg(hdev, "All LE connection parameters were removed");
@@ -8439,8 +8428,8 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
supported_flags = get_supported_adv_flags(hdev);
rp->supported_flags = cpu_to_le32(supported_flags);
- rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
- rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
+ rp->max_adv_data_len = max_adv_len(hdev);
+ rp->max_scan_rsp_len = max_adv_len(hdev);
rp->max_instances = hdev->le_num_of_adv_sets;
rp->num_instances = hdev->adv_instance_cnt;
@@ -8476,7 +8465,7 @@ static u8 calculate_name_len(struct hci_dev *hdev)
static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags,
bool is_adv_data)
{
- u8 max_len = HCI_MAX_AD_LENGTH;
+ u8 max_len = max_adv_len(hdev);
if (is_adv_data) {
if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index bf5cee48916c..abbafa6194ca 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -91,6 +91,33 @@ struct msft_ev_le_monitor_device {
struct msft_monitor_advertisement_handle_data {
__u8 msft_handle;
__u16 mgmt_handle;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 cond_type;
+ struct list_head list;
+};
+
+enum monitor_addr_filter_state {
+ AF_STATE_IDLE,
+ AF_STATE_ADDING,
+ AF_STATE_ADDED,
+ AF_STATE_REMOVING,
+};
+
+#define MSFT_MONITOR_ADVERTISEMENT_TYPE_ADDR 0x04
+struct msft_monitor_addr_filter_data {
+ __u8 msft_handle;
+ __u8 pattern_handle; /* pattern monitor this filter pertains to */
+ __u16 mgmt_handle;
+ int state;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 addr_type;
+ bdaddr_t bdaddr;
struct list_head list;
};
@@ -99,9 +126,12 @@ struct msft_data {
__u8 evt_prefix_len;
__u8 *evt_prefix;
struct list_head handle_map;
+ struct list_head address_filters;
__u8 resuming;
__u8 suspending;
__u8 filter_enabled;
+ /* Synchronizes address filter add/remove with monitor device events. */
+ struct mutex filter_lock;
};
bool msft_monitor_supported(struct hci_dev *hdev)
@@ -180,6 +210,24 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
return NULL;
}
+/* This function requires the caller holds msft->filter_lock */
+static struct msft_monitor_addr_filter_data *msft_find_address_data
+ (struct hci_dev *hdev, u8 addr_type, bdaddr_t *addr,
+ u8 pattern_handle)
+{
+ struct msft_monitor_addr_filter_data *entry;
+ struct msft_data *msft = hdev->msft_data;
+
+ list_for_each_entry(entry, &msft->address_filters, list) {
+ if (entry->pattern_handle == pattern_handle &&
+ addr_type == entry->addr_type &&
+ !bacmp(addr, &entry->bdaddr))
+ return entry;
+ }
+
+ return NULL;
+}
+
/* This function requires the caller holds hdev->lock */
static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
bdaddr_t *bdaddr, __u8 addr_type,
@@ -240,6 +288,7 @@ static int msft_le_monitor_advertisement_cb(struct hci_dev *hdev, u16 opcode,
handle_data->mgmt_handle = monitor->handle;
handle_data->msft_handle = rp->handle;
+ handle_data->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN;
INIT_LIST_HEAD(&handle_data->list);
list_add(&handle_data->list, &msft->handle_map);
@@ -254,6 +303,70 @@ unlock:
return status;
}
+/* This function requires the caller holds hci_req_sync_lock */
+static void msft_remove_addr_filters_sync(struct hci_dev *hdev, u8 handle)
+{
+ struct msft_monitor_addr_filter_data *address_filter, *n;
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct list_head head;
+ struct sk_buff *skb;
+
+ INIT_LIST_HEAD(&head);
+
+ /* Cancel all corresponding address monitors */
+ mutex_lock(&msft->filter_lock);
+
+ list_for_each_entry_safe(address_filter, n, &msft->address_filters,
+ list) {
+ if (address_filter->pattern_handle != handle)
+ continue;
+
+ list_del(&address_filter->list);
+
+ /* Keep the address filter and let
+ * msft_add_address_filter_sync() remove and free the address
+ * filter.
+ */
+ if (address_filter->state == AF_STATE_ADDING) {
+ address_filter->state = AF_STATE_REMOVING;
+ continue;
+ }
+
+ /* Keep the address filter and let
+ * msft_cancel_address_filter_sync() remove and free the address
+ * filter.
+ */
+ if (address_filter->state == AF_STATE_REMOVING)
+ continue;
+
+ list_add_tail(&address_filter->list, &head);
+ }
+
+ mutex_unlock(&msft->filter_lock);
+
+ list_for_each_entry_safe(address_filter, n, &head, list) {
+ list_del(&address_filter->list);
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = address_filter->msft_handle;
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ kfree(address_filter);
+ continue;
+ }
+
+ kfree_skb(skb);
+
+ bt_dev_dbg(hdev, "MSFT: Canceled device %pMR address filter",
+ &address_filter->bdaddr);
+
+ kfree(address_filter);
+ }
+}
+
static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
u16 opcode,
struct adv_monitor *monitor,
@@ -263,6 +376,7 @@ static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_data *msft = hdev->msft_data;
int status = 0;
+ u8 msft_handle;
rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data;
if (skb->len < sizeof(*rp)) {
@@ -293,11 +407,17 @@ static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
NULL, 0, false);
}
+ msft_handle = handle_data->msft_handle;
+
list_del(&handle_data->list);
kfree(handle_data);
- }
- hci_dev_unlock(hdev);
+ hci_dev_unlock(hdev);
+
+ msft_remove_addr_filters_sync(hdev, msft_handle);
+ } else {
+ hci_dev_unlock(hdev);
+ }
done:
return status;
@@ -394,12 +514,14 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
{
struct msft_cp_le_monitor_advertisement *cp;
struct msft_le_monitor_advertisement_pattern_data *pattern_data;
+ struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_le_monitor_advertisement_pattern *pattern;
struct adv_pattern *entry;
size_t total_size = sizeof(*cp) + sizeof(*pattern_data);
ptrdiff_t offset = 0;
u8 pattern_count = 0;
struct sk_buff *skb;
+ int err;
if (!msft_monitor_pattern_valid(monitor))
return -EINVAL;
@@ -436,16 +558,31 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp,
HCI_CMD_TIMEOUT);
- kfree(cp);
if (IS_ERR_OR_NULL(skb)) {
- if (!skb)
- return -EIO;
- return PTR_ERR(skb);
+ err = PTR_ERR(skb);
+ goto out_free;
}
- return msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
- monitor, skb);
+ err = msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
+ monitor, skb);
+ if (err)
+ goto out_free;
+
+ handle_data = msft_find_handle_data(hdev, monitor->handle, true);
+ if (!handle_data) {
+ err = -ENODATA;
+ goto out_free;
+ }
+
+ handle_data->rssi_high = cp->rssi_high;
+ handle_data->rssi_low = cp->rssi_low;
+ handle_data->rssi_low_interval = cp->rssi_low_interval;
+ handle_data->rssi_sampling_period = cp->rssi_sampling_period;
+
+out_free:
+ kfree(cp);
+ return err;
}
/* This function requires the caller holds hci_req_sync_lock */
@@ -538,6 +675,7 @@ void msft_do_close(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
struct msft_monitor_advertisement_handle_data *handle_data, *tmp;
+ struct msft_monitor_addr_filter_data *address_filter, *n;
struct adv_monitor *monitor;
if (!msft)
@@ -559,6 +697,14 @@ void msft_do_close(struct hci_dev *hdev)
kfree(handle_data);
}
+ mutex_lock(&msft->filter_lock);
+ list_for_each_entry_safe(address_filter, n, &msft->address_filters,
+ list) {
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ }
+ mutex_unlock(&msft->filter_lock);
+
hci_dev_lock(hdev);
/* Clear any devices that are being monitored and notify device lost */
@@ -568,6 +714,49 @@ void msft_do_close(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
+static int msft_cancel_address_filter_sync(struct hci_dev *hdev, void *data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = data;
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct sk_buff *skb;
+ int err = 0;
+
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT: msft data is freed");
+ return -EINVAL;
+ }
+
+ /* The address filter has been removed by hci dev close */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return 0;
+
+ mutex_lock(&msft->filter_lock);
+ list_del(&address_filter->list);
+ mutex_unlock(&msft->filter_lock);
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = address_filter->msft_handle;
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ bt_dev_err(hdev, "MSFT: Failed to cancel address (%pMR) filter",
+ &address_filter->bdaddr);
+ err = -EIO;
+ goto done;
+ }
+ kfree_skb(skb);
+
+ bt_dev_dbg(hdev, "MSFT: Canceled device %pMR address filter",
+ &address_filter->bdaddr);
+
+done:
+ kfree(address_filter);
+
+ return err;
+}
+
void msft_register(struct hci_dev *hdev)
{
struct msft_data *msft = NULL;
@@ -581,7 +770,9 @@ void msft_register(struct hci_dev *hdev)
}
INIT_LIST_HEAD(&msft->handle_map);
+ INIT_LIST_HEAD(&msft->address_filters);
hdev->msft_data = msft;
+ mutex_init(&msft->filter_lock);
}
void msft_unregister(struct hci_dev *hdev)
@@ -596,6 +787,7 @@ void msft_unregister(struct hci_dev *hdev)
hdev->msft_data = NULL;
kfree(msft->evt_prefix);
+ mutex_destroy(&msft->filter_lock);
kfree(msft);
}
@@ -645,11 +837,149 @@ static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
return data;
}
+static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = data;
+ struct msft_rp_le_monitor_advertisement *rp;
+ struct msft_cp_le_monitor_advertisement *cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct sk_buff *skb = NULL;
+ bool remove = false;
+ size_t size;
+
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT: msft data is freed");
+ return -EINVAL;
+ }
+
+ /* The address filter has been removed by hci dev close */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return -ENODEV;
+
+ /* It is safe to use the address filter from now on.
+ * msft_monitor_device_evt() won't delete this filter because it
+ * hasn't been added yet, and all other functions that require
+ * hci_req_sync_lock can't touch this filter before this function
+ * completes, because it is protected by hci_req_sync_lock.
+ */
+
+ if (address_filter->state == AF_STATE_REMOVING) {
+ mutex_lock(&msft->filter_lock);
+ list_del(&address_filter->list);
+ mutex_unlock(&msft->filter_lock);
+ kfree(address_filter);
+ return 0;
+ }
+
+ size = sizeof(*cp) +
+ sizeof(address_filter->addr_type) +
+ sizeof(address_filter->bdaddr);
+ cp = kzalloc(size, GFP_KERNEL);
+ if (!cp) {
+ bt_dev_err(hdev, "MSFT: Alloc cmd param err");
+ remove = true;
+ goto done;
+ }
+ cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT;
+ cp->rssi_high = address_filter->rssi_high;
+ cp->rssi_low = address_filter->rssi_low;
+ cp->rssi_low_interval = address_filter->rssi_low_interval;
+ cp->rssi_sampling_period = address_filter->rssi_sampling_period;
+ cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_ADDR;
+ cp->data[0] = address_filter->addr_type;
+ memcpy(&cp->data[1], &address_filter->bdaddr,
+ sizeof(address_filter->bdaddr));
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ bt_dev_err(hdev, "Failed to enable address %pMR filter",
+ &address_filter->bdaddr);
+ skb = NULL;
+ remove = true;
+ goto done;
+ }
+
+ rp = skb_pull_data(skb, sizeof(*rp));
+ if (!rp || rp->sub_opcode != MSFT_OP_LE_MONITOR_ADVERTISEMENT ||
+ rp->status)
+ remove = true;
+
+done:
+ mutex_lock(&msft->filter_lock);
+
+ if (remove) {
+ bt_dev_warn(hdev, "MSFT: Remove address (%pMR) filter",
+ &address_filter->bdaddr);
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ } else {
+ address_filter->state = AF_STATE_ADDED;
+ address_filter->msft_handle = rp->handle;
+ bt_dev_dbg(hdev, "MSFT: Address %pMR filter enabled",
+ &address_filter->bdaddr);
+ }
+ mutex_unlock(&msft->filter_lock);
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
+/* This function requires the caller holds msft->filter_lock */
+static struct msft_monitor_addr_filter_data *msft_add_address_filter
+ (struct hci_dev *hdev, u8 addr_type, bdaddr_t *bdaddr,
+ struct msft_monitor_advertisement_handle_data *handle_data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = NULL;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+
+ address_filter = kzalloc(sizeof(*address_filter), GFP_KERNEL);
+ if (!address_filter)
+ return NULL;
+
+ address_filter->state = AF_STATE_ADDING;
+ address_filter->msft_handle = 0xff;
+ address_filter->pattern_handle = handle_data->msft_handle;
+ address_filter->mgmt_handle = handle_data->mgmt_handle;
+ address_filter->rssi_high = handle_data->rssi_high;
+ address_filter->rssi_low = handle_data->rssi_low;
+ address_filter->rssi_low_interval = handle_data->rssi_low_interval;
+ address_filter->rssi_sampling_period = handle_data->rssi_sampling_period;
+ address_filter->addr_type = addr_type;
+ bacpy(&address_filter->bdaddr, bdaddr);
+
+ /* With the above AF_STATE_ADDING, duplicate address filters can be
+ * avoided when monitor device events (found/lost) arrive frequently
+ * for the same device.
+ */
+ list_add_tail(&address_filter->list, &msft->address_filters);
+
+ err = hci_cmd_sync_queue(hdev, msft_add_address_filter_sync,
+ address_filter, NULL);
+ if (err < 0) {
+ bt_dev_err(hdev, "MSFT: Add address %pMR filter err", bdaddr);
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ return NULL;
+ }
+
+ bt_dev_dbg(hdev, "MSFT: Add device %pMR address filter",
+ &address_filter->bdaddr);
+
+ return address_filter;
+}
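
The AF_STATE_* values form a small lifecycle: a filter is queued as ADDING, becomes ADDED once the controller accepts it, and a removal that races with a still-pending add flips it to REMOVING so the add path frees it instead of enabling it. A compact sketch of that completion logic (hypothetical, single-threaded):

	#include <stdbool.h>
	#include <stdio.h>

	enum af_state { AF_IDLE, AF_ADDING, AF_ADDED, AF_REMOVING };

	/* Runs when the monitor-advertisement command completes.
	 * Returns true if the filter must be freed rather than kept. */
	static bool add_complete(enum af_state *st, bool cmd_ok)
	{
		if (*st == AF_REMOVING) /* a remove raced with the pending add */
			return true;
		if (!cmd_ok)
			return true;
		*st = AF_ADDED;
		return false;
	}

	int main(void)
	{
		enum af_state st = AF_ADDING;

		if (!add_complete(&st, true))
			printf("filter added, state %d\n", st);
		return 0;
	}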
+
/* This function requires the caller holds hdev->lock */
static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
+ struct msft_monitor_addr_filter_data *n, *address_filter = NULL;
struct msft_ev_le_monitor_device *ev;
struct msft_monitor_advertisement_handle_data *handle_data;
+ struct msft_data *msft = hdev->msft_data;
+ u16 mgmt_handle = 0xffff;
u8 addr_type;
ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
@@ -662,9 +992,53 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
ev->monitor_state, &ev->bdaddr);
handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
- if (!handle_data)
+
+ if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) {
+ if (!handle_data)
+ return;
+ mgmt_handle = handle_data->mgmt_handle;
+ goto report_state;
+ }
+
+ if (handle_data) {
+ /* Don't report any device found/lost event from pattern
+ * monitors. A pattern monitor always has its own address filters
+ * for tracking devices.
+ */
+
+ address_filter = msft_find_address_data(hdev, ev->addr_type,
+ &ev->bdaddr,
+ handle_data->msft_handle);
+ if (address_filter)
+ return;
+
+ if (ev->monitor_state && handle_data->cond_type ==
+ MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN)
+ msft_add_address_filter(hdev, ev->addr_type,
+ &ev->bdaddr, handle_data);
+
return;
+ }
+ /* This device event is not from a pattern monitor.
+ * Report it if there is a corresponding address_filter for it.
+ */
+ list_for_each_entry(n, &msft->address_filters, list) {
+ if (n->state == AF_STATE_ADDED &&
+ n->msft_handle == ev->monitor_handle) {
+ mgmt_handle = n->mgmt_handle;
+ address_filter = n;
+ break;
+ }
+ }
+
+ if (!address_filter) {
+ bt_dev_warn(hdev, "MSFT: Unexpected device event %pMR, %u, %u",
+ &ev->bdaddr, ev->monitor_handle, ev->monitor_state);
+ return;
+ }
+
+report_state:
switch (ev->addr_type) {
case ADDR_LE_DEV_PUBLIC:
addr_type = BDADDR_LE_PUBLIC;
@@ -681,12 +1055,18 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- if (ev->monitor_state)
- msft_device_found(hdev, &ev->bdaddr, addr_type,
- handle_data->mgmt_handle);
- else
- msft_device_lost(hdev, &ev->bdaddr, addr_type,
- handle_data->mgmt_handle);
+ if (ev->monitor_state) {
+ msft_device_found(hdev, &ev->bdaddr, addr_type, mgmt_handle);
+ } else {
+ if (address_filter && address_filter->state == AF_STATE_ADDED) {
+ address_filter->state = AF_STATE_REMOVING;
+ hci_cmd_sync_queue(hdev,
+ msft_cancel_address_filter_sync,
+ address_filter,
+ NULL);
+ }
+ msft_device_lost(hdev, &ev->bdaddr, addr_type, mgmt_handle);
+ }
}
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
@@ -724,7 +1104,9 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
switch (*evt) {
case MSFT_EV_LE_MONITOR_DEVICE:
+ mutex_lock(&msft->filter_lock);
msft_monitor_device_evt(hdev, skb);
+ mutex_unlock(&msft->filter_lock);
break;
default:
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4397e14ff560..b54e8a530f55 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -268,18 +268,16 @@ static struct proto rfcomm_proto = {
.obj_size = sizeof(struct rfcomm_pinfo)
};
-static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
+static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock,
+ int proto, gfp_t prio, int kern)
{
struct rfcomm_dlc *d;
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern);
+ sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
d = rfcomm_dlc_alloc(prio);
if (!d) {
sk_free(sk);
@@ -298,11 +296,6 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int
sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&rfcomm_sk_list, sk);
BT_DBG("sk %p", sk);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index cd1a27ac555d..c736186aba26 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -68,7 +68,6 @@ struct sco_pinfo {
bdaddr_t dst;
__u32 flags;
__u16 setting;
- __u8 cmsg_mask;
struct bt_codec codec;
struct sco_conn *conn;
};
@@ -126,8 +125,11 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
struct hci_dev *hdev = hcon->hdev;
struct sco_conn *conn = hcon->sco_data;
- if (conn)
+ if (conn) {
+ if (!conn->hcon)
+ conn->hcon = hcon;
return conn;
+ }
conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);
if (!conn)
@@ -268,21 +270,21 @@ static int sco_connect(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
conn = sco_conn_add(hcon);
if (!conn) {
hci_conn_drop(hcon);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unlock;
}
- err = sco_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
lock_sock(sk);
+ err = sco_chan_add(conn, sk, NULL);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
+
/* Update source addr of the socket */
bacpy(&sco_pi(sk)->src, &hcon->src);
@@ -296,8 +298,6 @@ static int sco_connect(struct sock *sk)
release_sock(sk);
- return err;
-
unlock:
hci_dev_unlock(hdev);
hci_dev_put(hdev);
@@ -470,15 +470,6 @@ static void sco_sock_close(struct sock *sk)
release_sock(sk);
}
-static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
- struct sock *sk)
-{
- if (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS)
- put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
- sizeof(bt_cb(skb)->sco.pkt_status),
- &bt_cb(skb)->sco.pkt_status);
-}
-
static void sco_sock_init(struct sock *sk, struct sock *parent)
{
BT_DBG("sk %p", sk);
@@ -487,8 +478,6 @@ static void sco_sock_init(struct sock *sk, struct sock *parent)
sk->sk_type = parent->sk_type;
bt_sk(sk)->flags = bt_sk(parent)->flags;
security_sk_clone(parent, sk);
- } else {
- bt_sk(sk)->skb_put_cmsg = sco_skb_put_cmsg;
}
}
@@ -503,21 +492,13 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern);
+ sk = bt_sock_alloc(net, sock, &sco_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = sco_sock_destruct;
sk->sk_sndtimeo = SCO_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
sco_pi(sk)->codec.id = BT_CODEC_CVSD;
sco_pi(sk)->codec.cid = 0xffff;
@@ -914,9 +895,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
}
if (opt)
- sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS;
+ set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
else
- sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS;
+ clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
break;
case BT_CODEC:
@@ -1047,7 +1028,6 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
int len, err = 0;
struct bt_voice voice;
u32 phys;
- int pkt_status;
int buf_len;
struct codec_list *c;
u8 num_codecs, i, __user *ptr;
@@ -1101,9 +1081,8 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- pkt_status = (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS);
-
- if (put_user(pkt_status, (int __user *)optval))
+ if (put_user(test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags),
+ (int __user *)optval))
err = -EFAULT;
break;
@@ -1266,7 +1245,7 @@ static int sco_sock_release(struct socket *sock)
sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ if (sock_flag(sk, SOCK_LINGER) && READ_ONCE(sk->sk_lingertime) &&
!(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
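
With the private cmsg_mask gone, packet-status reporting is toggled through the generic BT_SK_PKT_STATUS socket flag. A hedged userspace sketch of the (unchanged) socket option it backs, assuming the BT_PKT_STATUS and SOL_BLUETOOTH definitions from BlueZ's <bluetooth/bluetooth.h>:

	#include <sys/socket.h>
	#include <bluetooth/bluetooth.h>

	/* Ask the kernel to report per-packet status on a SCO socket */
	static int enable_pkt_status(int fd)
	{
		int opt = 1;

		return setsockopt(fd, SOL_BLUETOOTH, BT_PKT_STATUS,
				  &opt, sizeof(opt));
	}
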
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2321bd2f9964..57a7a64b84ed 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -15,11 +15,12 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
#include <linux/netfilter.h>
+#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -555,12 +556,23 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
+void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
+{
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
+__bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
+ void *e, char f, int g)
+{
+ *b += 1;
+ return a + *b + c + d + (long)e + f + g;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -596,6 +608,7 @@ __diag_pop();
BTF_SET8_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
+BTF_ID_FLAGS(func, bpf_modify_return_test2)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
BTF_SET8_END(bpf_test_modify_return_ids)
@@ -663,7 +676,11 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
case BPF_MODIFY_RETURN:
ret = bpf_modify_return_test(1, &b);
if (b != 2)
- side_effect = 1;
+ side_effect++;
+ b = 2;
+ ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7);
+ if (b != 2)
+ side_effect++;
break;
default:
goto out;
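
bpf_modify_return_test2() exists so fmod_ret programs with a larger argument count can be exercised. A sketch of such a test program using the usual libbpf section naming (the program name here is illustrative, not taken from the selftests):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	SEC("fmod_ret/bpf_modify_return_test2")
	int BPF_PROG(fmod_ret_test2, int a, int *b, short c, int d,
		     void *e, char f, int g, int ret)
	{
		return 0;	/* 0 keeps the kfunc's return value */
	}

	char _license[] SEC("license") = "GPL";
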
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 4f5098d33a46..a6e94ceb7c9a 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -234,6 +234,14 @@ static int br_switchdev_blocking_event(struct notifier_block *nb,
br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
b->blocking_nb);
break;
+ case SWITCHDEV_BRPORT_REPLAY:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_replay(p, b->dev, b->ctx, b->atomic_nb,
+ b->blocking_nb, extack);
+ err = notifier_from_errno(err);
+ break;
}
out:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6116eba1bd89..9d7bc8b96b53 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -154,6 +154,7 @@ void br_forward(const struct net_bridge_port *to,
backup_port = rcu_dereference(to->backup_port);
if (unlikely(!backup_port))
goto out;
+ BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid);
to = backup_port;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 05c5863d2e20..10f0d33d8ccf 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -211,6 +211,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_BACKUP_NHID */
+ 0;
}
@@ -319,6 +320,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
backup_p->dev->ifindex);
rcu_read_unlock();
+ if (p->backup_nhid &&
+ nla_put_u32(skb, IFLA_BRPORT_BACKUP_NHID, p->backup_nhid))
+ return -EMSGSIZE;
+
return 0;
}
@@ -895,6 +900,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_BRPORT_BACKUP_NHID] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1065,6 +1071,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
return err;
}
+ if (tb[IFLA_BRPORT_BACKUP_NHID]) {
+ u32 backup_nhid = nla_get_u32(tb[IFLA_BRPORT_BACKUP_NHID]);
+
+ WRITE_ONCE(p->backup_nhid, backup_nhid);
+ }
+
return 0;
}
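
Userspace sets the new attribute like any other per-port bridge option: an RTM_SETLINK message with an AF_BRIDGE family header and the option nested under IFLA_PROTINFO. A hedged libmnl sketch (nlh is assumed to be prepared with mnl_nlmsg_put_header(); error handling omitted):

	#include <stdint.h>
	#include <libmnl/libmnl.h>
	#include <linux/if_link.h>
	#include <linux/rtnetlink.h>
	#include <sys/socket.h>

	static void set_backup_nhid(struct nlmsghdr *nlh, int port_ifindex,
				    uint32_t nhid)
	{
		struct ifinfomsg *ifm;
		struct nlattr *nest;

		nlh->nlmsg_type = RTM_SETLINK;
		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
		ifm->ifi_family = PF_BRIDGE;
		ifm->ifi_index = port_ifindex;

		/* brport options travel nested inside IFLA_PROTINFO */
		nest = mnl_attr_nest_start(nlh, IFLA_PROTINFO);
		mnl_attr_put_u32(nlh, IFLA_BRPORT_BACKUP_NHID, nhid);
		mnl_attr_nest_end(nlh, nest);
	}
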
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a63b32c1638e..a1f4acfa6994 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -387,6 +387,7 @@ struct net_bridge_port {
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
struct net_bridge_port __rcu *backup_port;
+ u32 backup_nhid;
/* STP */
u8 priority;
@@ -605,6 +606,8 @@ struct br_input_skb_cb {
*/
unsigned long fwd_hwdoms;
#endif
+
+ u32 backup_nhid;
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -971,7 +974,6 @@ int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router);
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack);
int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val);
-int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
unsigned long val);
#if IS_ENABLED(CONFIG_IPV6)
@@ -2115,6 +2117,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
+
bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
@@ -2165,6 +2173,16 @@ br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
{
}
+static inline int
+br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
return false;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ba95c4d74a60..ee84e783e1df 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -727,6 +727,8 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
err = br_switchdev_mdb_replay_one(nb, dev,
SWITCHDEV_OBJ_PORT_MDB(obj),
action, ctx, extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
if (err)
goto out_free_mdb;
}
@@ -759,8 +761,10 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
extack);
- if (err && err != -EOPNOTSUPP)
+ if (err) {
+ /* -EOPNOTSUPP not propagated from MDB replay. */
return err;
+ }
err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
if (err && err != -EOPNOTSUPP)
@@ -825,3 +829,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
nbp_switchdev_del(p);
}
+
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6399a8a69d07..81833ca7a2c7 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -201,6 +201,21 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
if (err)
return err;
+ if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
+ tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+ tunnel_id, 0);
+ if (!tunnel_dst)
+ return -ENOMEM;
+
+ tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
+ IP_TUNNEL_INFO_BRIDGE;
+ tunnel_dst->u.tun_info.key.nhid =
+ BR_INPUT_SKB_CB(skb)->backup_nhid;
+ skb_dst_set(skb, &tunnel_dst->dst);
+
+ return 0;
+ }
+
tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
skb_dst_set(skb, &tunnel_dst->dst);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 757ec46fc45a..aa23479b20b2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2115,8 +2115,7 @@ static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *ba
return ret;
offsets[0] = sizeof(struct ebt_entry); /* matches come first */
- memcpy(&offsets[1], &entry->watchers_offset,
- sizeof(offsets) - sizeof(offsets[0]));
+ memcpy(&offsets[1], &entry->offsets, sizeof(entry->offsets));
if (state->buf_kern_start) {
buf_start = state->buf_kern_start + state->buf_kern_offset;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9ba35685b043..9168114fc87f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1526,6 +1526,12 @@ static int bcm_release(struct socket *sock)
lock_sock(sk);
+#if IS_ENABLED(CONFIG_PROC_FS)
+ /* remove procfs entry */
+ if (net->can.bcmproc_dir && bo->bcm_proc_read)
+ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+#endif /* CONFIG_PROC_FS */
+
list_for_each_entry_safe(op, next, &bo->tx_ops, list)
bcm_remove_op(op);
@@ -1561,12 +1567,6 @@ static int bcm_release(struct socket *sock)
list_for_each_entry_safe(op, next, &bo->rx_ops, list)
bcm_remove_op(op);
-#if IS_ENABLED(CONFIG_PROC_FS)
- /* remove procfs entry */
- if (net->can.bcmproc_dir && bo->bcm_proc_read)
- remove_proc_entry(bo->procname, net->can.bcmproc_dir);
-#endif /* CONFIG_PROC_FS */
-
/* remove device reference */
if (bo->bound) {
bo->bound = 0;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 99770ed28531..f02b5d3e4733 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -188,12 +188,6 @@ static bool isotp_register_rxid(struct isotp_sock *so)
return (isotp_bc_flags(so) == 0);
}
-static bool isotp_register_txecho(struct isotp_sock *so)
-{
- /* all modes but SF_BROADCAST register for tx echo skbs */
- return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
-}
-
static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -1209,7 +1203,7 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && isotp_register_txecho(so)) {
+ if (so->bound) {
if (so->ifindex) {
struct net_device *dev;
@@ -1332,14 +1326,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
- if (isotp_register_txecho(so)) {
- /* no consecutive frame echo skb in flight */
- so->cfecho = 0;
+ /* no consecutive frame echo skb in flight */
+ so->cfecho = 0;
- /* register for echo skb's */
- can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
- isotp_rcv_echo, sk, "isotpe", sk);
- }
+ /* register for echo skbs */
+ can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+ isotp_rcv_echo, sk, "isotpe", sk);
dev_put(dev);
@@ -1560,7 +1552,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && isotp_register_txecho(so)) {
+ if (so->bound) {
if (isotp_register_rxid(so))
can_rx_unregister(dev_net(dev), dev, so->rxid,
SINGLE_MASK(so->rxid),
diff --git a/net/can/raw.c b/net/can/raw.c
index 15c79b079184..d50c3f3d892f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,6 +84,8 @@ struct raw_sock {
struct sock sk;
int bound;
int ifindex;
+ struct net_device *dev;
+ netdevice_tracker dev_tracker;
struct list_head notifier;
int loopback;
int recv_own_msgs;
@@ -277,21 +279,24 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
if (!net_eq(dev_net(dev), sock_net(sk)))
return;
- if (ro->ifindex != dev->ifindex)
+ if (ro->dev != dev)
return;
switch (msg) {
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (ro->bound)
+ if (ro->bound) {
raw_disable_allfilters(dev_net(dev), dev, sk);
+ netdev_put(dev, &ro->dev_tracker);
+ }
if (ro->count > 1)
kfree(ro->filter);
ro->ifindex = 0;
ro->bound = 0;
+ ro->dev = NULL;
ro->count = 0;
release_sock(sk);
@@ -337,6 +342,7 @@ static int raw_init(struct sock *sk)
ro->bound = 0;
ro->ifindex = 0;
+ ro->dev = NULL;
/* set default filter to single entry dfilter */
ro->dfilter.can_id = 0;
@@ -383,18 +389,14 @@ static int raw_release(struct socket *sock)
list_del(&ro->notifier);
spin_unlock(&raw_notifier_lock);
+ rtnl_lock();
lock_sock(sk);
/* remove current filters & unregister */
if (ro->bound) {
- if (ro->ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (dev) {
- raw_disable_allfilters(dev_net(dev), dev, sk);
- dev_put(dev);
- }
+ if (ro->dev) {
+ raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
+ netdev_put(ro->dev, &ro->dev_tracker);
} else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
}
@@ -405,6 +407,7 @@ static int raw_release(struct socket *sock)
ro->ifindex = 0;
ro->bound = 0;
+ ro->dev = NULL;
ro->count = 0;
free_percpu(ro->uniq);
@@ -412,6 +415,8 @@ static int raw_release(struct socket *sock)
sock->sk = NULL;
release_sock(sk);
+ rtnl_unlock();
+
sock_put(sk);
return 0;
@@ -422,6 +427,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
+ struct net_device *dev = NULL;
int ifindex;
int err = 0;
int notify_enetdown = 0;
@@ -431,24 +437,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (addr->can_family != AF_CAN)
return -EINVAL;
+ rtnl_lock();
lock_sock(sk);
if (ro->bound && addr->can_ifindex == ro->ifindex)
goto out;
if (addr->can_ifindex) {
- struct net_device *dev;
-
dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
if (!dev) {
err = -ENODEV;
goto out;
}
if (dev->type != ARPHRD_CAN) {
- dev_put(dev);
err = -ENODEV;
- goto out;
+ goto out_put_dev;
}
+
if (!(dev->flags & IFF_UP))
notify_enetdown = 1;
@@ -456,7 +461,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
/* filters set by default/setsockopt */
err = raw_enable_allfilters(sock_net(sk), dev, sk);
- dev_put(dev);
+ if (err)
+ goto out_put_dev;
+
} else {
ifindex = 0;
@@ -467,26 +474,30 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (!err) {
if (ro->bound) {
/* unregister old filters */
- if (ro->ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(sock_net(sk),
- ro->ifindex);
- if (dev) {
- raw_disable_allfilters(dev_net(dev),
- dev, sk);
- dev_put(dev);
- }
+ if (ro->dev) {
+ raw_disable_allfilters(dev_net(ro->dev),
+ ro->dev, sk);
+ /* drop reference to old ro->dev */
+ netdev_put(ro->dev, &ro->dev_tracker);
} else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
}
}
ro->ifindex = ifindex;
ro->bound = 1;
+ /* bind() ok -> hold a reference for new ro->dev */
+ ro->dev = dev;
+ if (ro->dev)
+ netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL);
}
- out:
+out_put_dev:
+ /* remove potential reference from dev_get_by_index() */
+ if (dev)
+ dev_put(dev);
+out:
release_sock(sk);
+ rtnl_unlock();
if (notify_enetdown) {
sk->sk_err = ENETDOWN;
@@ -553,9 +564,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex) {
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (!dev) {
+ dev = ro->dev;
+ if (ro->bound && dev) {
+ if (dev->reg_state != NETREG_REGISTERED) {
if (count > 1)
kfree(filter);
err = -ENODEV;
@@ -596,7 +607,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->count = count;
out_fil:
- dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -614,9 +624,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex) {
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (!dev) {
+ dev = ro->dev;
+ if (ro->bound && dev) {
+ if (dev->reg_state != NETREG_REGISTERED) {
err = -ENODEV;
goto out_err;
}
@@ -640,7 +650,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->err_mask = err_mask;
out_err:
- dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -873,7 +882,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->dev = dev;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
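
The conversion above follows the now-standard pattern for sockets that pin a device: keep a struct net_device pointer plus a netdevice_tracker, and take/drop the reference under RTNL so it cannot race NETDEV_UNREGISTER. A minimal sketch of the pattern, with a hypothetical my_sock standing in for raw_sock:

	struct my_sock {		/* hypothetical per-socket state */
		struct net_device *dev;
		netdevice_tracker dev_tracker;
	};

	static void my_bind_dev(struct my_sock *mo, struct net_device *dev)
	{
		ASSERT_RTNL();
		mo->dev = dev;
		if (mo->dev)	/* tracked ref makes leaks attributable */
			netdev_hold(mo->dev, &mo->dev_tracker, GFP_KERNEL);
	}

	static void my_unbind_dev(struct my_sock *mo)
	{
		ASSERT_RTNL();
		if (mo->dev)
			netdev_put(mo->dev, &mo->dev_tracker);
		mo->dev = NULL;
	}
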
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index cd7b0bf5369e..5eb4898cccd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1123,6 +1123,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
return true;
}
}
+EXPORT_SYMBOL(ceph_addr_is_blank);
int ceph_addr_port(const struct ceph_entity_addr *addr)
{
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 1a888b86a494..1df1d29dee92 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -390,6 +390,8 @@ static int head_onwire_len(int ctrl_len, bool secure)
int head_len;
int rem_len;
+ BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
+
if (secure) {
head_len = CEPH_PREAMBLE_SECURE_LEN;
if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
@@ -408,6 +410,10 @@ static int head_onwire_len(int ctrl_len, bool secure)
static int __tail_onwire_len(int front_len, int middle_len, int data_len,
bool secure)
{
+ BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
+ middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
+ data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
+
if (!front_len && !middle_len && !data_len)
return 0;
@@ -520,29 +526,34 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc)
desc->fd_aligns[i] = ceph_decode_16(&p);
}
- /*
- * This would fire for FRAME_TAG_WAIT (it has one empty
- * segment), but we should never get it as client.
- */
- if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
- pr_err("last segment empty\n");
+ if (desc->fd_lens[0] < 0 ||
+ desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
+ pr_err("bad control segment length %d\n", desc->fd_lens[0]);
return -EINVAL;
}
-
- if (desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
- pr_err("control segment too big %d\n", desc->fd_lens[0]);
+ if (desc->fd_lens[1] < 0 ||
+ desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
+ pr_err("bad front segment length %d\n", desc->fd_lens[1]);
return -EINVAL;
}
- if (desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
- pr_err("front segment too big %d\n", desc->fd_lens[1]);
+ if (desc->fd_lens[2] < 0 ||
+ desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
+ pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
return -EINVAL;
}
- if (desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
- pr_err("middle segment too big %d\n", desc->fd_lens[2]);
+ if (desc->fd_lens[3] < 0 ||
+ desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
+ pr_err("bad data segment length %d\n", desc->fd_lens[3]);
return -EINVAL;
}
- if (desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
- pr_err("data segment too big %d\n", desc->fd_lens[3]);
+
+ /*
+ * This would fire for FRAME_TAG_WAIT (it has one empty
+ * segment), but we should never get it as client.
+ */
+ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
+ pr_err("last segment empty, segment count %d\n",
+ desc->fd_seg_cnt);
return -EINVAL;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 11c04e7d928e..658a6f2320cf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
int ret;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
return ret ?: lreq->reg_commit_error;
}
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+ unsigned long timeout)
{
- int ret;
+ long left;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
- return ret ?: lreq->notify_finish_error;
+ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+ ceph_timeout_jiffies(timeout));
+ if (left <= 0)
+ left = left ?: -ETIMEDOUT;
+ else
+ left = lreq->notify_finish_error; /* completed */
+
+ return left;
}
/*
@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
- ret = linger_notify_finish_wait(lreq);
+ ret = linger_notify_finish_wait(lreq,
+ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
else
dout("lreq %p failed to initiate notify %d\n", lreq, ret);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d4172534dfa8..cca7594be92e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -496,8 +496,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
return ERR_PTR(-EPERM);
nla_for_each_nested(nla, nla_stgs, rem) {
- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+ if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
+ if (nla_len(nla) != sizeof(u32))
+ return ERR_PTR(-EINVAL);
nr_maps++;
+ }
}
diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..ccff2b6ef958 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -107,6 +107,7 @@
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <net/tcx.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -132,6 +133,7 @@
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <trace/events/qdisc.h>
+#include <trace/events/xdp.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
@@ -150,11 +152,11 @@
#include <linux/pm_runtime.h>
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include <net/netdev_rx_queue.h>
#include "dev.h"
#include "net-sysfs.h"
-
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
@@ -388,6 +390,8 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+ /* We reserved the ifindex, this can't fail */
+ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
dev_base_seq_inc(net);
}
@@ -397,8 +401,12 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct net *net = dev_net(dev);
+
ASSERT_RTNL();
+ xa_erase(&net->dev_by_index, dev->ifindex);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -2384,8 +2392,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
struct xps_map *map = NULL;
int pos;
- if (dev_maps)
- map = xmap_dereference(dev_maps->attr_map[tci]);
+ map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@@ -3882,69 +3889,201 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(dev_loopback_xmit);
#ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif /* CONFIG_NET_EGRESS */
+
+#ifdef CONFIG_NET_XGRESS
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+{
+ int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
- struct tcf_result cl_res;
+ struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
+ struct tcf_result res;
if (!miniq)
- return skb;
+ return ret;
- /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
- mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+ ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+ /* Only tcf related quirks below. */
+ switch (ret) {
+ case TC_ACT_SHOT:
+ mini_qdisc_qstats_cpu_drop(miniq);
+ break;
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
+ skb->tc_index = TC_H_MIN(res.classid);
break;
+ }
+#endif /* CONFIG_NET_CLS_ACT */
+ return ret;
+}
+
+static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
+
+void tcx_inc(void)
+{
+ static_branch_inc(&tcx_needed_key);
+}
+
+void tcx_dec(void)
+{
+ static_branch_dec(&tcx_needed_key);
+}
+
+static __always_inline enum tcx_action_base
+tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
+ const bool needs_mac)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *prog;
+ int ret = TCX_NEXT;
+
+ if (needs_mac)
+ __skb_push(skb, skb->mac_len);
+ bpf_mprog_foreach_prog(entry, fp, prog) {
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run(prog, skb);
+ if (ret != TCX_NEXT)
+ break;
+ }
+ if (needs_mac)
+ __skb_pull(skb, skb->mac_len);
+ return tcx_action_code(skb, ret);
+}
+
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ tcx_set_ingress(skb, true);
+
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, true);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto ingress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+ingress_verdict:
+ switch (sch_ret) {
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by BPF, so we can safely
+ * push the L2 header back before redirecting to another
+ * netdev.
+ */
+ __skb_push(skb, skb->mac_len);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
+ *ret = NET_RX_SUCCESS;
+ return NULL;
case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- *ret = NET_XMIT_DROP;
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
return NULL;
+ /* used by tc_run */
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
- *ret = NET_XMIT_SUCCESS;
consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
return NULL;
+ }
+
+ return skb;
+}
+
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+
+ /* qdisc_skb_cb(skb)->pkt_len was already set and tcx_set_ingress()
+ * already called by the caller.
+ */
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, false);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto egress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+egress_verdict:
+ switch (sch_ret) {
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
skb_do_redirect(skb);
*ret = NET_XMIT_SUCCESS;
return NULL;
- default:
- break;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ *ret = NET_XMIT_DROP;
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_XMIT_SUCCESS;
+ return NULL;
}
-#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-
-static struct netdev_queue *
-netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
-{
- int qm = skb_get_queue_mapping(skb);
-
- return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
-}
-
-static bool netdev_xmit_txqueue_skipped(void)
+#else
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
{
- return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+ return skb;
}
-void netdev_xmit_skip_txqueue(bool skip)
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+ return skb;
}
-EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
-#endif /* CONFIG_NET_EGRESS */
+#endif /* CONFIG_NET_XGRESS */
#ifdef CONFIG_XPS
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
@@ -4128,9 +4267,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_update_prio(skb);
qdisc_pkt_len_init(skb);
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_at_ingress = 0;
-#endif
+ tcx_set_ingress(skb, false);
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
if (nf_hook_egress_active()) {
@@ -5064,72 +5201,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-static inline struct sk_buff *
-sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev, bool *another)
-{
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
- struct tcf_result cl_res;
-
- /* If there's at least one ingress present somewhere (so
- * we get here via enabled static key), remaining devices
- * that are not configured with an ingress qdisc will bail
- * out here.
- */
- if (!miniq)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- qdisc_skb_cb(skb)->pkt_len = skb->len;
- tc_skb_cb(skb)->mru = 0;
- tc_skb_cb(skb)->post_ct = false;
- skb->tc_at_ingress = 1;
- mini_qdisc_bstats_cpu_update(miniq, skb);
-
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
- case TC_ACT_OK:
- case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
- break;
- case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
- *ret = NET_RX_DROP;
- return NULL;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- case TC_ACT_TRAP:
- consume_skb(skb);
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_REDIRECT:
- /* skb_mac_header check was done by cls/act_bpf, so
- * we can safely push the L2 header back before
- * redirecting to another netdev
- */
- __skb_push(skb, skb->mac_len);
- if (skb_do_redirect(skb) == -EAGAIN) {
- __skb_pull(skb, skb->mac_len);
- *another = true;
- break;
- }
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_CONSUMED:
- *ret = NET_RX_SUCCESS;
- return NULL;
- default:
- break;
- }
-#endif /* CONFIG_NET_CLS_ACT */
- return skb;
-}
-
/**
* netdev_is_rx_handler_busy - check if receive handler is registered
* @dev: device to check
@@ -6316,12 +6387,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (threaded)
- set_bit(NAPI_STATE_THREADED, &napi->state);
- else
- clear_bit(NAPI_STATE_THREADED, &napi->state);
- }
+ list_for_each_entry(napi, &dev->napi_list, dev_list)
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
return err;
}
@@ -9413,6 +9480,7 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct net *net = current->nsproxy->net_ns;
struct bpf_link_primer link_primer;
+ struct netlink_ext_ack extack = {};
struct bpf_xdp_link *link;
struct net_device *dev;
int err, fd;
@@ -9440,12 +9508,13 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
- err = dev_xdp_attach_link(dev, NULL, link);
+ err = dev_xdp_attach_link(dev, &extack, link);
rtnl_unlock();
if (err) {
link->dev = NULL;
bpf_link_cleanup(&link_primer);
+ trace_bpf_xdp_link_attach_failed(extack._msg);
goto out_put_dev;
}
@@ -9509,23 +9578,40 @@ err_out:
}
/**
- * dev_new_index - allocate an ifindex
- * @net: the applicable net namespace
+ * dev_index_reserve() - allocate an ifindex in a namespace
+ * @net: the applicable net namespace
+ * @ifindex: requested ifindex, pass %0 to get one allocated
+ *
+ * Allocate an ifindex for a new device. Caller must either use the ifindex
+ * to store the device (via list_netdevice()) or call dev_index_release()
+ * to give the index up.
*
- * Returns a suitable unique value for a new device interface
- * number. The caller must hold the rtnl semaphore or the
- * dev_base_lock to be sure it remains unique.
+ * Return: a suitable unique value for a new device interface number or -errno.
*/
-static int dev_new_index(struct net *net)
+static int dev_index_reserve(struct net *net, u32 ifindex)
{
- int ifindex = net->ifindex;
+ int err;
- for (;;) {
- if (++ifindex <= 0)
- ifindex = 1;
- if (!__dev_get_by_index(net, ifindex))
- return net->ifindex = ifindex;
+ if (ifindex > INT_MAX) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return -EINVAL;
}
+
+ if (!ifindex)
+ err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
+ xa_limit_31b, &net->ifindex, GFP_KERNEL);
+ else
+ err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ return ifindex;
+}
+
+static void dev_index_release(struct net *net, int ifindex)
+{
+ /* Expect only unused indexes; unlist_netdevice() removes the used ones */
+ WARN_ON(xa_erase(&net->dev_by_index, ifindex));
}
/* Delayed registration/unregisteration */
@@ -9995,11 +10081,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
}
- ret = -EBUSY;
- if (!dev->ifindex)
- dev->ifindex = dev_new_index(net);
- else if (__dev_get_by_index(net, dev->ifindex))
+ ret = dev_index_reserve(net, dev->ifindex);
+ if (ret < 0)
goto err_uninit;
+ dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
@@ -10046,7 +10131,7 @@ int register_netdevice(struct net_device *dev)
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
- goto err_uninit;
+ goto err_ifindex_release;
ret = netdev_register_kobject(dev);
write_lock(&dev_base_lock);
@@ -10102,6 +10187,8 @@ out:
err_uninit_notify:
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
+err_ifindex_release:
+ dev_index_release(net, dev->ifindex);
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -10617,6 +10704,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->xdp_zc_max_segs = 1;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
@@ -10838,7 +10926,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* Shutdown queueing discipline. */
dev_shutdown(dev);
-
+ dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
@@ -10978,9 +11066,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
}
/* Check that new_ifindex isn't used yet. */
- err = -EBUSY;
- if (new_ifindex && __dev_get_by_index(net, new_ifindex))
- goto out;
+ if (new_ifindex) {
+ err = dev_index_reserve(net, new_ifindex);
+ if (err < 0)
+ goto out;
+ } else {
+ /* If there is an ifindex conflict assign a new one */
+ err = dev_index_reserve(net, dev->ifindex);
+ if (err == -EBUSY)
+ err = dev_index_reserve(net, 0);
+ if (err < 0)
+ goto out;
+ new_ifindex = err;
+ }
/*
* And now a mini version of register_netdevice unregister_netdevice.
@@ -11008,13 +11106,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
rcu_barrier();
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
- /* If there is an ifindex conflict assign a new one */
- if (!new_ifindex) {
- if (__dev_get_by_index(net, dev->ifindex))
- new_ifindex = dev_new_index(net);
- else
- new_ifindex = dev->ifindex;
- }
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
new_ifindex);
@@ -11192,6 +11283,8 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1);
+
RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
return 0;
@@ -11289,6 +11382,7 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ xa_destroy(&net->dev_by_index);
if (net != &init_net)
WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
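
Net effect of the ifindex rework: reservation and commit are now two distinct xarray operations. A lifecycle sketch (setup_fails() is a hypothetical stand-in for any step between the two; the other names are from the patch):

	static int register_sketch(struct net *net, struct net_device *dev)
	{
		int ret;

		ret = dev_index_reserve(net, dev->ifindex); /* pass 0 to auto-allocate */
		if (ret < 0)
			return ret;	/* e.g. -EBUSY when the index is taken */
		dev->ifindex = ret;

		if (setup_fails()) {
			/* slot still holds NULL, give it back */
			dev_index_release(net, dev->ifindex);
			return -EIO;
		}

		list_netdevice(dev);	/* xa_store() commits dev into the slot */
		return 0;
	}
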
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 3730945ee294..b46aedc36939 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -5,6 +5,7 @@
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
+#include <linux/phylib_stubs.h>
#include <linux/wireless.h>
#include <linux/if_bridge.h>
#include <net/dsa_stubs.h>
@@ -252,14 +253,121 @@ static int dev_eth_ioctl(struct net_device *dev,
return ops->ndo_eth_ioctl(dev, ifr, cmd);
}
+/**
+ * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC
+ * or of attached phylib PHY
+ * @dev: Network device
+ * @cfg: Timestamping configuration structure
+ *
+ * Helper for enforcing a common policy that phylib timestamping, if available,
+ * should take precedence over hardware timestamping provided by the
+ * netdev.
+ *
+ * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and
+ * there only exists a phydev->mii_ts->hwtstamp() method. So this will return
+ * -EOPNOTSUPP for phylib for now, which is still more accurate than letting
+ * the netdev handle the GET request.
+ */
+static int dev_get_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ if (phy_has_hwtstamp(dev->phydev))
+ return phy_hwtstamp_get(dev->phydev, cfg);
+
+ return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
+}
+
static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP);
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct kernel_hwtstamp_config kernel_cfg = {};
+ struct hwtstamp_config cfg;
+ int err;
+
+ if (!ops->ndo_hwtstamp_get)
+ return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP); /* legacy */
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ kernel_cfg.ifr = ifr;
+ err = dev_get_hwtstamp_phylib(dev, &kernel_cfg);
+ if (err)
+ return err;
+
+ /* If the request was resolved through an unconverted driver, omit
+ * the copy_to_user(), since the implementation has already done that
+ */
+ if (!kernel_cfg.copied_to_user) {
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * dev_set_hwtstamp_phylib() - Change hardware timestamping of NIC
+ * or of attached phylib PHY
+ * @dev: Network device
+ * @cfg: Timestamping configuration structure
+ * @extack: Netlink extended ack message structure, for error reporting
+ *
+ * Helper for enforcing a common policy that phylib timestamping, if available,
+ * should take precedence over hardware timestamping provided by the
+ * netdev. If the netdev driver needs to perform specific actions even for PHY
+ * timestamping to work properly (a switch port must trap the timestamped
+ * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
+ * dev->priv_flags.
+ */
+static int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ bool phy_ts = phy_has_hwtstamp(dev->phydev);
+ struct kernel_hwtstamp_config old_cfg = {};
+ bool changed = false;
+ int err;
+
+ cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
+
+ if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ err = ops->ndo_hwtstamp_get(dev, &old_cfg);
+ if (err)
+ return err;
+ }
+
+ if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ err = ops->ndo_hwtstamp_set(dev, cfg, extack);
+ if (err) {
+ if (extack->_msg)
+ netdev_err(dev, "%s\n", extack->_msg);
+ return err;
+ }
+ }
+
+ if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS))
+ changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
+
+ if (phy_ts) {
+ err = phy_hwtstamp_set(dev->phydev, cfg, extack);
+ if (err) {
+ if (changed)
+ ops->ndo_hwtstamp_set(dev, &old_cfg, NULL);
+ return err;
+ }
+ }
+
+ return 0;
}
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- struct kernel_hwtstamp_config kernel_cfg;
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct kernel_hwtstamp_config kernel_cfg = {};
struct netlink_ext_ack extack = {};
struct hwtstamp_config cfg;
int err;
@@ -268,6 +376,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
return -EFAULT;
hwtstamp_config_to_kernel(&kernel_cfg, &cfg);
+ kernel_cfg.ifr = ifr;
err = net_hwtstamp_validate(&kernel_cfg);
if (err)
@@ -280,8 +389,80 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
return err;
}
- return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP);
+ if (!ops->ndo_hwtstamp_set)
+ return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP); /* legacy */
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack);
+ if (err)
+ return err;
+
+ /* The driver may have modified the configuration, so copy the
+ * updated version of it back to user space
+ */
+ if (!kernel_cfg.copied_to_user) {
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int generic_hwtstamp_ioctl_lower(struct net_device *dev, int cmd,
+ struct kernel_hwtstamp_config *kernel_cfg)
+{
+ struct ifreq ifrr;
+ int err;
+
+ strscpy_pad(ifrr.ifr_name, dev->name, IFNAMSIZ);
+ ifrr.ifr_ifru = kernel_cfg->ifr->ifr_ifru;
+
+ err = dev_eth_ioctl(dev, &ifrr, cmd);
+ if (err)
+ return err;
+
+ kernel_cfg->ifr->ifr_ifru = ifrr.ifr_ifru;
+ kernel_cfg->copied_to_user = true;
+
+ return 0;
+}
+
+int generic_hwtstamp_get_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (ops->ndo_hwtstamp_get)
+ return dev_get_hwtstamp_phylib(dev, kernel_cfg);
+
+ /* Legacy path: unconverted lower driver */
+ return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg);
+}
+EXPORT_SYMBOL(generic_hwtstamp_get_lower);
+
+int generic_hwtstamp_set_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (ops->ndo_hwtstamp_set)
+ return dev_set_hwtstamp_phylib(dev, kernel_cfg, extack);
+
+ /* Legacy path: unconverted lower driver */
+ return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg);
}
+EXPORT_SYMBOL(generic_hwtstamp_set_lower);
static int dev_siocbond(struct net_device *dev,
struct ifreq *ifr, unsigned int cmd)
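
For drivers, opting into this path means implementing the two new ndos instead of parsing SIOC[GS]HWTSTAMP in ndo_eth_ioctl(); the core then applies the phylib-first policy for them. A hedged sketch of a converted driver (the foo_* names and priv layout are hypothetical):

	struct foo_priv {			/* hypothetical driver state */
		struct kernel_hwtstamp_config tstamp_cfg;
	};

	static int foo_hwtstamp_get(struct net_device *dev,
				    struct kernel_hwtstamp_config *cfg)
	{
		struct foo_priv *priv = netdev_priv(dev);

		*cfg = priv->tstamp_cfg;	/* report current settings */
		return 0;
	}

	static int foo_hwtstamp_set(struct net_device *dev,
				    struct kernel_hwtstamp_config *cfg,
				    struct netlink_ext_ack *extack)
	{
		struct foo_priv *priv = netdev_priv(dev);

		if (cfg->tx_type != HWTSTAMP_TX_OFF &&
		    cfg->tx_type != HWTSTAMP_TX_ON) {
			NL_SET_ERR_MSG_MOD(extack, "unsupported tx_type");
			return -ERANGE;
		}

		priv->tstamp_cfg = *cfg;	/* program the hardware here */
		return 0;
	}

	static const struct net_device_ops foo_netdev_ops = {
		.ndo_hwtstamp_get	= foo_hwtstamp_get,
		.ndo_hwtstamp_set	= foo_hwtstamp_set,
	};
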
diff --git a/net/core/dst.c b/net/core/dst.c
index 79d9306ad1ee..980e2fd2f013 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -152,7 +152,7 @@ void dst_dev_put(struct dst_entry *dst)
dst->obsolete = DST_OBSOLETE_DEAD;
if (dst->ops->ifdown)
- dst->ops->ifdown(dst, dev, true);
+ dst->ops->ifdown(dst, dev);
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = blackhole_netdev;
diff --git a/net/core/filter.c b/net/core/filter.c
index 06ba0e56e369..a094694899c9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
if (unlikely(data_end > data_hard_end))
return -EINVAL;
- /* ALL drivers MUST init xdp->frame_sz, chicken check below */
- if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
- WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
- return -EINVAL;
- }
-
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
@@ -4345,13 +4339,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
- if (map_type == BPF_MAP_TYPE_XSKMAP) {
- /* XDP_REDIRECT is not supported AF_XDP yet. */
- if (unlikely(xdp_buff_has_frags(xdp)))
- return -EOPNOTSUPP;
-
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
- }
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
xdp_prog);
@@ -7356,8 +7345,8 @@ BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
return -EOPNOTSUPP;
if (unlikely(dev_net(skb->dev) != sock_net(sk)))
return -ENETUNREACH;
- if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
- return -ESOCKTNOSUPPORT;
+ if (sk_unhashed(sk))
+ return -EOPNOTSUPP;
if (sk_is_refcounted(sk) &&
unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
return -ENOENT;
@@ -9312,7 +9301,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, read skb->tstamp as is if tstamp_type_access is true.
@@ -9346,7 +9335,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
__u8 value_reg = si->src_reg;
__u8 skb_reg = si->dst_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 85a2d0d9bd39..89d15ceaf9af 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -40,7 +40,7 @@
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
- flow_dissector->used_keys |= (1 << key_id);
+ flow_dissector->used_keys |= (1ULL << key_id);
}
void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
@@ -205,6 +205,50 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
}
+static void __skb_flow_dissect_ah(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_ipsec *key_ah;
+ struct ip_auth_hdr _hdr, *hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_ah = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPSEC,
+ target_container);
+
+ key_ah->spi = hdr->spi;
+}
+
+static void __skb_flow_dissect_esp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_ipsec *key_esp;
+ struct ip_esp_hdr _hdr, *hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_esp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPSEC,
+ target_container);
+
+ key_esp->spi = hdr->spi;
+}
+
static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, const void *data,
@@ -1571,7 +1615,14 @@ ip_proto_again:
__skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
-
+ case IPPROTO_ESP:
+ __skb_flow_dissect_esp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+ case IPPROTO_AH:
+ __skb_flow_dissect_ah(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
default:
break;
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index acfc1f88ea79..bc5169482710 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -146,6 +146,13 @@ void flow_rule_match_tcp(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_tcp);
+void flow_rule_match_ipsec(const struct flow_rule *rule,
+ struct flow_match_ipsec *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPSEC, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipsec);
+
void flow_rule_match_icmp(const struct flow_rule *rule,
struct flow_match_icmp *out)
{
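
A driver offloading classifier rules would consume the new IPsec match like the existing flow_rule_match_*() helpers. A sketch (foo_parse_ipsec() is illustrative; rule would come from flow_cls_offload_flow_rule() in a real driver):

	static void foo_parse_ipsec(const struct flow_rule *rule, __be32 *spi)
	{
		struct flow_match_ipsec match;

		if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPSEC))
			return;

		flow_rule_match_ipsec(rule, &match);
		if (match.mask->spi)
			*spi = match.key->spi;	/* AH/ESP SPI, network order */
	}
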
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 8b6b5e72b217..4a0797f0a154 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -60,9 +60,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
ret = BPF_OK;
} else {
skb_reset_mac_header(skb);
- ret = skb_do_redirect(skb);
- if (ret == 0)
- ret = BPF_REDIRECT;
+ skb_do_redirect(skb);
+ ret = BPF_REDIRECT;
}
break;
@@ -255,7 +254,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
if (unlikely(err))
- return err;
+ return net_xmit_errno(err);
/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
return LWTUNNEL_XMIT_DONE;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 15e3f4606b5f..fccaa5bac0ed 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -23,6 +23,7 @@
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include <net/netdev_rx_queue.h>
#include "dev.h"
#include "net-sysfs.h"
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 805b7385dd8d..6aef976bc1da 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -63,4 +63,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
+EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index a4270fafdf11..c1aea8b756b6 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -10,11 +10,11 @@
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
- u32 portid, u32 seq, int flags, u32 cmd)
+ const struct genl_info *info)
{
void *hdr;
- hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd);
+ hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
@@ -25,6 +25,14 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
return -EINVAL;
}
+ if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ netdev->xdp_zc_max_segs)) {
+ genlmsg_cancel(rsp, hdr);
+ return -EINVAL;
+ }
+ }
+
genlmsg_end(rsp, hdr);
return 0;
@@ -33,17 +41,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
static void
netdev_genl_dev_notify(struct net_device *netdev, int cmd)
{
+ struct genl_info info;
struct sk_buff *ntf;
if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
NETDEV_NLGRP_MGMT))
return;
+ genl_info_init_ntf(&info, &netdev_nl_family, cmd);
+
ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!ntf)
return;
- if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) {
+ if (netdev_nl_dev_fill(netdev, ntf, &info)) {
nlmsg_free(ntf);
return;
}
@@ -72,8 +83,7 @@ int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
netdev = __dev_get_by_index(genl_info_net(info), ifindex);
if (netdev)
- err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid,
- info->snd_seq, 0, info->genlhdr->cmd);
+ err = netdev_nl_dev_fill(netdev, rsp, info);
else
err = -ENODEV;
@@ -93,43 +103,19 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
- int idx = 0, s_idx;
- int h, s_h;
- int err;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
+ int err = 0;
rtnl_lock();
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
-
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(netdev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = netdev_nl_dev_fill(netdev, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NETDEV_CMD_DEV_GET);
- if (err < 0)
- break;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, netdev, cb->args[0]) {
+ err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
+ if (err < 0)
+ break;
}
-
rtnl_unlock();
if (err != -EMSGSIZE)
return err;
- cb->args[1] = idx;
- cb->args[0] = h;
- cb->seq = net->dev_base_seq;
-
return skb->len;
}
diff --git a/net/core/of_net.c b/net/core/of_net.c
index 55d3fe229269..93ea425b9248 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/of_net.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/phy.h>
#include <linux/export.h>
#include <linux/device.h>
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a3e12a61d456..77cb75e63aca 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/device.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@@ -58,6 +58,17 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
"rx_pp_recycle_released_ref",
};
+/**
+ * page_pool_get_stats() - fetch page pool stats
+ * @pool: pool from which page was allocated
+ * @stats: struct page_pool_stats to fill in
+ *
+ * Retrieve statistics about the page_pool. This API is only available
+ * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
+ * The caller passes a pointer to a caller-allocated struct page_pool_stats,
+ * which this API fills in. The caller can then report
+ * those stats to the user (perhaps via ethtool, debugfs, etc.).
+ */
bool page_pool_get_stats(struct page_pool *pool,
struct page_pool_stats *stats)
{
@@ -224,6 +235,10 @@ static int page_pool_init(struct page_pool *pool,
return 0;
}
+/**
+ * page_pool_create() - create a page pool.
+ * @params: parameters, see struct page_pool_params
+ */
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
struct page_pool *pool;
@@ -492,7 +507,7 @@ static s32 page_pool_inflight(struct page_pool *pool)
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_release_page(struct page_pool *pool, struct page *page)
+static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
int count;
@@ -518,13 +533,6 @@ skip_dma_unmap:
*/
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page, count);
-}
-EXPORT_SYMBOL(page_pool_release_page);
-
-/* Return a page to the page allocator, cleaning up our state */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
-{
- page_pool_release_page(pool, page);
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
@@ -579,6 +587,8 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ lockdep_assert_no_hardirq();
+
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but has fallbacks that act like the
* regular page allocator APIs.
@@ -616,9 +626,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- /* Do not replace this with page_pool_return_page() */
- page_pool_release_page(pool, page);
- put_page(page);
+ page_pool_return_page(pool, page);
return NULL;
}
@@ -635,7 +643,21 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
}
EXPORT_SYMBOL(page_pool_put_defragged_page);
-/* Caller must not use data area after call, as this function overwrites it */
+/**
+ * page_pool_put_page_bulk() - release references on multiple pages
+ * @pool: pool from which pages were allocated
+ * @data: array holding page pointers
+ * @count: number of pages in @data
+ *
+ * Tries to refill a number of pages into the ptr_ring cache, holding the
+ * ptr_ring producer lock. If the ptr_ring is full, page_pool_put_page_bulk()
+ * will release the leftover pages to the page allocator.
+ * page_pool_put_page_bulk() is suitable for running inside the driver NAPI
+ * tx completion loop for the XDP_REDIRECT use case.
+ *
+ * Please note that the caller must not use the data area after running
+ * page_pool_put_page_bulk(), as this function overwrites it.
+ */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
@@ -915,42 +937,3 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
-
-bool page_pool_return_skb_page(struct page *page, bool napi_safe)
-{
- struct napi_struct *napi;
- struct page_pool *pp;
- bool allow_direct;
-
- page = compound_head(page);
-
- /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
- * in order to preserve any existing bits, such as bit 0 for the
- * head page of compound page and bit 1 for pfmemalloc page, so
- * mask those bits for freeing side when doing below checking,
- * and page_is_pfmemalloc() is checked in __page_pool_put_page()
- * to avoid recycling the pfmemalloc page.
- */
- if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
- return false;
-
- pp = page->pp;
-
- /* Allow direct recycle if we have reasons to believe that we are
- * in the same context as the consumer would run, so there's
- * no possible race.
- */
- napi = READ_ONCE(pp->p.napi);
- allow_direct = napi_safe && napi &&
- READ_ONCE(napi->list_owner) == smp_processor_id();
-
- /* Driver set this to memory recycling info. Reset it on recycle.
- * This will *not* work for NIC using a split-page memory model.
- * The page will be returned to the pool here regardless of the
- * 'flipped' fragment being in use or not.
- */
- page_pool_put_full_page(pp, page, allow_direct);
-
- return true;
-}
-EXPORT_SYMBOL(page_pool_return_skb_page);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3ad4e030846d..4a2ec33bfb51 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -61,7 +61,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 43
+#define RTNL_SLAVE_MAX_TYPE 44
struct rtnl_link {
rtnl_doit_func doit;
@@ -1273,7 +1273,6 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct net_device *dev,
int vfs_num,
- struct nlattr *vfinfo,
u32 ext_filter_mask)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
@@ -1343,7 +1342,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
vf_trust.setting = ivi.trusted;
vf = nla_nest_start_noflag(skb, IFLA_VF_INFO);
if (!vf)
- goto nla_put_vfinfo_failure;
+ return -EMSGSIZE;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
@@ -1414,8 +1413,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_vf_failure:
nla_nest_cancel(skb, vf);
-nla_put_vfinfo_failure:
- nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
}
@@ -1441,8 +1438,10 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return -EMSGSIZE;
for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask))
+ if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) {
+ nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
+ }
}
nla_nest_end(skb, vfinfo);
@@ -2268,13 +2267,27 @@ out_err:
return err;
}
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr)
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr)
{
- return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
+ const struct ifinfomsg *ifmp;
+ const struct nlattr *attrs;
+ size_t len;
+
+ ifmp = nla_data(nla_peer);
+ attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
+ len = nla_len(nla_peer) - sizeof(struct ifinfomsg);
+
+ if (ifmp->ifi_index < 0) {
+ NL_SET_ERR_MSG_ATTR(exterr, nla_peer,
+ "ifindex can't be negative");
+ return -EINVAL;
+ }
+
+ return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy,
exterr);
}
-EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
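
A hedged sketch of a caller: a veth-style link driver validating its peer attribute, where `MY_INFO_PEER` is an illustrative attribute carrying a struct ifinfomsg followed by nested IFLA_* attributes:

struct nlattr *peer_tb[IFLA_MAX + 1];
int err;

/* Rejects a negative ifi_index in the peer header, then parses the
 * trailing IFLA_* attributes against ifla_policy.
 */
err = rtnl_nla_parse_ifinfomsg(peer_tb, data[MY_INFO_PEER], extack);
if (err < 0)
	return err;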
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
{
@@ -3547,6 +3560,9 @@ replay:
if (ifm->ifi_index > 0) {
link_specified = true;
dev = __dev_get_by_index(net, ifm->ifi_index);
+ } else if (ifm->ifi_index < 0) {
+ NL_SET_ERR_MSG(extack, "ifindex can't be negative");
+ return -EINVAL;
} else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
link_specified = true;
dev = rtnl_dev_get(net, tb);
@@ -5140,13 +5156,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
have_flags = true;
flags = nla_get_u16(attr);
- break;
+ }
+
+ if (nla_type(attr) == IFLA_BRIDGE_MODE) {
+ if (nla_len(attr) < sizeof(u16))
+ return -EINVAL;
}
}
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 3cd7dd377e53..880027ecf516 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ EXPORT_SYMBOL(__scm_destroy);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct cmsghdr *cmsg;
int err;
@@ -153,7 +154,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
- if (!sock->ops || sock->ops->family != PF_UNIX)
+ if (!ops || ops->family != PF_UNIX)
goto error;
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6c5915efbc17..45707059082f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,7 +73,7 @@
#include <net/mpls.h>
#include <net/mptcp.h>
#include <net/mctp.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/dropreason.h>
#include <linux/uaccess.h>
@@ -879,11 +879,56 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_put_page(struct page *page, bool napi_safe)
+{
+ bool allow_direct = false;
+ struct page_pool *pp;
+
+ page = compound_head(page);
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+ * head page of a compound page and bit 1 for a pfmemalloc page, so
+ * mask those bits on the freeing side when doing the check below,
+ * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+ return false;
+
+ pp = page->pp;
+
+ /* Allow direct recycle if we have reasons to believe that we are
+ * in the same context as the consumer would run, so there's
+ * no possible race.
+ * __page_pool_put_page() makes sure we're not in hardirq context
+ * and interrupts are enabled prior to accessing the cache.
+ */
+ if (napi_safe || in_softirq()) {
+ const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+
+ allow_direct = napi &&
+ READ_ONCE(napi->list_owner) == smp_processor_id();
+ }
+
+ /* The driver set this to memory recycling info. Reset it on recycle.
+ * This will *not* work for NICs using a split-page memory model.
+ * The page will be returned to the pool here regardless of the
+ * 'flipped' fragment being in use or not.
+ */
+ page_pool_put_full_page(pp, page, allow_direct);
+
+ return true;
+}
+EXPORT_SYMBOL(napi_pp_put_page);
+#endif
+
static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return page_pool_return_skb_page(virt_to_page(data), napi_safe);
+ return napi_pp_put_page(virt_to_page(data), napi_safe);
}
static void skb_kfree_head(void *head, unsigned int end_offset)
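
For context, a hedged RX-path sketch showing how an skb ends up in napi_pp_put_page() when freed; the `pool` and `napi` pointers and the buffer geometry are illustrative:

struct page *page = page_pool_dev_alloc_pages(pool);
struct sk_buff *skb;

if (!page)
	return -ENOMEM;
/* ... DMA-sync the page and receive the frame into it ... */
skb = build_skb(page_address(page), PAGE_SIZE);
if (!skb) {
	page_pool_put_full_page(pool, page, false);
	return -ENOMEM;
}
skb_mark_for_recycle(skb);	/* sets skb->pp_recycle */
napi_gro_receive(napi, skb);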
@@ -3656,20 +3701,23 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
EXPORT_SYMBOL(skb_dequeue_tail);
/**
- * skb_queue_purge - empty a list
+ * skb_queue_purge_reason - empty a list
* @list: list to empty
+ * @reason: drop reason
*
* Delete all buffers on an &sk_buff list. Each buffer is removed from
* the list and one reference dropped. This function takes the list
* lock and is atomic with respect to other list locking functions.
*/
-void skb_queue_purge(struct sk_buff_head *list)
+void skb_queue_purge_reason(struct sk_buff_head *list,
+ enum skb_drop_reason reason)
{
struct sk_buff *skb;
+
while ((skb = skb_dequeue(list)) != NULL)
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
-EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_purge_reason);
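
With the reason-aware variant exported, the original skb_queue_purge() can stay as a thin wrapper; a sketch, assuming a generic SKB_DROP_REASON_QUEUE_PURGE drop reason:

static inline void skb_queue_purge(struct sk_buff_head *list)
{
	skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE);
}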
/**
* skb_rbtree_purge - empty a skb rbtree
@@ -3697,6 +3745,27 @@ unsigned int skb_rbtree_purge(struct rb_root *root)
return sum;
}
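+/**
+ * skb_errqueue_purge - purge an error queue
+ * @list: queue to purge
+ *
+ * Delete all buffers on the queue except zerocopy and timestamping
+ * completion notifications, which userspace is still expected to read.
+ * The queue lock is held while unlinking; the victims are freed after
+ * it is dropped.
+ */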
+void skb_errqueue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb, *next;
+ struct sk_buff_head kill;
+ unsigned long flags;
+
+ __skb_queue_head_init(&kill);
+
+ spin_lock_irqsave(&list->lock, flags);
+ skb_queue_walk_safe(list, skb, next) {
+ if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY ||
+ SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING)
+ continue;
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&kill, skb);
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+ __skb_queue_purge(&kill);
+}
+EXPORT_SYMBOL(skb_errqueue_purge);
+
/**
* skb_queue_head - queue a buffer at the list head
* @list: list to use
@@ -4261,6 +4330,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(skb, -skb_network_offset(skb) + offset);
+ /* Ensure the head is writeable before touching the shared info */
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err)
+ goto err_linearize;
+
skb_shinfo(skb)->frag_list = NULL;
while (list_skb) {
@@ -4711,23 +4785,13 @@ static const u8 skb_ext_type_len[] = {
static __always_inline unsigned int skb_ext_total_length(void)
{
- return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
-#endif
-#ifdef CONFIG_XFRM
- skb_ext_type_len[SKB_EXT_SEC_PATH] +
-#endif
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- skb_ext_type_len[TC_SKB_EXT] +
-#endif
-#if IS_ENABLED(CONFIG_MPTCP)
- skb_ext_type_len[SKB_EXT_MPTCP] +
-#endif
-#if IS_ENABLED(CONFIG_MCTP_FLOWS)
- skb_ext_type_len[SKB_EXT_MCTP] +
-#endif
- 0;
+ unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(skb_ext_type_len); i++)
+ l += skb_ext_type_len[i];
+
+ return l;
}
static void skb_extensions_init(void)
@@ -4745,12 +4809,23 @@ static void skb_extensions_init(void)
static void skb_extensions_init(void) {}
#endif
+/* The SKB kmem_cache slab is critical for network performance. Never
+ * merge/alias the slab with similarly sized objects. This avoids fragmentation
+ * that hurts performance of kmem_cache_{alloc,free}_bulk APIs.
+ */
+#ifndef CONFIG_SLUB_TINY
+#define FLAG_SKB_NO_MERGE SLAB_NO_MERGE
+#else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */
+#define FLAG_SKB_NO_MERGE 0
+#endif
+
void __init skb_init(void)
{
skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|
+ FLAG_SKB_NO_MERGE,
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
@@ -6199,7 +6274,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
*
* @header_len: size of linear part
* @data_len: needed length in frags
- * @max_page_order: max page order desired.
+ * @order: max page order desired.
* @errcode: pointer to error code if any
* @gfp_mask: allocation mask
*
@@ -6207,21 +6282,17 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
*/
struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
unsigned long data_len,
- int max_page_order,
+ int order,
int *errcode,
gfp_t gfp_mask)
{
- int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
unsigned long chunk;
struct sk_buff *skb;
struct page *page;
- int i;
+ int nr_frags = 0;
*errcode = -EMSGSIZE;
- /* Note this test could be relaxed, if we succeed to allocate
- * high order pages...
- */
- if (npages > MAX_SKB_FRAGS)
+ if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order)))
return NULL;
*errcode = -ENOBUFS;
@@ -6229,34 +6300,32 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (!skb)
return NULL;
- skb->truesize += npages << PAGE_SHIFT;
-
- for (i = 0; npages > 0; i++) {
- int order = max_page_order;
-
- while (order) {
- if (npages >= 1 << order) {
- page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP |
- __GFP_NOWARN,
- order);
- if (page)
- goto fill_page;
- /* Do not retry other high order allocations */
- order = 1;
- max_page_order = 0;
- }
+ while (data_len) {
+ if (nr_frags == MAX_SKB_FRAGS - 1)
+ goto failure;
+ while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order))
order--;
+
+ if (order) {
+ page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_COMP |
+ __GFP_NOWARN,
+ order);
+ if (!page) {
+ order--;
+ continue;
+ }
+ } else {
+ page = alloc_page(gfp_mask);
+ if (!page)
+ goto failure;
}
- page = alloc_page(gfp_mask);
- if (!page)
- goto failure;
-fill_page:
chunk = min_t(unsigned long, data_len,
PAGE_SIZE << order);
- skb_fill_page_desc(skb, i, page, 0, chunk);
+ skb_fill_page_desc(skb, nr_frags, page, 0, chunk);
+ nr_frags++;
+ skb->truesize += (PAGE_SIZE << order);
data_len -= chunk;
- npages -= 1 << order;
}
return skb;
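
Worked example with illustrative numbers: for order = 2 (16 KiB chunks) and data_len = 20 KiB, the first pass keeps order 2 (PAGE_ALIGN(20 KiB) is not below 16 KiB) and fills a 16 KiB frag; on the second pass the remaining 4 KiB is below both 16 KiB and 8 KiB, so order decays to 0 and a single page completes the skb. A failed high-order allocation likewise just decrements order and retries, bottoming out at alloc_page(), and truesize now grows per allocated chunk instead of being precharged up front.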
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a29508e1ff35..a0659fc29bcc 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
+ int ret;
+
static const struct strp_callbacks cb = {
.rcv_msg = sk_psock_strp_read,
.read_sock_done = sk_psock_strp_read_done,
.parse_msg = sk_psock_strp_parse,
};
- return strp_init(&psock->strp, sk, &cb);
+ ret = strp_init(&psock->strp, sk, &cb);
+ if (!ret)
+ sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+ return ret;
}
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
static void sk_psock_done_strp(struct sk_psock *psock)
{
/* Parser has been stopped */
- if (psock->progs.stream_parser)
+ if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
strp_done(&psock->strp);
}
#else
@@ -1198,13 +1204,17 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ const struct proto_ops *ops;
int copied;
trace_sk_data_ready(sk);
- if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
+ if (unlikely(!sock))
+ return;
+ ops = READ_ONCE(sock->ops);
+ if (!ops || !ops->read_skb)
return;
- copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
struct sk_psock *psock;
diff --git a/net/core/sock.c b/net/core/sock.c
index 9370fd50aa2c..666a17cab4f5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -429,6 +429,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
{
struct __kernel_sock_timeval tv;
int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+ long val;
if (err)
return err;
@@ -439,7 +440,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (tv.tv_sec < 0) {
static int warned __read_mostly;
- *timeo_p = 0;
+ WRITE_ONCE(*timeo_p, 0);
if (warned < 10 && net_ratelimit()) {
warned++;
pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
@@ -447,11 +448,12 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
}
return 0;
}
- *timeo_p = MAX_SCHEDULE_TIMEOUT;
- if (tv.tv_sec == 0 && tv.tv_usec == 0)
- return 0;
- if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
- *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
+ val = MAX_SCHEDULE_TIMEOUT;
+ if ((tv.tv_sec || tv.tv_usec) &&
+ (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
+ val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
+ USEC_PER_SEC / HZ);
+ WRITE_ONCE(*timeo_p, val);
return 0;
}
@@ -765,7 +767,7 @@ bool sk_mc_loop(struct sock *sk)
return true;
switch (sk->sk_family) {
case AF_INET:
- return inet_sk(sk)->mc_loop;
+ return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return inet6_sk(sk)->mc_loop;
@@ -795,7 +797,7 @@ EXPORT_SYMBOL(sock_set_reuseport);
void sock_no_linger(struct sock *sk)
{
lock_sock(sk);
- sk->sk_lingertime = 0;
+ WRITE_ONCE(sk->sk_lingertime, 0);
sock_set_flag(sk, SOCK_LINGER);
release_sock(sk);
}
@@ -804,7 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority)
{
lock_sock(sk);
- sk->sk_priority = priority;
+ WRITE_ONCE(sk->sk_priority, priority);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
@@ -813,9 +815,9 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
lock_sock(sk);
if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
- sk->sk_sndtimeo = secs * HZ;
+ WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
else
- sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
@@ -988,7 +990,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
static void __sock_set_mark(struct sock *sk, u32 val)
{
if (val != sk->sk_mark) {
- sk->sk_mark = val;
+ WRITE_ONCE(sk->sk_mark, val);
sk_dst_reset(sk);
}
}
@@ -1007,7 +1009,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
bytes = round_down(bytes, PAGE_SIZE);
WARN_ON(bytes > sk->sk_reserved_mem);
- sk->sk_reserved_mem -= bytes;
+ WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
sk_mem_reclaim(sk);
}
@@ -1044,7 +1046,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
}
sk->sk_forward_alloc += pages << PAGE_SHIFT;
- sk->sk_reserved_mem += pages << PAGE_SHIFT;
+ WRITE_ONCE(sk->sk_reserved_mem,
+ sk->sk_reserved_mem + (pages << PAGE_SHIFT));
return 0;
}
@@ -1213,7 +1216,7 @@ set_sndbuf:
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- sk->sk_priority = val;
+ WRITE_ONCE(sk->sk_priority, val);
else
ret = -EPERM;
break;
@@ -1227,15 +1230,15 @@ set_sndbuf:
ret = -EFAULT;
break;
}
- if (!ling.l_onoff)
+ if (!ling.l_onoff) {
sock_reset_flag(sk, SOCK_LINGER);
- else {
-#if (BITS_PER_LONG == 32)
- if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
- sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+ } else {
+ unsigned long t_sec = ling.l_linger;
+
+ if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
+ WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
else
-#endif
- sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+ WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
sock_set_flag(sk, SOCK_LINGER);
}
break;
@@ -1244,17 +1247,11 @@ set_sndbuf:
break;
case SO_PASSCRED:
- if (valbool)
- set_bit(SOCK_PASSCRED, &sock->flags);
- else
- clear_bit(SOCK_PASSCRED, &sock->flags);
+ assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
break;
case SO_PASSPIDFD:
- if (valbool)
- set_bit(SOCK_PASSPIDFD, &sock->flags);
- else
- clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
break;
case SO_TIMESTAMP_OLD:
@@ -1280,14 +1277,19 @@ set_sndbuf:
break;
case SO_RCVLOWAT:
+ {
+ int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
+
if (val < 0)
val = INT_MAX;
- if (sock && sock->ops->set_rcvlowat)
- ret = sock->ops->set_rcvlowat(sk, val);
+ if (sock)
+ set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
+ if (set_rcvlowat)
+ ret = set_rcvlowat(sk, val);
else
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
-
+ }
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
@@ -1358,10 +1360,7 @@ set_sndbuf:
break;
case SO_PASSSEC:
- if (valbool)
- set_bit(SOCK_PASSSEC, &sock->flags);
- else
- clear_bit(SOCK_PASSSEC, &sock->flags);
+ assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
break;
case SO_MARK:
if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
@@ -1385,11 +1384,16 @@ set_sndbuf:
break;
case SO_PEEK_OFF:
- if (sock->ops->set_peek_off)
- ret = sock->ops->set_peek_off(sk, val);
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
+ }
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
@@ -1438,7 +1442,8 @@ set_sndbuf:
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
- sk->sk_max_pacing_rate = ulval;
+ /* Pairs with READ_ONCE() from sk_getsockopt() */
+ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
break;
}
@@ -1533,7 +1538,9 @@ set_sndbuf:
}
if ((u8)val == SOCK_TXREHASH_DEFAULT)
val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
- /* Paired with READ_ONCE() in tcp_rtx_synack() */
+ /* Paired with READ_ONCE() in tcp_rtx_synack()
+ * and sk_getsockopt().
+ */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -1633,11 +1640,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_SNDBUF:
- v.val = sk->sk_sndbuf;
+ v.val = READ_ONCE(sk->sk_sndbuf);
break;
case SO_RCVBUF:
- v.val = sk->sk_rcvbuf;
+ v.val = READ_ONCE(sk->sk_rcvbuf);
break;
case SO_REUSEADDR:
@@ -1679,13 +1686,13 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_PRIORITY:
- v.val = sk->sk_priority;
+ v.val = READ_ONCE(sk->sk_priority);
break;
case SO_LINGER:
lv = sizeof(v.ling);
v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
- v.ling.l_linger = sk->sk_lingertime / HZ;
+ v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
break;
case SO_BSDCOMPAT:
@@ -1717,16 +1724,18 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
- lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
+ lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
+ SO_RCVTIMEO_OLD == optname);
break;
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
- lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
+ lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
+ SO_SNDTIMEO_OLD == optname);
break;
case SO_RCVLOWAT:
- v.val = sk->sk_rcvlowat;
+ v.val = READ_ONCE(sk->sk_rcvlowat);
break;
case SO_SNDLOWAT:
@@ -1770,7 +1779,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
spin_unlock(&sk->sk_peer_lock);
if (!peer_pid)
- return -ESRCH;
+ return -ENODATA;
pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
put_pid(peer_pid);
@@ -1815,14 +1824,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_PEERNAME:
{
- char address[128];
+ struct sockaddr_storage address;
- lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
- if (copy_to_sockptr(optval, address, len))
+ if (copy_to_sockptr(optval, &address, len))
return -EFAULT;
goto lenout;
}
@@ -1843,7 +1852,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
optval, optlen, len);
case SO_MARK:
- v.val = sk->sk_mark;
+ v.val = READ_ONCE(sk->sk_mark);
break;
case SO_RCVMARK:
@@ -1859,10 +1868,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_PEEK_OFF:
- if (!sock->ops->set_peek_off)
+ if (!READ_ONCE(sock->ops)->set_peek_off)
return -EOPNOTSUPP;
- v.val = sk->sk_peek_off;
+ v.val = READ_ONCE(sk->sk_peek_off);
break;
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1892,7 +1901,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- v.val = sk->sk_ll_usec;
+ v.val = READ_ONCE(sk->sk_ll_usec);
break;
case SO_PREFER_BUSY_POLL:
v.val = READ_ONCE(sk->sk_prefer_busy_poll);
@@ -1900,12 +1909,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#endif
case SO_MAX_PACING_RATE:
+ /* The READ_ONCE() pairs with the WRITE_ONCE() in sk_setsockopt() */
if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
lv = sizeof(v.ulval);
- v.ulval = sk->sk_max_pacing_rate;
+ v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
} else {
/* 32bit version */
- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+ v.val = min_t(unsigned long, ~0U,
+ READ_ONCE(sk->sk_max_pacing_rate));
}
break;
@@ -1973,11 +1984,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_RESERVE_MEM:
- v.val = sk->sk_reserved_mem;
+ v.val = READ_ONCE(sk->sk_reserved_mem);
break;
case SO_TXREHASH:
- v.val = sk->sk_txrehash;
+ /* Paired with WRITE_ONCE() in sk_setsockopt() */
+ v.val = READ_ONCE(sk->sk_txrehash);
break;
default:
@@ -3148,7 +3160,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
- if (sk_under_memory_pressure(sk) &&
+ if (sk_under_global_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
@@ -3168,7 +3180,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 19538d628714..8f07fea39d9e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
__acquires(&sk->sk_lock.slock)
{
lock_sock(sk);
- preempt_disable();
rcu_read_lock();
}
@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
__releases(&sk->sk_lock.slock)
{
rcu_read_unlock();
- preempt_enable();
release_sock(sk);
}
@@ -148,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
list_for_each_entry_safe(link, tmp, &psock->link, list) {
if (link->link_raw == link_raw) {
struct bpf_map *map = link->map;
- struct bpf_stab *stab = container_of(map, struct bpf_stab,
- map);
- if (psock->saved_data_ready && stab->progs.stream_parser)
+ struct sk_psock_progs *progs = sock_map_progs(map);
+
+ if (psock->saved_data_ready && progs->stream_parser)
strp_stop = true;
- if (psock->saved_data_ready && stab->progs.stream_verdict)
+ if (psock->saved_data_ready && progs->stream_verdict)
verdict_stop = true;
- if (psock->saved_data_ready && stab->progs.skb_verdict)
+ if (psock->saved_data_ready && progs->skb_verdict)
verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41e5ca8643ec..a70670fe9a2d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -14,7 +14,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
__diag_pop();
BTF_SET8_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
+#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_SET8_END(xdp_metadata_kfunc_ids)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index c0c438128575..2e6b8c8fd2de 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -980,7 +980,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
+ tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
NULL);
if (ret)
return ret;
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index d76c9be5bfca..57d9c026aa3f 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -105,7 +105,6 @@ extern int sysctl_dccp_rx_ccid;
extern int sysctl_dccp_tx_ccid;
int dccp_feat_init(struct sock *sk);
-void dccp_feat_initialise_sysctls(void);
int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
u8 const *list, u8 len);
int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index fa8079303cb0..8f56e8723c73 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
inet->inet_dport);
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
err = dccp_connect(sk);
rt = NULL;
@@ -247,7 +247,6 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
const u8 offset = iph->ihl << 2;
const struct dccp_hdr *dh;
struct dccp_sock *dp;
- struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -255,12 +254,17 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- /* Only need dccph_dport & dccph_sport which are the first
- * 4 bytes in dccp header.
+ /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
+ * which is in byte 7 of the dccp header.
* Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
+ *
+ * Later on, we want to access the sequence number fields, which are
+ * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
*/
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ dh = (struct dccp_hdr *)(skb->data + offset);
+ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+ return -EINVAL;
+ iph = (struct iphdr *)skb->data;
dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet_lookup_established(net, &dccp_hashinfo,
@@ -361,8 +365,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
* --ANK (980905)
*/
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
+ if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
@@ -432,7 +435,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
goto put_and_exit;
@@ -474,7 +477,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
.flowi4_oif = inet_iif(skb),
.daddr = iph->saddr,
.saddr = iph->daddr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
+ .flowi4_tos = ip_sock_rt_tos(sk),
+ .flowi4_scope = ip_sock_rt_scope(sk),
.flowi4_proto = sk->sk_protocol,
.fl4_sport = dccp_hdr(skb)->dccph_dport,
.fl4_dport = dccp_hdr(skb)->dccph_sport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7249ef218178..33f6ccf6ba77 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -74,7 +74,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ const struct ipv6hdr *hdr;
const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct ipv6_pinfo *np;
@@ -83,12 +83,17 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- /* Only need dccph_dport & dccph_sport which are the first
- * 4 bytes in dccp header.
+ /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
+ * which is in byte 7 of the dccp header.
* Our caller (icmpv6_notify()) already pulled 8 bytes for us.
+ *
+ * Later on, we want to access the sequence number fields, which are
+ * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
*/
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ dh = (struct dccp_hdr *)(skb->data + offset);
+ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+ return -EINVAL;
+ hdr = (const struct ipv6hdr *)skb->data;
dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet6_lookup_established(net, &dccp_hashinfo,
@@ -238,8 +243,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
- sk->sk_priority);
+ err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
+ np->tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -1056,6 +1061,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 7e4c2a3b322b..c5d14c48def1 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
- /*
- * ipv6_pinfo has to be the last member of dccp6_sock,
- * see inet6_sk_generic.
- */
struct ipv6_pinfo inet6;
};
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b8a24734385e..fd2eb148d24d 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
- dp->dccps_mss_cache = cur_mps;
+ WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
return cur_mps;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f331e5977a84..fcc5c9d64f46 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -315,11 +315,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- __poll_t mask;
struct sock *sk = sock->sk;
+ __poll_t mask;
+ u8 shutdown;
+ int state;
sock_poll_wait(file, sock, wait);
- if (sk->sk_state == DCCP_LISTEN)
+
+ state = inet_sk_state_load(sk);
+ if (state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
@@ -328,20 +332,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
*/
mask = 0;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask = EPOLLERR;
+ shutdown = READ_ONCE(sk->sk_shutdown);
- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+ if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected? */
- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
if (atomic_read(&sk->sk_rmem_alloc) > 0)
mask |= EPOLLIN | EPOLLRDNORM;
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (!(shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
@@ -359,7 +364,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
}
return mask;
}
-
EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, int *karg)
@@ -630,7 +634,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_GET_CUR_MPS:
- val = dp->dccps_mss_cache;
+ val = READ_ONCE(dp->dccps_mss_cache);
break;
case DCCP_SOCKOPT_AVAILABLE_CCIDS:
return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +743,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
trace_dccp_probe(sk, len);
- if (len > dp->dccps_mss_cache)
+ if (len > READ_ONCE(dp->dccps_mss_cache))
return -EMSGSIZE;
lock_sock(sk);
@@ -772,6 +776,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out_discard;
}
+ /* We need to re-check dccps_mss_cache after the socket is locked. */
+ if (len > dp->dccps_mss_cache) {
+ rc = -EMSGSIZE;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index ef91a76646a3..000da622116a 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := leftover.o core.o netlink.o dev.o health.o
+obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
+ resource.o param.o region.o health.o trap.o rate.o linecard.o
diff --git a/net/devlink/core.c b/net/devlink/core.c
index c23ebabadc52..6cec4afb01fb 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -5,9 +5,15 @@
*/
#include <net/genetlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
#include "devl_internal.h"
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+
DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
void *devlink_priv(struct devlink *devlink)
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index bf1d6f1bcfc7..bba4ace7d22b 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -174,7 +174,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
{
struct sk_buff *msg;
int err;
@@ -196,7 +196,7 @@ void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -217,17 +217,44 @@ int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
}
static int
-devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
}
-const struct devlink_cmd devl_cmd_get = {
- .dump_one = devlink_nl_cmd_get_dump_one,
-};
+int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
+}
+
+void devlink_notify_register(struct devlink *devlink)
+{
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ devlink_linecards_notify_register(devlink);
+ devlink_ports_notify_register(devlink);
+ devlink_trap_policers_notify_register(devlink);
+ devlink_trap_groups_notify_register(devlink);
+ devlink_traps_notify_register(devlink);
+ devlink_rates_notify_register(devlink);
+ devlink_regions_notify_register(devlink);
+ devlink_params_notify_register(devlink);
+}
+
+void devlink_notify_unregister(struct devlink *devlink)
+{
+ devlink_params_notify_unregister(devlink);
+ devlink_regions_notify_unregister(devlink);
+ devlink_rates_notify_unregister(devlink);
+ devlink_traps_notify_unregister(devlink);
+ devlink_trap_groups_notify_unregister(devlink);
+ devlink_trap_policers_notify_unregister(devlink);
+ devlink_ports_notify_unregister(devlink);
+ devlink_linecards_notify_unregister(devlink);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+}
static void devlink_reload_failed_set(struct devlink *devlink,
bool reload_failed)
@@ -804,7 +831,7 @@ err_cancel_msg:
return err;
}
-int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -826,23 +853,24 @@ int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
}
static int
-devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
int err;
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
-const struct devlink_cmd devl_cmd_info_get = {
- .dump_one = devlink_nl_cmd_info_get_dump_one,
-};
+int devlink_nl_info_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(msg, cb, devlink_nl_info_get_dump_one);
+}
static int devlink_nl_flash_update_fill(struct sk_buff *msg,
struct devlink *devlink,
@@ -1204,8 +1232,7 @@ err_cancel_msg:
return err;
}
-int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -1228,23 +1255,25 @@ int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int
-devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
if (!devlink->ops->selftest_check)
return 0;
return devlink_nl_selftests_fill(msg, devlink,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
}
-const struct devlink_cmd devl_cmd_selftests_get = {
- .dump_one = devlink_nl_cmd_selftests_get_dump_one,
-};
+int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_selftests_get_dump_one);
+}
static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
enum devlink_selftest_status test_status)
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 62921b2eb0d3..f6b5fea2e13c 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
@@ -11,6 +12,10 @@
#include <linux/xarray.h>
#include <net/devlink.h>
#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
+#include <rdma/ib_verbs.h>
+
+#include "netlink_gen.h"
#define DEVLINK_REGISTERED XA_MARK_1
@@ -90,9 +95,6 @@ static inline bool devl_is_registered(struct devlink *devlink)
/* Netlink */
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
-#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
-#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
@@ -114,21 +116,16 @@ struct devlink_nl_dump_state {
};
};
-struct devlink_cmd {
- int (*dump_one)(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb);
-};
-
-extern const struct genl_small_ops devlink_nl_ops[56];
+typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags);
struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
-void devlink_notify_unregister(struct devlink *devlink);
-void devlink_notify_register(struct devlink *devlink);
-
-int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb);
+int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
+ devlink_nl_dump_one_func_t *dump_one);
static inline struct devlink_nl_dump_state *
devlink_dump_state(struct netlink_callback *cb)
@@ -148,31 +145,36 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
-/* Commands */
-extern const struct devlink_cmd devl_cmd_get;
-extern const struct devlink_cmd devl_cmd_port_get;
-extern const struct devlink_cmd devl_cmd_sb_get;
-extern const struct devlink_cmd devl_cmd_sb_pool_get;
-extern const struct devlink_cmd devl_cmd_sb_port_pool_get;
-extern const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get;
-extern const struct devlink_cmd devl_cmd_param_get;
-extern const struct devlink_cmd devl_cmd_region_get;
-extern const struct devlink_cmd devl_cmd_info_get;
-extern const struct devlink_cmd devl_cmd_health_reporter_get;
-extern const struct devlink_cmd devl_cmd_trap_get;
-extern const struct devlink_cmd devl_cmd_trap_group_get;
-extern const struct devlink_cmd devl_cmd_trap_policer_get;
-extern const struct devlink_cmd devl_cmd_rate_get;
-extern const struct devlink_cmd devl_cmd_linecard_get;
-extern const struct devlink_cmd devl_cmd_selftests_get;
+int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
/* Notify */
-void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+void devlink_notify_register(struct devlink *devlink);
+void devlink_notify_unregister(struct devlink *devlink);
+void devlink_ports_notify_register(struct devlink *devlink);
+void devlink_ports_notify_unregister(struct devlink *devlink);
+void devlink_params_notify_register(struct devlink *devlink);
+void devlink_params_notify_unregister(struct devlink *devlink);
+void devlink_regions_notify_register(struct devlink *devlink);
+void devlink_regions_notify_unregister(struct devlink *devlink);
+void devlink_trap_policers_notify_register(struct devlink *devlink);
+void devlink_trap_policers_notify_unregister(struct devlink *devlink);
+void devlink_trap_groups_notify_register(struct devlink *devlink);
+void devlink_trap_groups_notify_unregister(struct devlink *devlink);
+void devlink_traps_notify_register(struct devlink *devlink);
+void devlink_traps_notify_unregister(struct devlink *devlink);
+void devlink_rates_notify_register(struct devlink *devlink);
+void devlink_rates_notify_unregister(struct devlink *devlink);
+void devlink_linecards_notify_register(struct devlink *devlink);
+void devlink_linecards_notify_unregister(struct devlink *devlink);
/* Ports */
+#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \
+ WARN_ON_ONCE(!(devlink_port)->initialized)
+
+struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+ unsigned int port_index);
int devlink_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr);
-
struct devlink_port *
devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
@@ -199,31 +201,66 @@ int devlink_resources_validate(struct devlink *devlink,
struct devlink_resource *resource,
struct genl_info *info);
-/* Line cards */
-struct devlink_linecard;
-
-struct devlink_linecard *
-devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
-
/* Rates */
int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
-struct devlink_rate *
-devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
-struct devlink_rate *
-devlink_rate_node_get_from_info(struct devlink *devlink,
- struct genl_info *info);
+
+/* Linecards */
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+ struct devlink *nested_devlink;
+};
+
/* Devlink nl cmds */
-int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
struct genl_info *info);
+int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb);
+int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
@@ -236,3 +273,13 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
struct genl_info *info);
+int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
new file mode 100644
index 000000000000..431227c412e5
--- /dev/null
+++ b/net/devlink/dpipe.c
@@ -0,0 +1,917 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+ {
+ .name = "destination mac",
+ .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+ .bitwidth = 48,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+ .name = "ethernet",
+ .id = DEVLINK_DPIPE_HEADER_ETHERNET,
+ .fields = devlink_dpipe_fields_ethernet,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+ .global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+ .bitwidth = 32,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+ .name = "ipv4",
+ .id = DEVLINK_DPIPE_HEADER_IPV4,
+ .fields = devlink_dpipe_fields_ipv4,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+ .global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+ .bitwidth = 128,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+ .name = "ipv6",
+ .id = DEVLINK_DPIPE_HEADER_IPV6,
+ .fields = devlink_dpipe_fields_ipv6,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+ .global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+ struct devlink_dpipe_match *match)
+{
+ struct devlink_dpipe_header *header = match->header;
+ struct devlink_dpipe_field *field = &header->fields[match->field_id];
+ struct nlattr *match_attr;
+
+ match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
+ if (!match_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, match_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, match_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
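
A hedged sketch of a driver-side matches_dump callback built on this helper and the shared IPv4 header above; the table and the function name are illustrative:

static int my_table_matches_dump(void *priv, struct sk_buff *skb)
{
	struct devlink_dpipe_match match = {
		.type		= DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT,
		.header		= &devlink_dpipe_header_ipv4,
		.field_id	= DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
		.header_index	= 0,
	};

	return devlink_dpipe_match_put(skb, &match);
}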
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *matches_attr;
+
+ matches_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+ if (!matches_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->matches_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, matches_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, matches_attr);
+ return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+ struct devlink_dpipe_action *action)
+{
+ struct devlink_dpipe_header *header = action->header;
+ struct devlink_dpipe_field *field = &header->fields[action->field_id];
+ struct nlattr *action_attr;
+
+ action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
+ if (!action_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, action_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, action_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *actions_attr;
+
+ actions_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+ if (!actions_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->actions_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, actions_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, actions_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+ struct devlink_dpipe_table *table)
+{
+ struct nlattr *table_attr;
+ u64 table_size;
+
+ table_size = table->table_ops->size_get(table->priv);
+ table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ table->counters_enabled))
+ goto nla_put_failure;
+
+ if (table->resource_valid) {
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ }
+ if (devlink_dpipe_matches_put(table, skb))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_actions_put(table, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, table_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, table_attr);
+ return -EMSGSIZE;
+}
+
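+/* A dump below may exceed one netlink message: send the current skb as
+ * one NLM_F_MULTI part (if any) and allocate a fresh skb for the next.
+ */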
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+ struct genl_info *info)
+{
+ int err;
+
+ if (*pskb) {
+ err = genlmsg_reply(*pskb, info);
+ if (err)
+ return err;
+ }
+ *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*pskb)
+ return -ENOMEM;
+ return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ struct nlattr *tables_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ table = list_first_entry(dpipe_tables,
+ struct devlink_dpipe_table, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
+ if (!tables_attr)
+ goto nla_put_failure;
+
+ i = 0;
+ incomplete = false;
+ list_for_each_entry_from(table, dpipe_tables, list) {
+ if (!table_name) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err) {
+ if (!i)
+ goto err_table_put;
+ incomplete = true;
+ break;
+ }
+ } else {
+ if (!strcmp(table->name, table_name)) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err)
+ break;
+ }
+ }
+ i++;
+ }
+
+ nla_nest_end(skb, tables_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+ nlmsg_free(skb);
+ return err;
+}
+
+int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name = NULL;
+
+ if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+ return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+ &devlink->dpipe_table_list,
+ table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+ value->value_size, value->value))
+ return -EMSGSIZE;
+ if (value->mask)
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+ value->value_size, value->mask))
+ return -EMSGSIZE;
+ if (value->mapping_valid)
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+ value->mapping_value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->action)
+ return -EINVAL;
+ if (devlink_dpipe_action_put(skb, value->action))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *action_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ action_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+ if (!action_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_action_value_put(skb, &values[i]);
+ if (err)
+ goto err_action_value_put;
+ nla_nest_end(skb, action_attr);
+ }
+ return 0;
+
+err_action_value_put:
+ nla_nest_cancel(skb, action_attr);
+ return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->match)
+ return -EINVAL;
+ if (devlink_dpipe_match_put(skb, value->match))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *match_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ match_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+ if (!match_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_match_value_put(skb, &values[i]);
+ if (err)
+ goto err_match_value_put;
+ nla_nest_end(skb, match_attr);
+ }
+ return 0;
+
+err_match_value_put:
+ nla_nest_cancel(skb, match_attr);
+ return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+ struct devlink_dpipe_entry *entry)
+{
+ struct nlattr *entry_attr, *matches_attr, *actions_attr;
+ int err;
+
+ entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (entry->counter_valid)
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ matches_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+ if (!matches_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_match_values_put(skb, entry->match_values,
+ entry->match_values_count);
+ if (err) {
+ nla_nest_cancel(skb, matches_attr);
+ goto err_match_values_put;
+ }
+ nla_nest_end(skb, matches_attr);
+
+ actions_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+ if (!actions_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_action_values_put(skb, entry->action_values,
+ entry->action_values_count);
+ if (err) {
+ nla_nest_cancel(skb, actions_attr);
+ goto err_action_values_put;
+ }
+ nla_nest_end(skb, actions_attr);
+
+ nla_nest_end(skb, entry_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+ nla_nest_cancel(skb, entry_attr);
+ return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+ const char *table_name, struct devlink *devlink)
+{
+ struct devlink_dpipe_table *table;
+
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
+ if (!strcmp(table->name, table_name))
+ return table;
+ }
+ return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink *devlink;
+ int err;
+
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+ dump_ctx->info);
+ if (err)
+ return err;
+
+ dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+ dump_ctx->info->snd_portid,
+ dump_ctx->info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI,
+ dump_ctx->cmd);
+ if (!dump_ctx->hdr)
+ goto nla_put_failure;
+
+ devlink = dump_ctx->info->user_ptr[0];
+ if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+ goto nla_put_failure;
+ dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
+ DEVLINK_ATTR_DPIPE_ENTRIES);
+ if (!dump_ctx->nest)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ nlmsg_free(dump_ctx->skb);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+ struct devlink_dpipe_entry *entry)
+{
+ return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+ genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+{
+ unsigned int value_count, value_index;
+ struct devlink_dpipe_value *value;
+
+ value = entry->action_values;
+ value_count = entry->action_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+
+ value = entry->match_values;
+ value_count = entry->match_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
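+
+/* A minimal sketch (not part of this file) tying the ctx helpers above
+ * together in a driver ->entries_dump() op: prepare the message, append
+ * each entry, free its value buffers, then close. The "example_" names
+ * and the single empty entry are hypothetical; a real driver walks its
+ * hardware table and fills match/action values per entry, restarting on
+ * -EMSGSIZE.
+ */
+static int example_table_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink_dpipe_entry entry = {};
+ int err;
+
+ err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+ if (err)
+ return err;
+
+ entry.index = 0;
+ entry.counter_valid = counters_enabled;
+ /* match_values/action_values would point to kmalloc'ed buffers
+ * owned by the entry and later freed by devlink_dpipe_entry_clear().
+ */
+ err = devlink_dpipe_entry_ctx_append(dump_ctx, &entry);
+ devlink_dpipe_entry_clear(&entry);
+ if (err)
+ return err;
+
+ return devlink_dpipe_entry_ctx_close(dump_ctx);
+}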
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_table *table)
+{
+ struct devlink_dpipe_dump_ctx dump_ctx;
+ struct nlmsghdr *nlh;
+ int err;
+
+ dump_ctx.skb = NULL;
+ dump_ctx.cmd = cmd;
+ dump_ctx.info = info;
+
+ err = table->table_ops->entries_dump(table->priv,
+ table->counters_enabled,
+ &dump_ctx);
+ if (err)
+ return err;
+
+send_done:
+ nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+ return genlmsg_reply(dump_ctx.skb, info);
+}
+
+int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ const char *table_name;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return -EINVAL;
+
+ if (!table->table_ops->entries_dump)
+ return -EINVAL;
+
+ return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ 0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+ const struct devlink_dpipe_header *header)
+{
+ struct devlink_dpipe_field *field;
+ struct nlattr *field_attr;
+ int i;
+
+ for (i = 0; i < header->fields_count; i++) {
+ field = &header->fields[i];
+ field_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_FIELD);
+ if (!field_attr)
+ return -EMSGSIZE;
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+ goto nla_put_failure;
+ nla_nest_end(skb, field_attr);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, field_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+ struct devlink_dpipe_header *header)
+{
+ struct nlattr *fields_attr, *header_attr;
+ int err;
+
+ header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
+ if (!header_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ fields_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+ if (!fields_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_fields_put(skb, header);
+ if (err) {
+ nla_nest_cancel(skb, fields_attr);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fields_attr);
+ nla_nest_end(skb, header_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+ nla_nest_cancel(skb, header_attr);
+ return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *headers_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ void *hdr;
+ int i, j;
+ int err;
+
+ i = 0;
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+ if (!headers_attr)
+ goto nla_put_failure;
+
+ j = 0;
+ for (; i < dpipe_headers->headers_count; i++) {
+ err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+ if (err) {
+ if (!j)
+ goto err_table_put;
+ break;
+ }
+ j++;
+ }
+ nla_nest_end(skb, headers_attr);
+ genlmsg_end(skb, hdr);
+ if (i != dpipe_headers->headers_count)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+ nlmsg_free(skb);
+ return err;
+}
+
+int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink->dpipe_headers)
+ return -EOPNOTSUPP;
+ return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+ 0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+ const char *table_name,
+ bool enable)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return -EINVAL;
+
+ if (table->counter_control_extern)
+ return -EOPNOTSUPP;
+
+ if (!(table->counters_enabled ^ enable))
+ return 0;
+
+ table->counters_enabled = enable;
+ if (table->table_ops->counters_set_update)
+ table->table_ops->counters_set_update(table->priv, enable);
+ return 0;
+}
+
+int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name;
+ bool counters_enable;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
+ GENL_REQ_ATTR_CHECK(info,
+ DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+ return devlink_dpipe_table_counters_set(devlink, table_name,
+ counters_enable);
+}
+
+/**
+ * devl_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+void devl_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ lockdep_assert_held(&devlink->lock);
+
+ devlink->dpipe_headers = dpipe_headers;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
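+
+/* A minimal sketch (not part of this file) of a driver registering the
+ * headers it supports; the caller must hold the devlink instance lock
+ * (devl_lock()). "example_" names are hypothetical; the array mixes a
+ * global header with the driver-local one sketched earlier.
+ */
+static struct devlink_dpipe_header *example_dpipe_headers[] = {
+ &devlink_dpipe_header_ipv4,
+ &example_header_meta,
+};
+
+static struct devlink_dpipe_headers example_headers = {
+ .headers = example_dpipe_headers,
+ .headers_count = ARRAY_SIZE(example_dpipe_headers),
+};
+
+static void example_dpipe_init(struct devlink *devlink)
+{
+ devl_dpipe_headers_register(devlink, &example_headers);
+}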
+
+/**
+ * devl_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devl_dpipe_headers_unregister(struct devlink *devlink)
+{
+ lockdep_assert_held(&devlink->lock);
+
+ devlink->dpipe_headers = NULL;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
+
+/**
+ * devlink_dpipe_table_counter_enabled - check if counter allocation is
+ * required
+ * @devlink: devlink
+ * @table_name: table name
+ *
+ * Used by a driver to check if counter allocation is required.
+ * Once counter allocation is turned on, the table entries
+ * are updated to include counter statistics.
+ *
+ * From that point on, the driver must respect the counter
+ * state so that each entry added to the table is added
+ * with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+ bool enabled;
+
+ rcu_read_lock();
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ enabled = false;
+ if (table)
+ enabled = table->counters_enabled;
+ rcu_read_unlock();
+ return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
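+
+/* Sketch (not part of this file) of the contract documented above: before
+ * inserting a hardware entry, the driver checks whether the entry must be
+ * bound to a counter; "example_table" is a hypothetical table name.
+ */
+static bool example_entry_needs_counter(struct devlink *devlink)
+{
+ return devlink_dpipe_table_counter_enabled(devlink, "example_table");
+}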
+
+/**
+ * devl_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @counter_control_extern: external control for counters
+ */
+int devl_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, bool counter_control_extern)
+{
+ struct devlink_dpipe_table *table;
+
+ lockdep_assert_held(&devlink->lock);
+
+ if (WARN_ON(!table_ops->size_get))
+ return -EINVAL;
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink))
+ return -EEXIST;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->name = table_name;
+ table->table_ops = table_ops;
+ table->priv = priv;
+ table->counter_control_extern = counter_control_extern;
+
+ list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
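+
+/* A minimal sketch (not part of this file) wiring the earlier example
+ * dump callbacks into a table registration; the caller must hold the
+ * devlink instance lock. "example_" names and the capacity are
+ * hypothetical. Passing false for counter_control_extern lets userspace
+ * toggle counters via DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET above.
+ */
+static u64 example_table_size_get(void *priv)
+{
+ return 1024; /* hypothetical table capacity */
+}
+
+static struct devlink_dpipe_table_ops example_table_ops = {
+ .matches_dump = example_table_matches_dump,
+ .actions_dump = example_table_actions_dump,
+ .entries_dump = example_table_entries_dump,
+ .size_get = example_table_size_get,
+};
+
+static int example_table_init(struct devlink *devlink, void *priv)
+{
+ return devl_dpipe_table_register(devlink, "example_table",
+ &example_table_ops, priv, false);
+}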
+
+/**
+ * devl_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devl_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ lockdep_assert_held(&devlink->lock);
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return;
+ list_del_rcu(&table->list);
+ kfree_rcu(table, rcu);
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
+
+/**
+ * devl_dpipe_table_resource_set - set the resource id
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource units consumed per table entry
+ */
+int devl_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return -EINVAL;
+
+ table->resource_id = resource_id;
+ table->resource_units = resource_units;
+ table->resource_valid = true;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
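+
+/* Sketch (not part of this file) of binding the example table to a
+ * devlink resource so userspace can correlate table size with resource
+ * occupancy; the resource ID and per-entry unit cost are hypothetical
+ * and must match a resource the driver registered elsewhere.
+ */
+static int example_table_resource_bind(struct devlink *devlink)
+{
+ return devl_dpipe_table_resource_set(devlink, "example_table",
+ 42 /* resource id */, 1 /* units per entry */);
+}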
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 194340a8bb86..638cad8d5c65 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -356,8 +356,8 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
}
-int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -384,18 +384,29 @@ int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int
-devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_health_reporter_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
+ unsigned long port_index_end = ULONG_MAX;
+ struct nlattr **attrs = info->attrs;
+ unsigned long port_index_start = 0;
struct devlink_port *port;
unsigned long port_index;
int idx = 0;
int err;
+ if (attrs && attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ port_index_start = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ port_index_end = port_index_start;
+ flags |= NLM_F_DUMP_FILTERED;
+ goto per_port_dump;
+ }
+
list_for_each_entry(reporter, &devlink->reporter_list, list) {
if (idx < state->idx) {
idx++;
@@ -405,14 +416,16 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
DEVLINK_CMD_HEALTH_REPORTER_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ flags);
if (err) {
state->idx = idx;
return err;
}
idx++;
}
- xa_for_each(&devlink->ports, port_index, port) {
+per_port_dump:
+ xa_for_each_range(&devlink->ports, port_index, port,
+ port_index_start, port_index_end) {
list_for_each_entry(reporter, &port->reporter_list, list) {
if (idx < state->idx) {
idx++;
@@ -422,7 +435,7 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
DEVLINK_CMD_HEALTH_REPORTER_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ flags);
if (err) {
state->idx = idx;
return err;
@@ -434,9 +447,12 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
return 0;
}
-const struct devlink_cmd devl_cmd_health_reporter_get = {
- .dump_one = devlink_nl_cmd_health_reporter_get_dump_one,
-};
+int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb,
+ devlink_nl_health_reporter_get_dump_one);
+}
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info)
@@ -1248,7 +1264,7 @@ out:
static struct devlink_health_reporter *
devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
deleted file mode 100644
index 1f00f874471f..000000000000
--- a/net/devlink/leftover.c
+++ /dev/null
@@ -1,9507 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/core/devlink.c - Network physical/parent device Netlink interface
- *
- * Heavily inspired by net/wireless/
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
- */
-
-#include <linux/etherdevice.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/gfp.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/refcount.h>
-#include <linux/workqueue.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/timekeeping.h>
-#include <rdma/ib_verbs.h>
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#include <net/rtnetlink.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/devlink.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/devlink.h>
-
-#include "devl_internal.h"
-
-struct devlink_linecard {
- struct list_head list;
- struct devlink *devlink;
- unsigned int index;
- const struct devlink_linecard_ops *ops;
- void *priv;
- enum devlink_linecard_state state;
- struct mutex state_lock; /* Protects state */
- const char *type;
- struct devlink_linecard_type *types;
- unsigned int types_count;
- struct devlink *nested_devlink;
-};
-
-/**
- * struct devlink_resource - devlink resource
- * @name: name of the resource
- * @id: id, per devlink instance
- * @size: size of the resource
- * @size_new: updated size of the resource, reload is needed
- * @size_valid: valid in case the total size of the resource is valid
- * including its children
- * @parent: parent resource
- * @size_params: size parameters
- * @list: parent list
- * @resource_list: list of child resources
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- */
-struct devlink_resource {
- const char *name;
- u64 id;
- u64 size;
- u64 size_new;
- bool size_valid;
- struct devlink_resource *parent;
- struct devlink_resource_size_params size_params;
- struct list_head list;
- struct list_head resource_list;
- devlink_resource_occ_get_t *occ_get;
- void *occ_get_priv;
-};
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
- {
- .name = "destination mac",
- .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
- .bitwidth = 48,
- },
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
- .name = "ethernet",
- .id = DEVLINK_DPIPE_HEADER_ETHERNET,
- .fields = devlink_dpipe_fields_ethernet,
- .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
- .global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
- {
- .name = "destination ip",
- .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
- .bitwidth = 32,
- },
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
- .name = "ipv4",
- .id = DEVLINK_DPIPE_HEADER_IPV4,
- .fields = devlink_dpipe_fields_ipv4,
- .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
- .global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
- {
- .name = "destination ip",
- .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
- .bitwidth = 128,
- },
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
- .name = "ipv6",
- .id = DEVLINK_DPIPE_HEADER_IPV6,
- .fields = devlink_dpipe_fields_ipv6,
- .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
- .global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
-
-#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
- (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
-
-static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
- [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
- [DEVLINK_PORT_FN_ATTR_STATE] =
- NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
- DEVLINK_PORT_FN_STATE_ACTIVE),
- [DEVLINK_PORT_FN_ATTR_CAPS] =
- NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
-};
-
-#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
- WARN_ON_ONCE(!(devlink_port)->registered)
-#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
- WARN_ON_ONCE((devlink_port)->registered)
-#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \
- WARN_ON_ONCE(!(devlink_port)->initialized)
-
-static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
- unsigned int port_index)
-{
- return xa_load(&devlink->ports, port_index);
-}
-
-struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
-{
- if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
- u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
- struct devlink_port *devlink_port;
-
- devlink_port = devlink_port_get_by_index(devlink, port_index);
- if (!devlink_port)
- return ERR_PTR(-ENODEV);
- return devlink_port;
- }
- return ERR_PTR(-EINVAL);
-}
-
-struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- return devlink_port_get_from_attrs(devlink, info->attrs);
-}
-
-static inline bool
-devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
-{
- return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
-}
-
-static inline bool
-devlink_rate_is_node(struct devlink_rate *devlink_rate)
-{
- return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
-}
-
-static struct devlink_rate *
-devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- struct devlink_rate *devlink_rate;
- struct devlink_port *devlink_port;
-
- devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
- if (IS_ERR(devlink_port))
- return ERR_CAST(devlink_port);
- devlink_rate = devlink_port->devlink_rate;
- return devlink_rate ?: ERR_PTR(-ENODEV);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
-{
- static struct devlink_rate *devlink_rate;
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- if (devlink_rate_is_node(devlink_rate) &&
- !strcmp(node_name, devlink_rate->name))
- return devlink_rate;
- }
- return ERR_PTR(-ENODEV);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
-{
- const char *rate_node_name;
- size_t len;
-
- if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
- return ERR_PTR(-EINVAL);
- rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
- len = strlen(rate_node_name);
- /* Name cannot be empty or decimal number */
- if (!len || strspn(rate_node_name, "0123456789") == len)
- return ERR_PTR(-EINVAL);
-
- return devlink_rate_node_get_by_name(devlink, rate_node_name);
-}
-
-struct devlink_rate *
-devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- return devlink_rate_node_get_from_attrs(devlink, info->attrs);
-}
-
-struct devlink_rate *
-devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- struct nlattr **attrs = info->attrs;
-
- if (attrs[DEVLINK_ATTR_PORT_INDEX])
- return devlink_rate_leaf_get_from_info(devlink, info);
- else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
- return devlink_rate_node_get_from_info(devlink, info);
- else
- return ERR_PTR(-EINVAL);
-}
-
-static struct devlink_linecard *
-devlink_linecard_get_by_index(struct devlink *devlink,
- unsigned int linecard_index)
-{
- struct devlink_linecard *devlink_linecard;
-
- list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
- if (devlink_linecard->index == linecard_index)
- return devlink_linecard;
- }
- return NULL;
-}
-
-static bool devlink_linecard_index_exists(struct devlink *devlink,
- unsigned int linecard_index)
-{
- return devlink_linecard_get_by_index(devlink, linecard_index);
-}
-
-static struct devlink_linecard *
-devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
-{
- if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
- u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
- struct devlink_linecard *linecard;
-
- linecard = devlink_linecard_get_by_index(devlink, linecard_index);
- if (!linecard)
- return ERR_PTR(-ENODEV);
- return linecard;
- }
- return ERR_PTR(-EINVAL);
-}
-
-struct devlink_linecard *
-devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- return devlink_linecard_get_from_attrs(devlink, info->attrs);
-}
-
-struct devlink_sb {
- struct list_head list;
- unsigned int index;
- u32 size;
- u16 ingress_pools_count;
- u16 egress_pools_count;
- u16 ingress_tc_count;
- u16 egress_tc_count;
-};
-
-static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
-{
- return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
-}
-
-static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
- unsigned int sb_index)
-{
- struct devlink_sb *devlink_sb;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- if (devlink_sb->index == sb_index)
- return devlink_sb;
- }
- return NULL;
-}
-
-static bool devlink_sb_index_exists(struct devlink *devlink,
- unsigned int sb_index)
-{
- return devlink_sb_get_by_index(devlink, sb_index);
-}
-
-static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
-{
- if (attrs[DEVLINK_ATTR_SB_INDEX]) {
- u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
- if (!devlink_sb)
- return ERR_PTR(-ENODEV);
- return devlink_sb;
- }
- return ERR_PTR(-EINVAL);
-}
-
-static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- return devlink_sb_get_from_attrs(devlink, info->attrs);
-}
-
-static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
- struct nlattr **attrs,
- u16 *p_pool_index)
-{
- u16 val;
-
- if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
- return -EINVAL;
-
- val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
- if (val >= devlink_sb_pool_count(devlink_sb))
- return -EINVAL;
- *p_pool_index = val;
- return 0;
-}
-
-static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
- struct genl_info *info,
- u16 *p_pool_index)
-{
- return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
- p_pool_index);
-}
-
-static int
-devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
- enum devlink_sb_pool_type *p_pool_type)
-{
- u8 val;
-
- if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
- return -EINVAL;
-
- val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
- if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
- val != DEVLINK_SB_POOL_TYPE_EGRESS)
- return -EINVAL;
- *p_pool_type = val;
- return 0;
-}
-
-static int
-devlink_sb_pool_type_get_from_info(struct genl_info *info,
- enum devlink_sb_pool_type *p_pool_type)
-{
- return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
-}
-
-static int
-devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
- enum devlink_sb_threshold_type *p_th_type)
-{
- u8 val;
-
- if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
- return -EINVAL;
-
- val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
- if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
- val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
- return -EINVAL;
- *p_th_type = val;
- return 0;
-}
-
-static int
-devlink_sb_th_type_get_from_info(struct genl_info *info,
- enum devlink_sb_threshold_type *p_th_type)
-{
- return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
-}
-
-static int
-devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
- struct nlattr **attrs,
- enum devlink_sb_pool_type pool_type,
- u16 *p_tc_index)
-{
- u16 val;
-
- if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
- return -EINVAL;
-
- val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
- if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
- val >= devlink_sb->ingress_tc_count)
- return -EINVAL;
- if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
- val >= devlink_sb->egress_tc_count)
- return -EINVAL;
- *p_tc_index = val;
- return 0;
-}
-
-static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
- u32 cap, bool is_enable)
-{
- caps->selector |= cap;
- if (is_enable)
- caps->value |= cap;
-}
-
-static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
- struct nla_bitfield32 *caps,
- struct netlink_ext_ack *extack)
-{
- bool is_enable;
- int err;
-
- if (!devlink_port->ops->port_fn_roce_get)
- return 0;
-
- err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
- extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
-
- devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
- return 0;
-}
-
-static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
- struct nla_bitfield32 *caps,
- struct netlink_ext_ack *extack)
-{
- bool is_enable;
- int err;
-
- if (!devlink_port->ops->port_fn_migratable_get ||
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
- return 0;
-
- err = devlink_port->ops->port_fn_migratable_get(devlink_port,
- &is_enable, extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
-
- devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
- return 0;
-}
-
-static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
- struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
-{
- struct nla_bitfield32 caps = {};
- int err;
-
- err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
- if (err)
- return err;
-
- err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
- if (err)
- return err;
-
- if (!caps.selector)
- return 0;
- err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
- caps.selector);
- if (err)
- return err;
-
- *msg_updated = true;
- return 0;
-}
-
-static int
-devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
- struct genl_info *info,
- enum devlink_sb_pool_type pool_type,
- u16 *p_tc_index)
-{
- return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
- pool_type, p_tc_index);
-}
-
-struct devlink_region {
- struct devlink *devlink;
- struct devlink_port *port;
- struct list_head list;
- union {
- const struct devlink_region_ops *ops;
- const struct devlink_port_region_ops *port_ops;
- };
- struct mutex snapshot_lock; /* protects snapshot_list,
- * max_snapshots and cur_snapshots
- * consistency.
- */
- struct list_head snapshot_list;
- u32 max_snapshots;
- u32 cur_snapshots;
- u64 size;
-};
-
-struct devlink_snapshot {
- struct list_head list;
- struct devlink_region *region;
- u8 *data;
- u32 id;
-};
-
-static struct devlink_region *
-devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
-{
- struct devlink_region *region;
-
- list_for_each_entry(region, &devlink->region_list, list)
- if (!strcmp(region->ops->name, region_name))
- return region;
-
- return NULL;
-}
-
-static struct devlink_region *
-devlink_port_region_get_by_name(struct devlink_port *port,
- const char *region_name)
-{
- struct devlink_region *region;
-
- list_for_each_entry(region, &port->region_list, list)
- if (!strcmp(region->ops->name, region_name))
- return region;
-
- return NULL;
-}
-
-static struct devlink_snapshot *
-devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
-{
- struct devlink_snapshot *snapshot;
-
- list_for_each_entry(snapshot, &region->snapshot_list, list)
- if (snapshot->id == id)
- return snapshot;
-
- return NULL;
-}
-
-static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- struct nlattr *nested_attr;
-
- nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
- if (!nested_attr)
- return -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- nla_nest_end(msg, nested_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, nested_attr);
- return -EMSGSIZE;
-}
-
-int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
-{
- if (devlink_nl_put_handle(msg, devlink_port->devlink))
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- return -EMSGSIZE;
- return 0;
-}
-
-size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
-{
- struct devlink *devlink = devlink_port->devlink;
-
- return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
- + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
- + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
-}
-
-static int devlink_nl_port_attrs_put(struct sk_buff *msg,
- struct devlink_port *devlink_port)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
-
- if (!devlink_port->attrs_set)
- return 0;
- if (attrs->lanes) {
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
- return -EMSGSIZE;
- }
- if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
- return -EMSGSIZE;
- if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
- return -EMSGSIZE;
- switch (devlink_port->attrs.flavour) {
- case DEVLINK_PORT_FLAVOUR_PCI_PF:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
- attrs->pci_pf.controller) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
- return -EMSGSIZE;
- if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
- return -EMSGSIZE;
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_VF:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
- attrs->pci_vf.controller) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
- return -EMSGSIZE;
- if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
- return -EMSGSIZE;
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_SF:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
- attrs->pci_sf.controller) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
- attrs->pci_sf.pf) ||
- nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
- attrs->pci_sf.sf))
- return -EMSGSIZE;
- break;
- case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- case DEVLINK_PORT_FLAVOUR_CPU:
- case DEVLINK_PORT_FLAVOUR_DSA:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (!attrs->split)
- return 0;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
- attrs->phys.split_subport_number))
- return -EMSGSIZE;
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
- struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
-{
- u8 hw_addr[MAX_ADDR_LEN];
- int hw_addr_len;
- int err;
-
- if (!port->ops->port_fn_hw_addr_get)
- return 0;
-
- err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
- extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
- err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
- if (err)
- return err;
- *msg_updated = true;
- return 0;
-}
-
-static int devlink_nl_rate_fill(struct sk_buff *msg,
- struct devlink_rate *devlink_rate,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags, struct netlink_ext_ack *extack)
-{
- struct devlink *devlink = devlink_rate->devlink;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
- goto nla_put_failure;
-
- if (devlink_rate_is_leaf(devlink_rate)) {
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
- devlink_rate->devlink_port->index))
- goto nla_put_failure;
- } else if (devlink_rate_is_node(devlink_rate)) {
- if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
- devlink_rate->name))
- goto nla_put_failure;
- }
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
- devlink_rate->tx_share, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
- devlink_rate->tx_max, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
- devlink_rate->tx_priority))
- goto nla_put_failure;
-
- if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
- devlink_rate->tx_weight))
- goto nla_put_failure;
-
- if (devlink_rate->parent)
- if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
- devlink_rate->parent->name))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static bool
-devlink_port_fn_state_valid(enum devlink_port_fn_state state)
-{
- return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
- state == DEVLINK_PORT_FN_STATE_ACTIVE;
-}
-
-static bool
-devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
-{
- return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
- opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
-}
-
-static int devlink_port_fn_state_fill(struct devlink_port *port,
- struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
-{
- enum devlink_port_fn_opstate opstate;
- enum devlink_port_fn_state state;
- int err;
-
- if (!port->ops->port_fn_state_get)
- return 0;
-
- err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
- if (!devlink_port_fn_state_valid(state)) {
- WARN_ON_ONCE(1);
- NL_SET_ERR_MSG(extack, "Invalid state read from driver");
- return -EINVAL;
- }
- if (!devlink_port_fn_opstate_valid(opstate)) {
- WARN_ON_ONCE(1);
- NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
- return -EINVAL;
- }
- if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
- nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
- return -EMSGSIZE;
- *msg_updated = true;
- return 0;
-}
-
-static int
-devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
- struct netlink_ext_ack *extack)
-{
- return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
- extack);
-}
-
-static int
-devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
- struct netlink_ext_ack *extack)
-{
- return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
- extack);
-}
-
-static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- struct nla_bitfield32 caps;
- u32 caps_value;
- int err;
-
- caps = nla_get_bitfield32(attr);
- caps_value = caps.value & caps.selector;
- if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
- err = devlink_port_fn_roce_set(devlink_port,
- caps_value & DEVLINK_PORT_FN_CAP_ROCE,
- extack);
- if (err)
- return err;
- }
- if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
- err = devlink_port_fn_mig_set(devlink_port, caps_value &
- DEVLINK_PORT_FN_CAP_MIGRATABLE,
- extack);
- if (err)
- return err;
- }
- return 0;
-}
-
-static int
-devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *function_attr;
- bool msg_updated = false;
- int err;
-
- function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
- if (!function_attr)
- return -EMSGSIZE;
-
- err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
- if (err)
- goto out;
- err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
- if (err)
- goto out;
- err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
-out:
- if (err || !msg_updated)
- nla_nest_cancel(msg, function_attr);
- else
- nla_nest_end(msg, function_attr);
- return err;
-}
-
-static int devlink_nl_port_fill(struct sk_buff *msg,
- struct devlink_port *devlink_port,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags, struct netlink_ext_ack *extack)
-{
- struct devlink *devlink = devlink_port->devlink;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- goto nla_put_failure;
-
- spin_lock_bh(&devlink_port->type_lock);
- if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
- goto nla_put_failure_type_locked;
- if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
- nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
- devlink_port->desired_type))
- goto nla_put_failure_type_locked;
- if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
- if (devlink_port->type_eth.netdev &&
- (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
- devlink_port->type_eth.ifindex) ||
- nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
- devlink_port->type_eth.ifname)))
- goto nla_put_failure_type_locked;
- }
- if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
- struct ib_device *ibdev = devlink_port->type_ib.ibdev;
-
- if (ibdev &&
- nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
- ibdev->name))
- goto nla_put_failure_type_locked;
- }
- spin_unlock_bh(&devlink_port->type_lock);
- if (devlink_nl_port_attrs_put(msg, devlink_port))
- goto nla_put_failure;
- if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
- goto nla_put_failure;
- if (devlink_port->linecard &&
- nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
- devlink_port->linecard->index))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure_type_locked:
- spin_unlock_bh(&devlink_port->type_lock);
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_port_notify(struct devlink_port *devlink_port,
- enum devlink_command cmd)
-{
- struct devlink *devlink = devlink_port->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_rate_notify(struct devlink_rate *devlink_rate,
- enum devlink_command cmd)
-{
- struct devlink *devlink = devlink_rate->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_rate *devlink_rate;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
- u32 id = NETLINK_CB(cb->skb).portid;
-
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, NULL);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_rate_get = {
- .dump_one = devlink_nl_cmd_rate_get_dump_one,
-};
-
-static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_rate *devlink_rate = info->user_ptr[1];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static bool
-devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
- struct devlink_rate *parent)
-{
- while (parent) {
- if (parent == devlink_rate)
- return true;
- parent = parent->parent;
- }
- return false;
-}
-
-static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_port *devlink_port;
- unsigned long port_index;
- int err = 0;
-
- xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
- err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->extack);
- if (err) {
- state->idx = port_index;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_port_get = {
- .dump_one = devlink_nl_cmd_port_get_dump_one,
-};
-
-static int devlink_port_type_set(struct devlink_port *devlink_port,
- enum devlink_port_type port_type)
-
-{
- int err;
-
- if (!devlink_port->ops->port_type_set)
- return -EOPNOTSUPP;
-
- if (port_type == devlink_port->type)
- return 0;
-
- err = devlink_port->ops->port_type_set(devlink_port, port_type);
- if (err)
- return err;
-
- devlink_port->desired_type = port_type;
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
- return 0;
-}
-
-static int devlink_port_function_hw_addr_set(struct devlink_port *port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- const u8 *hw_addr;
- int hw_addr_len;
-
- hw_addr = nla_data(attr);
- hw_addr_len = nla_len(attr);
- if (hw_addr_len > MAX_ADDR_LEN) {
- NL_SET_ERR_MSG(extack, "Port function hardware address too long");
- return -EINVAL;
- }
- if (port->type == DEVLINK_PORT_TYPE_ETH) {
- if (hw_addr_len != ETH_ALEN) {
- NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
- return -EINVAL;
- }
- if (!is_unicast_ether_addr(hw_addr)) {
- NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
- return -EINVAL;
- }
- }
-
- return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
- extack);
-}
-
-static int devlink_port_fn_state_set(struct devlink_port *port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- enum devlink_port_fn_state state;
-
- state = nla_get_u8(attr);
- return port->ops->port_fn_state_set(port, state, extack);
-}
-
-static int devlink_port_function_validate(struct devlink_port *devlink_port,
- struct nlattr **tb,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_port_ops *ops = devlink_port->ops;
- struct nlattr *attr;
-
- if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
- !ops->port_fn_hw_addr_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
- "Port doesn't support function attributes");
- return -EOPNOTSUPP;
- }
- if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
- "Function does not support state setting");
- return -EOPNOTSUPP;
- }
- attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
- if (attr) {
- struct nla_bitfield32 caps;
-
- caps = nla_get_bitfield32(attr);
- if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
- !ops->port_fn_roce_set) {
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "Port doesn't support RoCE function attribute");
- return -EOPNOTSUPP;
- }
- if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
- if (!ops->port_fn_migratable_set) {
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "Port doesn't support migratable function attribute");
- return -EOPNOTSUPP;
- }
- if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "migratable function attribute supported for VFs only");
- return -EOPNOTSUPP;
- }
- }
- }
- return 0;
-}
-
-static int devlink_port_function_set(struct devlink_port *port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
- int err;
-
- err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
- devlink_function_nl_policy, extack);
- if (err < 0) {
- NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
- return err;
- }
-
- err = devlink_port_function_validate(port, tb, extack);
- if (err)
- return err;
-
- attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
- if (attr) {
- err = devlink_port_function_hw_addr_set(port, attr, extack);
- if (err)
- return err;
- }
-
- attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
- if (attr) {
- err = devlink_port_fn_caps_set(port, attr, extack);
- if (err)
- return err;
- }
-
- /* Keep this as the last function attribute set, so that when
- * multiple port function attributes are set along with state,
- * Those can be applied first before activating the state.
- */
- attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
- if (attr)
- err = devlink_port_fn_state_set(port, attr, extack);
-
- if (!err)
- devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
- return err;
-}
-
-static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- int err;
-
- if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
- enum devlink_port_type port_type;
-
- port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
- err = devlink_port_type_set(devlink_port, port_type);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
- struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
- struct netlink_ext_ack *extack = info->extack;
-
- err = devlink_port_function_set(devlink_port, attr, extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
- u32 count;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
- return -EINVAL;
- if (!devlink_port->ops->port_split)
- return -EOPNOTSUPP;
-
- count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
-
- if (!devlink_port->attrs.splittable) {
- /* Split ports cannot be split. */
- if (devlink_port->attrs.split)
- NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
- else
- NL_SET_ERR_MSG(info->extack, "Port cannot be split");
- return -EINVAL;
- }
-
- if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
- NL_SET_ERR_MSG(info->extack, "Invalid split count");
- return -EINVAL;
- }
-
- return devlink_port->ops->port_split(devlink, devlink_port, count,
- info->extack);
-}
-
-static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
-
- if (!devlink_port->ops->port_unsplit)
- return -EOPNOTSUPP;
- return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
-}
-
-static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink_port_new_attrs new_attrs = {};
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *devlink_port;
- struct sk_buff *msg;
- int err;
-
- if (!devlink->ops->port_new)
- return -EOPNOTSUPP;
-
- if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
- !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
- NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
- return -EINVAL;
- }
- new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
- new_attrs.pfnum =
- nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- /* Port index of the new port being created by driver. */
- new_attrs.port_index =
- nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
- new_attrs.port_index_valid = true;
- }
- if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
- new_attrs.controller =
- nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
- new_attrs.controller_valid = true;
- }
- if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
- info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
- new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
- new_attrs.sfnum_valid = true;
- }
-
- err = devlink->ops->port_new(devlink, &new_attrs,
- extack, &devlink_port);
- if (err)
- return err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- err = -ENOMEM;
- goto err_out_port_del;
- }
- err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0, NULL);
- if (WARN_ON_ONCE(err))
- goto err_out_msg_free;
- err = genlmsg_reply(msg, info);
- if (err)
- goto err_out_port_del;
- return 0;
-
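- /* On any failure past port creation, delete the port again:
- * userspace has no handle to it if the reply was never delivered.
- */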
-err_out_msg_free:
- nlmsg_free(msg);
-err_out_port_del:
- devlink_port->ops->port_del(devlink, devlink_port, NULL);
- return err;
-}
-
-static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
-
- if (!devlink_port->ops->port_del)
- return -EOPNOTSUPP;
-
- return devlink_port->ops->port_del(devlink, devlink_port, extack);
-}
-
-static int
-devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
- struct genl_info *info,
- struct nlattr *nla_parent)
-{
- struct devlink *devlink = devlink_rate->devlink;
- const char *parent_name = nla_data(nla_parent);
- const struct devlink_ops *ops = devlink->ops;
- size_t len = strlen(parent_name);
- struct devlink_rate *parent;
- int err = -EOPNOTSUPP;
-
- parent = devlink_rate->parent;
-
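- /* An empty parent name detaches the rate object from its current
- * parent; a non-empty name looks up the named node and reparents
- * the object to it.
- */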
- if (parent && !len) {
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_parent_set(devlink_rate, NULL,
- devlink_rate->priv, NULL,
- info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_parent_set(devlink_rate, NULL,
- devlink_rate->priv, NULL,
- info->extack);
- if (err)
- return err;
-
- refcount_dec(&parent->refcnt);
- devlink_rate->parent = NULL;
- } else if (len) {
- parent = devlink_rate_node_get_by_name(devlink, parent_name);
- if (IS_ERR(parent))
- return -ENODEV;
-
- if (parent == devlink_rate) {
- NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
- return -EINVAL;
- }
-
- if (devlink_rate_is_node(devlink_rate) &&
- devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
- NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
- return -EEXIST;
- }
-
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_parent_set(devlink_rate, parent,
- devlink_rate->priv, parent->priv,
- info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_parent_set(devlink_rate, parent,
- devlink_rate->priv, parent->priv,
- info->extack);
- if (err)
- return err;
-
- if (devlink_rate->parent)
- /* we're reassigning to another parent in this case */
- refcount_dec(&devlink_rate->parent->refcnt);
-
- refcount_inc(&parent->refcnt);
- devlink_rate->parent = parent;
- }
-
- return 0;
-}
-
-static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
- const struct devlink_ops *ops,
- struct genl_info *info)
-{
- struct nlattr *nla_parent, **attrs = info->attrs;
- int err = -EOPNOTSUPP;
- u32 priority;
- u32 weight;
- u64 rate;
-
- if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
- rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- if (err)
- return err;
- devlink_rate->tx_share = rate;
- }
-
- if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
- rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- if (err)
- return err;
- devlink_rate->tx_max = rate;
- }
-
- if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) {
- priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv,
- priority, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv,
- priority, info->extack);
-
- if (err)
- return err;
- devlink_rate->tx_priority = priority;
- }
-
- if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) {
- weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv,
- weight, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv,
- weight, info->extack);
-
- if (err)
- return err;
- devlink_rate->tx_weight = weight;
- }
-
- nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
- if (nla_parent) {
- err = devlink_nl_rate_parent_node_set(devlink_rate, info,
- nla_parent);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
- struct genl_info *info,
- enum devlink_rate_type type)
-{
- struct nlattr **attrs = info->attrs;
-
- if (type == DEVLINK_RATE_TYPE_LEAF) {
- if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
- NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
- NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
- !ops->rate_leaf_parent_set) {
- NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
- "TX priority set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
- "TX weight set isn't supported for the leafs");
- return false;
- }
- } else if (type == DEVLINK_RATE_TYPE_NODE) {
- if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
- NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
- NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
- !ops->rate_node_parent_set) {
- NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
- "TX priority set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
- "TX weight set isn't supported for the nodes");
- return false;
- }
- } else {
- WARN(1, "Unknown type of rate object");
- return false;
- }
-
- return true;
-}
-
-static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_rate *devlink_rate = info->user_ptr[1];
- struct devlink *devlink = devlink_rate->devlink;
- const struct devlink_ops *ops = devlink->ops;
- int err;
-
- if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
- return -EOPNOTSUPP;
-
- err = devlink_nl_rate_set(devlink_rate, ops, info);
-
- if (!err)
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
- return err;
-}
-
-static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_rate *rate_node;
- const struct devlink_ops *ops;
- int err;
-
- ops = devlink->ops;
- if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
- NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
- return -EOPNOTSUPP;
- }
-
- if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
- return -EOPNOTSUPP;
-
- rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
- if (!IS_ERR(rate_node))
- return -EEXIST;
- else if (rate_node == ERR_PTR(-EINVAL))
- return -EINVAL;
-
- rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
- if (!rate_node)
- return -ENOMEM;
-
- rate_node->devlink = devlink;
- rate_node->type = DEVLINK_RATE_TYPE_NODE;
- rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
- if (!rate_node->name) {
- err = -ENOMEM;
- goto err_strdup;
- }
-
- err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
- if (err)
- goto err_node_new;
-
- err = devlink_nl_rate_set(rate_node, ops, info);
- if (err)
- goto err_rate_set;
-
- refcount_set(&rate_node->refcnt, 1);
- list_add(&rate_node->list, &devlink->rate_list);
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
- return 0;
-
-err_rate_set:
- ops->rate_node_del(rate_node, rate_node->priv, info->extack);
-err_node_new:
- kfree(rate_node->name);
-err_strdup:
- kfree(rate_node);
- return err;
-}
-
-static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_rate *rate_node = info->user_ptr[1];
- struct devlink *devlink = rate_node->devlink;
- const struct devlink_ops *ops = devlink->ops;
- int err;
-
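- /* Child rate objects hold a reference on their parent, so a
- * refcount above 1 means leaves or nodes are still attached here.
- */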
- if (refcount_read(&rate_node->refcnt) > 1) {
- NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
- return -EBUSY;
- }
-
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
- err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
- if (rate_node->parent)
- refcount_dec(&rate_node->parent->refcnt);
- list_del(&rate_node->list);
- kfree(rate_node->name);
- kfree(rate_node);
- return err;
-}
-
-struct devlink_linecard_type {
- const char *type;
- const void *priv;
-};
-
-static int devlink_nl_linecard_fill(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_linecard *linecard,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct netlink_ext_ack *extack)
-{
- struct devlink_linecard_type *linecard_type;
- struct nlattr *attr;
- void *hdr;
- int i;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
- goto nla_put_failure;
- if (linecard->type &&
- nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
- goto nla_put_failure;
-
- if (linecard->types_count) {
- attr = nla_nest_start(msg,
- DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
- if (!attr)
- goto nla_put_failure;
- for (i = 0; i < linecard->types_count; i++) {
- linecard_type = &linecard->types[i];
- if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
- linecard_type->type)) {
- nla_nest_cancel(msg, attr);
- goto nla_put_failure;
- }
- }
- nla_nest_end(msg, attr);
- }
-
- if (linecard->nested_devlink &&
- devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_linecard_notify(struct devlink_linecard *linecard,
- enum devlink_command cmd)
-{
- struct devlink *devlink = linecard->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
- cmd != DEVLINK_CMD_LINECARD_DEL);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
- NULL);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_linecard *linecard = info->user_ptr[1];
- struct devlink *devlink = linecard->devlink;
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_NEW,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_linecard *linecard;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(linecard, &devlink->linecard_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_linecard_get = {
- .dump_one = devlink_nl_cmd_linecard_get_dump_one,
-};
-
-static struct devlink_linecard_type *
-devlink_linecard_type_lookup(struct devlink_linecard *linecard,
- const char *type)
-{
- struct devlink_linecard_type *linecard_type;
- int i;
-
- for (i = 0; i < linecard->types_count; i++) {
- linecard_type = &linecard->types[i];
- if (!strcmp(type, linecard_type->type))
- return linecard_type;
- }
- return NULL;
-}
-
-static int devlink_linecard_type_set(struct devlink_linecard *linecard,
- const char *type,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_linecard_ops *ops = linecard->ops;
- struct devlink_linecard_type *linecard_type;
- int err;
-
- mutex_lock(&linecard->state_lock);
- if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
- err = -EBUSY;
- goto out;
- }
- if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
- err = -EBUSY;
- goto out;
- }
-
- linecard_type = devlink_linecard_type_lookup(linecard, type);
- if (!linecard_type) {
- NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
- err = -EINVAL;
- goto out;
- }
-
- if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
- linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
- NL_SET_ERR_MSG(extack, "Line card already provisioned");
- err = -EBUSY;
- /* Check whether the line card is already provisioned in
- * exactly the way the user asks; if so, report success.
- */
- if (ops->same_provision &&
- ops->same_provision(linecard, linecard->priv,
- linecard_type->type,
- linecard_type->priv))
- err = 0;
- goto out;
- }
-
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
- linecard->type = linecard_type->type;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
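- /* The PROVISIONING state was set and notified under the lock;
- * release it before calling into the driver, whose provision op
- * may block for a while.
- */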
- err = ops->provision(linecard, linecard->priv, linecard_type->type,
- linecard_type->priv, extack);
- if (err) {
- /* Provisioning failed. Assume the linecard is unprovisioned
- * for future operations.
- */
- mutex_lock(&linecard->state_lock);
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- }
- return err;
-
-out:
- mutex_unlock(&linecard->state_lock);
- return err;
-}
-
-static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- mutex_lock(&linecard->state_lock);
- if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
- err = -EBUSY;
- goto out;
- }
- if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
- err = -EBUSY;
- goto out;
- }
- if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- err = 0;
- goto out;
- }
-
- if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
- NL_SET_ERR_MSG(extack, "Line card is not provisioned");
- err = 0;
- goto out;
- }
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- err = linecard->ops->unprovision(linecard, linecard->priv,
- extack);
- if (err) {
- /* Unprovisioning failed. Assume the linecard is unprovisioned
- * for future operations.
- */
- mutex_lock(&linecard->state_lock);
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- }
- return err;
-
-out:
- mutex_unlock(&linecard->state_lock);
- return err;
-}
-
-static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_linecard *linecard = info->user_ptr[1];
- struct netlink_ext_ack *extack = info->extack;
- int err;
-
- if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
- const char *type;
-
- type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
- if (strcmp(type, "")) {
- err = devlink_linecard_type_set(linecard, type, extack);
- if (err)
- return err;
- } else {
- err = devlink_linecard_type_unset(linecard, extack);
- if (err)
- return err;
- }
- }
-
- return 0;
-}
-
-static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
- devlink_sb->ingress_pools_count))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
- devlink_sb->egress_pools_count))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
- devlink_sb->ingress_tc_count))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
- devlink_sb->egress_tc_count))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
- DEVLINK_CMD_SB_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
- DEVLINK_CMD_SB_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_get = {
- .dump_one = devlink_nl_cmd_sb_get_dump_one,
-};
-
-static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u16 pool_index, enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- struct devlink_sb_pool_info pool_info;
- void *hdr;
- int err;
-
- err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
- pool_index, &pool_info);
- if (err)
- return err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
- pool_info.threshold_type))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
- pool_info.cell_size))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- u16 pool_index;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (!devlink->ops->sb_pool_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
- DEVLINK_CMD_SB_POOL_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
- struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
-{
- u16 pool_count = devlink_sb_pool_count(devlink_sb);
- u16 pool_index;
- int err;
-
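- /* *p_idx is the absolute entry position across all shared
- * buffers; entries below @start were already dumped and are
- * skipped when the dump resumes.
- */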
- for (pool_index = 0; pool_index < pool_count; pool_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_pool_fill(msg, devlink,
- devlink_sb,
- pool_index,
- DEVLINK_CMD_SB_POOL_NEW,
- portid, seq, NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- return 0;
-}
-
-static int
-devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int err = 0;
- int idx = 0;
-
- if (!devlink->ops->sb_pool_get)
- return 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_pool_get_dumpit(msg, state->idx, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = idx;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_pool_get = {
- .dump_one = devlink_nl_cmd_sb_pool_get_dump_one,
-};
-
-static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
- u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_ops *ops = devlink->ops;
-
- if (ops->sb_pool_set)
- return ops->sb_pool_set(devlink, sb_index, pool_index,
- size, threshold_type, extack);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_sb_threshold_type threshold_type;
- struct devlink_sb *devlink_sb;
- u16 pool_index;
- u32 size;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- err = devlink_sb_th_type_get_from_info(info, &threshold_type);
- if (err)
- return err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
- return -EINVAL;
-
- size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
- return devlink_sb_pool_set(devlink, devlink_sb->index,
- pool_index, size, threshold_type,
- info->extack);
-}
-
-static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_port *devlink_port,
- struct devlink_sb *devlink_sb,
- u16 pool_index,
- enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- u32 threshold;
- void *hdr;
- int err;
-
- err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
- pool_index, &threshold);
- if (err)
- return err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
- goto nla_put_failure;
-
- if (ops->sb_occ_port_pool_get) {
- u32 cur;
- u32 max;
-
- err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
- pool_index, &cur, &max);
- if (err && err != -EOPNOTSUPP)
- goto sb_occ_get_failure;
- if (!err) {
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
- goto nla_put_failure;
- }
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
-sb_occ_get_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- u16 pool_index;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (!devlink->ops->sb_port_pool_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
- devlink_sb, pool_index,
- DEVLINK_CMD_SB_PORT_POOL_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
- struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
-{
- struct devlink_port *devlink_port;
- u16 pool_count = devlink_sb_pool_count(devlink_sb);
- unsigned long port_index;
- u16 pool_index;
- int err;
-
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- for (pool_index = 0; pool_index < pool_count; pool_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_port_pool_fill(msg, devlink,
- devlink_port,
- devlink_sb,
- pool_index,
- DEVLINK_CMD_SB_PORT_POOL_NEW,
- portid, seq,
- NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- }
- return 0;
-}
-
-static int
-devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int idx = 0;
- int err = 0;
-
- if (!devlink->ops->sb_port_pool_get)
- return 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = idx;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_port_pool_get = {
- .dump_one = devlink_nl_cmd_sb_port_pool_get_dump_one,
-};
-
-static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
- unsigned int sb_index, u16 pool_index,
- u32 threshold,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
-
- if (ops->sb_port_pool_set)
- return ops->sb_port_pool_set(devlink_port, sb_index,
- pool_index, threshold, extack);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb;
- u16 pool_index;
- u32 threshold;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
- return -EINVAL;
-
- threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
- return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
- pool_index, threshold, info->extack);
-}
-
-static int
-devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_port *devlink_port,
- struct devlink_sb *devlink_sb, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- u16 pool_index;
- u32 threshold;
- void *hdr;
- int err;
-
- err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
- tc_index, pool_type,
- &pool_index, &threshold);
- if (err)
- return err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
- goto nla_put_failure;
-
- if (ops->sb_occ_tc_port_bind_get) {
- u32 cur;
- u32 max;
-
- err = ops->sb_occ_tc_port_bind_get(devlink_port,
- devlink_sb->index,
- tc_index, pool_type,
- &cur, &max);
- if (err && err != -EOPNOTSUPP)
- goto sb_occ_get_failure;
- if (!err) {
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
- goto nla_put_failure;
- }
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
-sb_occ_get_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- enum devlink_sb_pool_type pool_type;
- u16 tc_index;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_type_get_from_info(info, &pool_type);
- if (err)
- return err;
-
- err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
- pool_type, &tc_index);
- if (err)
- return err;
-
- if (!devlink->ops->sb_tc_pool_bind_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
- devlink_sb, tc_index, pool_type,
- DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
- info->snd_portid,
- info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
- int start, int *p_idx,
- struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
-{
- struct devlink_port *devlink_port;
- unsigned long port_index;
- u16 tc_index;
- int err;
-
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- for (tc_index = 0;
- tc_index < devlink_sb->ingress_tc_count; tc_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
- devlink_port,
- devlink_sb,
- tc_index,
- DEVLINK_SB_POOL_TYPE_INGRESS,
- DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
- portid, seq,
- NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- for (tc_index = 0;
- tc_index < devlink_sb->egress_tc_count; tc_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
- devlink_port,
- devlink_sb,
- tc_index,
- DEVLINK_SB_POOL_TYPE_EGRESS,
- DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
- portid, seq,
- NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- }
- return 0;
-}
-
-static int
-devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int idx = 0;
- int err = 0;
-
- if (!devlink->ops->sb_tc_pool_bind_get)
- return 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = idx;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get = {
- .dump_one = devlink_nl_cmd_sb_tc_pool_bind_get_dump_one,
-};
-
-static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
- unsigned int sb_index, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
-
- if (ops->sb_tc_pool_bind_set)
- return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
- tc_index, pool_type,
- pool_index, threshold, extack);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_sb_pool_type pool_type;
- struct devlink_sb *devlink_sb;
- u16 tc_index;
- u16 pool_index;
- u32 threshold;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_type_get_from_info(info, &pool_type);
- if (err)
- return err;
-
- err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
- pool_type, &tc_index);
- if (err)
- return err;
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
- return -EINVAL;
-
- threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
- return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
- tc_index, pool_type,
- pool_index, threshold, info->extack);
-}
-
-static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- if (ops->sb_occ_snapshot)
- return ops->sb_occ_snapshot(devlink, devlink_sb->index);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- if (ops->sb_occ_max_clear)
- return ops->sb_occ_max_clear(devlink, devlink_sb->index);
- return -EOPNOTSUPP;
-}
-
-int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
- struct netlink_ext_ack *extack)
-{
- struct devlink_rate *devlink_rate;
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list)
- if (devlink_rate_is_node(devlink_rate)) {
- NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
- return -EBUSY;
- }
- return 0;
-}
-
-int devlink_dpipe_match_put(struct sk_buff *skb,
- struct devlink_dpipe_match *match)
-{
- struct devlink_dpipe_header *header = match->header;
- struct devlink_dpipe_field *field = &header->fields[match->field_id];
- struct nlattr *match_attr;
-
- match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
- if (!match_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
- nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
- goto nla_put_failure;
-
- nla_nest_end(skb, match_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, match_attr);
- return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
-
-static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
- struct sk_buff *skb)
-{
- struct nlattr *matches_attr;
-
- matches_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
- if (!matches_attr)
- return -EMSGSIZE;
-
- if (table->table_ops->matches_dump(table->priv, skb))
- goto nla_put_failure;
-
- nla_nest_end(skb, matches_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, matches_attr);
- return -EMSGSIZE;
-}
-
-int devlink_dpipe_action_put(struct sk_buff *skb,
- struct devlink_dpipe_action *action)
-{
- struct devlink_dpipe_header *header = action->header;
- struct devlink_dpipe_field *field = &header->fields[action->field_id];
- struct nlattr *action_attr;
-
- action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
- if (!action_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
- nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
- goto nla_put_failure;
-
- nla_nest_end(skb, action_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, action_attr);
- return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
-
-static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
- struct sk_buff *skb)
-{
- struct nlattr *actions_attr;
-
- actions_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
- if (!actions_attr)
- return -EMSGSIZE;
-
- if (table->table_ops->actions_dump(table->priv, skb))
- goto nla_put_failure;
-
- nla_nest_end(skb, actions_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, actions_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_dpipe_table_put(struct sk_buff *skb,
- struct devlink_dpipe_table *table)
-{
- struct nlattr *table_attr;
- u64 table_size;
-
- table_size = table->table_ops->size_get(table->priv);
- table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
- if (!table_attr)
- return -EMSGSIZE;
-
- if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
- table->counters_enabled))
- goto nla_put_failure;
-
- if (table->resource_valid) {
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
- table->resource_id, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
- table->resource_units, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- }
- if (devlink_dpipe_matches_put(table, skb))
- goto nla_put_failure;
-
- if (devlink_dpipe_actions_put(table, skb))
- goto nla_put_failure;
-
- nla_nest_end(skb, table_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, table_attr);
- return -EMSGSIZE;
-}
-
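-/* Send the partially built message (if any) back to the requester and
- * allocate a fresh skb, allowing doit handlers to produce replies that
- * span several netlink messages.
- */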
-static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
- struct genl_info *info)
-{
- int err;
-
- if (*pskb) {
- err = genlmsg_reply(*pskb, info);
- if (err)
- return err;
- }
- *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!*pskb)
- return -ENOMEM;
- return 0;
-}
-
-static int devlink_dpipe_tables_fill(struct genl_info *info,
- enum devlink_command cmd, int flags,
- struct list_head *dpipe_tables,
- const char *table_name)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_dpipe_table *table;
- struct nlattr *tables_attr;
- struct sk_buff *skb = NULL;
- struct nlmsghdr *nlh;
- bool incomplete;
- void *hdr;
- int i;
- int err;
-
- table = list_first_entry(dpipe_tables,
- struct devlink_dpipe_table, list);
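- /* Pack as many tables as fit into one message; if the skb fills
- * up mid-list, send it and continue from the current table in a
- * new one.
- */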
-start_again:
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr) {
- nlmsg_free(skb);
- return -EMSGSIZE;
- }
-
- if (devlink_nl_put_handle(skb, devlink))
- goto nla_put_failure;
- tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
- if (!tables_attr)
- goto nla_put_failure;
-
- i = 0;
- incomplete = false;
- list_for_each_entry_from(table, dpipe_tables, list) {
- if (!table_name) {
- err = devlink_dpipe_table_put(skb, table);
- if (err) {
- if (!i)
- goto err_table_put;
- incomplete = true;
- break;
- }
- } else {
- if (!strcmp(table->name, table_name)) {
- err = devlink_dpipe_table_put(skb, table);
- if (err)
- break;
- }
- }
- i++;
- }
-
- nla_nest_end(skb, tables_attr);
- genlmsg_end(skb, hdr);
- if (incomplete)
- goto start_again;
-
-send_done:
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
- goto send_done;
- }
-
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_table_put:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const char *table_name = NULL;
-
- if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
- table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
-
- return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
- &devlink->dpipe_table_list,
- table_name);
-}
-
-static int devlink_dpipe_value_put(struct sk_buff *skb,
- struct devlink_dpipe_value *value)
-{
- if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
- value->value_size, value->value))
- return -EMSGSIZE;
- if (value->mask)
- if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
- value->value_size, value->mask))
- return -EMSGSIZE;
- if (value->mapping_valid)
- if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
- value->mapping_value))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_dpipe_action_value_put(struct sk_buff *skb,
- struct devlink_dpipe_value *value)
-{
- if (!value->action)
- return -EINVAL;
- if (devlink_dpipe_action_put(skb, value->action))
- return -EMSGSIZE;
- if (devlink_dpipe_value_put(skb, value))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_dpipe_action_values_put(struct sk_buff *skb,
- struct devlink_dpipe_value *values,
- unsigned int values_count)
-{
- struct nlattr *action_attr;
- int i;
- int err;
-
- for (i = 0; i < values_count; i++) {
- action_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_ACTION_VALUE);
- if (!action_attr)
- return -EMSGSIZE;
- err = devlink_dpipe_action_value_put(skb, &values[i]);
- if (err)
- goto err_action_value_put;
- nla_nest_end(skb, action_attr);
- }
- return 0;
-
-err_action_value_put:
- nla_nest_cancel(skb, action_attr);
- return err;
-}
-
-static int devlink_dpipe_match_value_put(struct sk_buff *skb,
- struct devlink_dpipe_value *value)
-{
- if (!value->match)
- return -EINVAL;
- if (devlink_dpipe_match_put(skb, value->match))
- return -EMSGSIZE;
- if (devlink_dpipe_value_put(skb, value))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_dpipe_match_values_put(struct sk_buff *skb,
- struct devlink_dpipe_value *values,
- unsigned int values_count)
-{
- struct nlattr *match_attr;
- int i;
- int err;
-
- for (i = 0; i < values_count; i++) {
- match_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_MATCH_VALUE);
- if (!match_attr)
- return -EMSGSIZE;
- err = devlink_dpipe_match_value_put(skb, &values[i]);
- if (err)
- goto err_match_value_put;
- nla_nest_end(skb, match_attr);
- }
- return 0;
-
-err_match_value_put:
- nla_nest_cancel(skb, match_attr);
- return err;
-}
-
-static int devlink_dpipe_entry_put(struct sk_buff *skb,
- struct devlink_dpipe_entry *entry)
-{
- struct nlattr *entry_attr, *matches_attr, *actions_attr;
- int err;
-
- entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
- if (!entry_attr)
- return -EMSGSIZE;
-
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (entry->counter_valid)
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
- entry->counter, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- matches_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
- if (!matches_attr)
- goto nla_put_failure;
-
- err = devlink_dpipe_match_values_put(skb, entry->match_values,
- entry->match_values_count);
- if (err) {
- nla_nest_cancel(skb, matches_attr);
- goto err_match_values_put;
- }
- nla_nest_end(skb, matches_attr);
-
- actions_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
- if (!actions_attr)
- goto nla_put_failure;
-
- err = devlink_dpipe_action_values_put(skb, entry->action_values,
- entry->action_values_count);
- if (err) {
- nla_nest_cancel(skb, actions_attr);
- goto err_action_values_put;
- }
- nla_nest_end(skb, actions_attr);
-
- nla_nest_end(skb, entry_attr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_match_values_put:
-err_action_values_put:
- nla_nest_cancel(skb, entry_attr);
- return err;
-}
-
-static struct devlink_dpipe_table *
-devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name, struct devlink *devlink)
-{
- struct devlink_dpipe_table *table;
-
- list_for_each_entry_rcu(table, dpipe_tables, list,
- lockdep_is_held(&devlink->lock)) {
- if (!strcmp(table->name, table_name))
- return table;
- }
- return NULL;
-}
-
-int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
- struct devlink *devlink;
- int err;
-
- err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
- dump_ctx->info);
- if (err)
- return err;
-
- dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
- dump_ctx->info->snd_portid,
- dump_ctx->info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI,
- dump_ctx->cmd);
- if (!dump_ctx->hdr)
- goto nla_put_failure;
-
- devlink = dump_ctx->info->user_ptr[0];
- if (devlink_nl_put_handle(dump_ctx->skb, devlink))
- goto nla_put_failure;
- dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
- DEVLINK_ATTR_DPIPE_ENTRIES);
- if (!dump_ctx->nest)
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- nlmsg_free(dump_ctx->skb);
- return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
-
-int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
- struct devlink_dpipe_entry *entry)
-{
- return devlink_dpipe_entry_put(dump_ctx->skb, entry);
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
-
-int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
- nla_nest_end(dump_ctx->skb, dump_ctx->nest);
- genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
-
-void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
-{
- unsigned int value_count, value_index;
- struct devlink_dpipe_value *value;
-
- value = entry->action_values;
- value_count = entry->action_values_count;
- for (value_index = 0; value_index < value_count; value_index++) {
- kfree(value[value_index].value);
- kfree(value[value_index].mask);
- }
-
- value = entry->match_values;
- value_count = entry->match_values_count;
- for (value_index = 0; value_index < value_count; value_index++) {
- kfree(value[value_index].value);
- kfree(value[value_index].mask);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
-
-static int devlink_dpipe_entries_fill(struct genl_info *info,
- enum devlink_command cmd, int flags,
- struct devlink_dpipe_table *table)
-{
- struct devlink_dpipe_dump_ctx dump_ctx;
- struct nlmsghdr *nlh;
- int err;
-
- dump_ctx.skb = NULL;
- dump_ctx.cmd = cmd;
- dump_ctx.info = info;
-
- err = table->table_ops->entries_dump(table->priv,
- table->counters_enabled,
- &dump_ctx);
- if (err)
- return err;
-
-send_done:
- nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
- if (err)
- return err;
- goto send_done;
- }
- return genlmsg_reply(dump_ctx.skb, info);
-}
-
-static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_dpipe_table *table;
- const char *table_name;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
- return -EINVAL;
-
- table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return -EINVAL;
-
- if (!table->table_ops->entries_dump)
- return -EINVAL;
-
- return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
- 0, table);
-}
-
-static int devlink_dpipe_fields_put(struct sk_buff *skb,
- const struct devlink_dpipe_header *header)
-{
- struct devlink_dpipe_field *field;
- struct nlattr *field_attr;
- int i;
-
- for (i = 0; i < header->fields_count; i++) {
- field = &header->fields[i];
- field_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_FIELD);
- if (!field_attr)
- return -EMSGSIZE;
- if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
- goto nla_put_failure;
- nla_nest_end(skb, field_attr);
- }
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, field_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_dpipe_header_put(struct sk_buff *skb,
- struct devlink_dpipe_header *header)
-{
- struct nlattr *fields_attr, *header_attr;
- int err;
-
- header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
- if (!header_attr)
- return -EMSGSIZE;
-
- if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
- nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
- goto nla_put_failure;
-
- fields_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
- if (!fields_attr)
- goto nla_put_failure;
-
- err = devlink_dpipe_fields_put(skb, header);
- if (err) {
- nla_nest_cancel(skb, fields_attr);
- goto nla_put_failure;
- }
- nla_nest_end(skb, fields_attr);
- nla_nest_end(skb, header_attr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
- nla_nest_cancel(skb, header_attr);
- return err;
-}
-
-static int devlink_dpipe_headers_fill(struct genl_info *info,
- enum devlink_command cmd, int flags,
- struct devlink_dpipe_headers *
- dpipe_headers)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct nlattr *headers_attr;
- struct sk_buff *skb = NULL;
- struct nlmsghdr *nlh;
- void *hdr;
- int i, j;
- int err;
-
- i = 0;
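- /* Same pagination scheme as the table dump: whenever the message
- * fills up, send it and resume from header i in a fresh skb.
- */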
-start_again:
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr) {
- nlmsg_free(skb);
- return -EMSGSIZE;
- }
-
- if (devlink_nl_put_handle(skb, devlink))
- goto nla_put_failure;
- headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
- if (!headers_attr)
- goto nla_put_failure;
-
- j = 0;
- for (; i < dpipe_headers->headers_count; i++) {
- err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
- if (err) {
- if (!j)
- goto err_table_put;
- break;
- }
- j++;
- }
- nla_nest_end(skb, headers_attr);
- genlmsg_end(skb, hdr);
- if (i != dpipe_headers->headers_count)
- goto start_again;
-
-send_done:
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
- goto send_done;
- }
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_table_put:
- nlmsg_free(skb);
- return err;
-}
-
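The start_again/send_done shape above recurs for every multi-part reply in this file: records are packed into one skb until an nla_put fails with -EMSGSIZE, the partial message is flushed, and filling resumes from the index where it stopped; NLMSG_DONE closes the sequence. A minimal sketch of that control flow in plain C, with invented names and counters standing in for skbs:

#include <stdio.h>

#define RECORDS 10	/* total records to dump */
#define PER_SKB 4	/* records that fit before an -EMSGSIZE */

int main(void)
{
	int i = 0;

	while (i < RECORDS) {
		int filled = 0;

		/* inner loop mirrors the for (; i < count; i++) fill above */
		while (i < RECORDS && filled < PER_SKB) {
			filled++;
			i++;
		}
		printf("flush skb carrying %d records\n", filled);
	}
	printf("send NLMSG_DONE\n");	/* the send_done tail */
	return 0;
}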
-static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
-
- if (!devlink->dpipe_headers)
- return -EOPNOTSUPP;
- return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
- 0, devlink->dpipe_headers);
-}
-
-static int devlink_dpipe_table_counters_set(struct devlink *devlink,
- const char *table_name,
- bool enable)
-{
- struct devlink_dpipe_table *table;
-
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return -EINVAL;
-
- if (table->counter_control_extern)
- return -EOPNOTSUPP;
-
- if (!(table->counters_enabled ^ enable))
- return 0;
-
- table->counters_enabled = enable;
- if (table->table_ops->counters_set_update)
- table->table_ops->counters_set_update(table->priv, enable);
- return 0;
-}
-
-static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const char *table_name;
- bool counters_enable;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
- GENL_REQ_ATTR_CHECK(info,
- DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
- return -EINVAL;
-
- table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
- counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
-
- return devlink_dpipe_table_counters_set(devlink, table_name,
- counters_enable);
-}
-
-static struct devlink_resource *
-devlink_resource_find(struct devlink *devlink,
- struct devlink_resource *resource, u64 resource_id)
-{
- struct list_head *resource_list;
-
- if (resource)
- resource_list = &resource->resource_list;
- else
- resource_list = &devlink->resource_list;
-
- list_for_each_entry(resource, resource_list, list) {
- struct devlink_resource *child_resource;
-
- if (resource->id == resource_id)
- return resource;
-
- child_resource = devlink_resource_find(devlink, resource,
- resource_id);
- if (child_resource)
- return child_resource;
- }
- return NULL;
-}
-
-static void
-devlink_resource_validate_children(struct devlink_resource *resource)
-{
- struct devlink_resource *child_resource;
- bool size_valid = true;
- u64 parts_size = 0;
-
- if (list_empty(&resource->resource_list))
- goto out;
-
- list_for_each_entry(child_resource, &resource->resource_list, list)
- parts_size += child_resource->size_new;
-
- if (parts_size > resource->size_new)
- size_valid = false;
-out:
- resource->size_valid = size_valid;
-}
-
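Put differently, a parent's new size is valid only if it can hold the sum of its children's new sizes, and a childless resource is always valid. The same check in isolation, as a sketch with invented names:

#include <stdbool.h>
#include <stdint.h>

static bool parent_size_valid(uint64_t parent_size_new,
			      const uint64_t *child_sizes_new, int n)
{
	uint64_t parts_size = 0;

	for (int i = 0; i < n; i++)
		parts_size += child_sizes_new[i];
	return parts_size <= parent_size_new;	/* empty list: trivially valid */
}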
-static int
-devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
- struct netlink_ext_ack *extack)
-{
- u64 remainder;
- int err = 0;
-
- if (size > resource->size_params.size_max) {
- NL_SET_ERR_MSG(extack, "Size larger than maximum");
- err = -EINVAL;
- }
-
- if (size < resource->size_params.size_min) {
- NL_SET_ERR_MSG(extack, "Size smaller than minimum");
- err = -EINVAL;
- }
-
- div64_u64_rem(size, resource->size_params.size_granularity, &remainder);
- if (remainder) {
- NL_SET_ERR_MSG(extack, "Wrong granularity");
- err = -EINVAL;
- }
-
- return err;
-}
-
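A userspace mirror of the same three checks, as a sketch with invented names; the kernel uses div64_u64_rem() rather than a plain % so the 64-bit remainder also works on 32-bit architectures:

#include <stdbool.h>
#include <stdint.h>

static bool resource_size_is_valid(uint64_t size, uint64_t min,
				   uint64_t max, uint64_t gran)
{
	/* same order as above: maximum, minimum, granularity */
	return size <= max && size >= min && size % gran == 0;
}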
-static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_resource *resource;
- u64 resource_id;
- u64 size;
- int err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
- GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
- return -EINVAL;
- resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (!resource)
- return -EINVAL;
-
- size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = devlink_resource_validate_size(resource, size, info->extack);
- if (err)
- return err;
-
- resource->size_new = size;
- devlink_resource_validate_children(resource);
- if (resource->parent)
- devlink_resource_validate_children(resource->parent);
- return 0;
-}
-
-static int
-devlink_resource_size_params_put(struct devlink_resource *resource,
- struct sk_buff *skb)
-{
- struct devlink_resource_size_params *size_params;
-
- size_params = &resource->size_params;
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
- size_params->size_granularity, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
- size_params->size_max, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
- size_params->size_min, DEVLINK_ATTR_PAD) ||
- nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_resource_occ_put(struct devlink_resource *resource,
- struct sk_buff *skb)
-{
- if (!resource->occ_get)
- return 0;
- return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
- resource->occ_get(resource->occ_get_priv),
- DEVLINK_ATTR_PAD);
-}
-
-static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
- struct devlink_resource *resource)
-{
- struct devlink_resource *child_resource;
- struct nlattr *child_resource_attr;
- struct nlattr *resource_attr;
-
- resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
- if (!resource_attr)
- return -EMSGSIZE;
-
- if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
- DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (resource->size != resource->size_new &&
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
- resource->size_new, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (devlink_resource_occ_put(resource, skb))
- goto nla_put_failure;
- if (devlink_resource_size_params_put(resource, skb))
- goto nla_put_failure;
- if (list_empty(&resource->resource_list))
- goto out;
-
- if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
- resource->size_valid))
- goto nla_put_failure;
-
- child_resource_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_RESOURCE_LIST);
- if (!child_resource_attr)
- goto nla_put_failure;
-
- list_for_each_entry(child_resource, &resource->resource_list, list) {
- if (devlink_resource_put(devlink, skb, child_resource))
- goto resource_put_failure;
- }
-
- nla_nest_end(skb, child_resource_attr);
-out:
- nla_nest_end(skb, resource_attr);
- return 0;
-
-resource_put_failure:
- nla_nest_cancel(skb, child_resource_attr);
-nla_put_failure:
- nla_nest_cancel(skb, resource_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_resource_fill(struct genl_info *info,
- enum devlink_command cmd, int flags)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_resource *resource;
- struct nlattr *resources_attr;
- struct sk_buff *skb = NULL;
- struct nlmsghdr *nlh;
- bool incomplete;
- void *hdr;
- int i;
- int err;
-
- resource = list_first_entry(&devlink->resource_list,
- struct devlink_resource, list);
-start_again:
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr) {
- nlmsg_free(skb);
- return -EMSGSIZE;
- }
-
- if (devlink_nl_put_handle(skb, devlink))
- goto nla_put_failure;
-
- resources_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_RESOURCE_LIST);
- if (!resources_attr)
- goto nla_put_failure;
-
- incomplete = false;
- i = 0;
- list_for_each_entry_from(resource, &devlink->resource_list, list) {
- err = devlink_resource_put(devlink, skb, resource);
- if (err) {
- if (!i)
- goto err_resource_put;
- incomplete = true;
- break;
- }
- i++;
- }
- nla_nest_end(skb, resources_attr);
- genlmsg_end(skb, hdr);
- if (incomplete)
- goto start_again;
-send_done:
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
- goto send_done;
- }
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_resource_put:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
-
- if (list_empty(&devlink->resource_list))
- return -EOPNOTSUPP;
-
- return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
-}
-
-int devlink_resources_validate(struct devlink *devlink,
- struct devlink_resource *resource,
- struct genl_info *info)
-{
- struct list_head *resource_list;
- int err = 0;
-
- if (resource)
- resource_list = &resource->resource_list;
- else
- resource_list = &devlink->resource_list;
-
- list_for_each_entry(resource, resource_list, list) {
- if (!resource->size_valid)
- return -EINVAL;
- err = devlink_resources_validate(devlink, resource, info);
- if (err)
- return err;
- }
- return err;
-}
-
-static const struct devlink_param devlink_param_generic[] = {
- {
- .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
- .name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
- .type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
- .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
- .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
- .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
- .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
- .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
- .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
- .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
- .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
- .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
- .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
- .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
- .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
- .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
- .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
- .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
- .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
- .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
- .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
- },
-};
-
-static int devlink_param_generic_verify(const struct devlink_param *param)
-{
- /* verify that it matches a generic parameter by id and name */
- if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
- return -EINVAL;
- if (strcmp(param->name, devlink_param_generic[param->id].name))
- return -ENOENT;
-
- WARN_ON(param->type != devlink_param_generic[param->id].type);
-
- return 0;
-}
-
-static int devlink_param_driver_verify(const struct devlink_param *param)
-{
- int i;
-
- if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
- return -EINVAL;
- /* verify no such name in generic params */
- for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
- if (!strcmp(param->name, devlink_param_generic[i].name))
- return -EEXIST;
-
- return 0;
-}
-
-static struct devlink_param_item *
-devlink_param_find_by_name(struct xarray *params, const char *param_name)
-{
- struct devlink_param_item *param_item;
- unsigned long param_id;
-
- xa_for_each(params, param_id, param_item) {
- if (!strcmp(param_item->param->name, param_name))
- return param_item;
- }
- return NULL;
-}
-
-static struct devlink_param_item *
-devlink_param_find_by_id(struct xarray *params, u32 param_id)
-{
- return xa_load(params, param_id);
-}
-
-static bool
-devlink_param_cmode_is_supported(const struct devlink_param *param,
- enum devlink_param_cmode cmode)
-{
- return test_bit(cmode, &param->supported_cmodes);
-}
-
-static int devlink_param_get(struct devlink *devlink,
- const struct devlink_param *param,
- struct devlink_param_gset_ctx *ctx)
-{
- if (!param->get || devlink->reload_failed)
- return -EOPNOTSUPP;
- return param->get(devlink, param->id, ctx);
-}
-
-static int devlink_param_set(struct devlink *devlink,
- const struct devlink_param *param,
- struct devlink_param_gset_ctx *ctx)
-{
- if (!param->set || devlink->reload_failed)
- return -EOPNOTSUPP;
- return param->set(devlink, param->id, ctx);
-}
-
-static int
-devlink_param_type_to_nla_type(enum devlink_param_type param_type)
-{
- switch (param_type) {
- case DEVLINK_PARAM_TYPE_U8:
- return NLA_U8;
- case DEVLINK_PARAM_TYPE_U16:
- return NLA_U16;
- case DEVLINK_PARAM_TYPE_U32:
- return NLA_U32;
- case DEVLINK_PARAM_TYPE_STRING:
- return NLA_STRING;
- case DEVLINK_PARAM_TYPE_BOOL:
- return NLA_FLAG;
- default:
- return -EINVAL;
- }
-}
-
-static int
-devlink_nl_param_value_fill_one(struct sk_buff *msg,
- enum devlink_param_type type,
- enum devlink_param_cmode cmode,
- union devlink_param_value val)
-{
- struct nlattr *param_value_attr;
-
- param_value_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_PARAM_VALUE);
- if (!param_value_attr)
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
- goto value_nest_cancel;
-
- switch (type) {
- case DEVLINK_PARAM_TYPE_U8:
- if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_U16:
- if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_U32:
- if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_STRING:
- if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
- val.vstr))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_BOOL:
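-		/* bools are encoded as a flag attribute: present means
-		 * true, absent means false
-		 */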
- if (val.vbool &&
- nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
- goto value_nest_cancel;
- break;
- }
-
- nla_nest_end(msg, param_value_attr);
- return 0;
-
-value_nest_cancel:
- nla_nest_cancel(msg, param_value_attr);
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
- unsigned int port_index,
- struct devlink_param_item *param_item,
- enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
- bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
- const struct devlink_param *param = param_item->param;
- struct devlink_param_gset_ctx ctx;
- struct nlattr *param_values_list;
- struct nlattr *param_attr;
- int nla_type;
- void *hdr;
- int err;
- int i;
-
- /* Gather a value for each supported cmode; the driverinit value
- * comes from the stored copy rather than the driver's get() op
- */
- for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
- if (!devlink_param_cmode_is_supported(param, i))
- continue;
- if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- if (param_item->driverinit_value_new_valid)
- param_value[i] = param_item->driverinit_value_new;
- else if (param_item->driverinit_value_valid)
- param_value[i] = param_item->driverinit_value;
- else
- return -EOPNOTSUPP;
- } else {
- ctx.cmode = i;
- err = devlink_param_get(devlink, param, &ctx);
- if (err)
- return err;
- param_value[i] = ctx.val;
- }
- param_value_set[i] = true;
- }
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
- cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
- cmd == DEVLINK_CMD_PORT_PARAM_DEL)
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
- goto genlmsg_cancel;
-
- param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
- if (!param_attr)
- goto genlmsg_cancel;
- if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
- goto param_nest_cancel;
- if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
- goto param_nest_cancel;
-
- nla_type = devlink_param_type_to_nla_type(param->type);
- if (nla_type < 0)
- goto param_nest_cancel;
- if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
- goto param_nest_cancel;
-
- param_values_list = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_PARAM_VALUES_LIST);
- if (!param_values_list)
- goto param_nest_cancel;
-
- for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
- if (!param_value_set[i])
- continue;
- err = devlink_nl_param_value_fill_one(msg, param->type,
- i, param_value[i]);
- if (err)
- goto values_list_nest_cancel;
- }
-
- nla_nest_end(msg, param_values_list);
- nla_nest_end(msg, param_attr);
- genlmsg_end(msg, hdr);
- return 0;
-
-values_list_nest_cancel:
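-	/* ending (rather than cancelling) the inner nest is fine here:
-	 * the nla_nest_cancel() of param_attr below trims it away anyway
-	 */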
- nla_nest_end(msg, param_values_list);
-param_nest_cancel:
- nla_nest_cancel(msg, param_attr);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_param_notify(struct devlink *devlink,
- unsigned int port_index,
- struct devlink_param_item *param_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
- cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
- cmd != DEVLINK_CMD_PORT_PARAM_DEL);
-
- /* devlink_notify_register() / devlink_notify_unregister()
- * will replay the notifications if the params are added/removed
- * outside of the lifetime of the instance.
- */
- if (!devl_is_registered(devlink))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
- err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
- 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_param_item *param_item;
- unsigned long param_id;
- int err = 0;
-
- xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
- err = devlink_nl_param_fill(msg, devlink, 0, param_item,
- DEVLINK_CMD_PARAM_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = param_id;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_param_get = {
- .dump_one = devlink_nl_cmd_param_get_dump_one,
-};
-
-static int
-devlink_param_type_get_from_info(struct genl_info *info,
- enum devlink_param_type *param_type)
-{
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
- return -EINVAL;
-
- switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
- case NLA_U8:
- *param_type = DEVLINK_PARAM_TYPE_U8;
- break;
- case NLA_U16:
- *param_type = DEVLINK_PARAM_TYPE_U16;
- break;
- case NLA_U32:
- *param_type = DEVLINK_PARAM_TYPE_U32;
- break;
- case NLA_STRING:
- *param_type = DEVLINK_PARAM_TYPE_STRING;
- break;
- case NLA_FLAG:
- *param_type = DEVLINK_PARAM_TYPE_BOOL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-devlink_param_value_get_from_info(const struct devlink_param *param,
- struct genl_info *info,
- union devlink_param_value *value)
-{
- struct nlattr *param_data;
- int len;
-
- param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
-
- if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
- return -EINVAL;
-
- switch (param->type) {
- case DEVLINK_PARAM_TYPE_U8:
- if (nla_len(param_data) != sizeof(u8))
- return -EINVAL;
- value->vu8 = nla_get_u8(param_data);
- break;
- case DEVLINK_PARAM_TYPE_U16:
- if (nla_len(param_data) != sizeof(u16))
- return -EINVAL;
- value->vu16 = nla_get_u16(param_data);
- break;
- case DEVLINK_PARAM_TYPE_U32:
- if (nla_len(param_data) != sizeof(u32))
- return -EINVAL;
- value->vu32 = nla_get_u32(param_data);
- break;
- case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(param_data), nla_len(param_data));
- if (len == nla_len(param_data) ||
- len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
- return -EINVAL;
- strcpy(value->vstr, nla_data(param_data));
- break;
- case DEVLINK_PARAM_TYPE_BOOL:
- if (param_data && nla_len(param_data))
- return -EINVAL;
- value->vbool = nla_get_flag(param_data);
- break;
- }
- return 0;
-}
-
-static struct devlink_param_item *
-devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
-{
- char *param_name;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
- return NULL;
-
- param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
- return devlink_param_find_by_name(params, param_name);
-}
-
-static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_param_item *param_item;
- struct sk_buff *msg;
- int err;
-
- param_item = devlink_param_get_from_info(&devlink->params, info);
- if (!param_item)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_param_fill(msg, devlink, 0, param_item,
- DEVLINK_CMD_PARAM_GET,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
- unsigned int port_index,
- struct xarray *params,
- struct genl_info *info,
- enum devlink_command cmd)
-{
- enum devlink_param_type param_type;
- struct devlink_param_gset_ctx ctx;
- enum devlink_param_cmode cmode;
- struct devlink_param_item *param_item;
- const struct devlink_param *param;
- union devlink_param_value value;
- int err = 0;
-
- param_item = devlink_param_get_from_info(params, info);
- if (!param_item)
- return -EINVAL;
- param = param_item->param;
- err = devlink_param_type_get_from_info(info, &param_type);
- if (err)
- return err;
- if (param_type != param->type)
- return -EINVAL;
- err = devlink_param_value_get_from_info(param, info, &value);
- if (err)
- return err;
- if (param->validate) {
- err = param->validate(devlink, param->id, value, info->extack);
- if (err)
- return err;
- }
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
- return -EINVAL;
- cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
- if (!devlink_param_cmode_is_supported(param, cmode))
- return -EOPNOTSUPP;
-
- if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
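-		/* driverinit values are only staged here; they take effect
-		 * when the driver reads them on the next reload
-		 */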
- param_item->driverinit_value_new = value;
- param_item->driverinit_value_new_valid = true;
- } else {
- if (!param->set)
- return -EOPNOTSUPP;
- ctx.val = value;
- ctx.cmode = cmode;
- err = devlink_param_set(devlink, param, &ctx);
- if (err)
- return err;
- }
-
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
-
- return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
- info, DEVLINK_CMD_PARAM_NEW);
-}
-
-static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
- return msg->len;
-}
-
-static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- NL_SET_ERR_MSG(info->extack, "Port params are not supported");
- return -EINVAL;
-}
-
-static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- NL_SET_ERR_MSG(info->extack, "Port params are not supported");
- return -EINVAL;
-}
-
-static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_snapshot *snapshot)
-{
- struct nlattr *snap_attr;
- int err;
-
- snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
- if (!snap_attr)
- return -EINVAL;
-
- err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(msg, snap_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, snap_attr);
- return err;
-}
-
-static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_region *region)
-{
- struct devlink_snapshot *snapshot;
- struct nlattr *snapshots_attr;
- int err;
-
- snapshots_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_REGION_SNAPSHOTS);
- if (!snapshots_attr)
- return -EINVAL;
-
- list_for_each_entry(snapshot, &region->snapshot_list, list) {
- err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
- if (err)
- goto nla_put_failure;
- }
-
- nla_nest_end(msg, snapshots_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, snapshots_attr);
- return err;
-}
-
-static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct devlink_region *region)
-{
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto nla_put_failure;
-
- if (region->port) {
- err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
- region->port->index);
- if (err)
- goto nla_put_failure;
- }
-
- err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size,
- DEVLINK_ATTR_PAD);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
- region->max_snapshots);
- if (err)
- goto nla_put_failure;
-
- err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static struct sk_buff *
-devlink_nl_region_notify_build(struct devlink_region *region,
- struct devlink_snapshot *snapshot,
- enum devlink_command cmd, u32 portid, u32 seq)
-{
- struct devlink *devlink = region->devlink;
- struct sk_buff *msg;
- void *hdr;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return ERR_PTR(-ENOMEM);
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
- if (!hdr) {
- err = -EMSGSIZE;
- goto out_free_msg;
- }
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto out_cancel_msg;
-
- if (region->port) {
- err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
- region->port->index);
- if (err)
- goto out_cancel_msg;
- }
-
- err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
- region->ops->name);
- if (err)
- goto out_cancel_msg;
-
- if (snapshot) {
- err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
- snapshot->id);
- if (err)
- goto out_cancel_msg;
- } else {
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size, DEVLINK_ATTR_PAD);
- if (err)
- goto out_cancel_msg;
- }
- genlmsg_end(msg, hdr);
-
- return msg;
-
-out_cancel_msg:
- genlmsg_cancel(msg, hdr);
-out_free_msg:
- nlmsg_free(msg);
- return ERR_PTR(err);
-}
-
-static void devlink_nl_region_notify(struct devlink_region *region,
- struct devlink_snapshot *snapshot,
- enum devlink_command cmd)
-{
- struct devlink *devlink = region->devlink;
- struct sk_buff *msg;
-
- WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
- if (IS_ERR(msg))
- return;
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-/**
- * __devlink_snapshot_id_increment - Increment number of snapshots using an id
- * @devlink: devlink instance
- * @id: the snapshot id
- *
- * Track when a new snapshot begins using an id. Load the count for the
- * given id from the snapshot xarray, increment it, and store it back.
- *
- * Called when a new snapshot is created with the given id.
- *
- * The id *must* have been previously allocated by
- * devlink_region_snapshot_id_get().
- *
- * Returns 0 on success, or an error on failure.
- */
-static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
-{
- unsigned long count;
- void *p;
- int err;
-
- xa_lock(&devlink->snapshot_ids);
- p = xa_load(&devlink->snapshot_ids, id);
- if (WARN_ON(!p)) {
- err = -EINVAL;
- goto unlock;
- }
-
- if (WARN_ON(!xa_is_value(p))) {
- err = -EINVAL;
- goto unlock;
- }
-
- count = xa_to_value(p);
- count++;
-
- err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_ATOMIC));
-unlock:
- xa_unlock(&devlink->snapshot_ids);
- return err;
-}
-
-/**
- * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id
- * @devlink: devlink instance
- * @id: the snapshot id
- *
- * Track when a snapshot is deleted and stops using an id. Load the count
- * for the given id from the snapshot xarray, decrement it, and store it
- * back.
- *
- * If the count reaches zero, erase this id from the xarray, freeing it
- * up for future re-use by devlink_region_snapshot_id_get().
- *
- * Called when a snapshot using the given id is deleted, and when the
- * initial allocator of the id is finished using it.
- */
-static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
-{
- unsigned long count;
- void *p;
-
- xa_lock(&devlink->snapshot_ids);
- p = xa_load(&devlink->snapshot_ids, id);
- if (WARN_ON(!p))
- goto unlock;
-
- if (WARN_ON(!xa_is_value(p)))
- goto unlock;
-
- count = xa_to_value(p);
-
- if (count > 1) {
- count--;
- __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_ATOMIC);
- } else {
- /* If this was the last user, we can erase this id */
- __xa_erase(&devlink->snapshot_ids, id);
- }
-unlock:
- xa_unlock(&devlink->snapshot_ids);
-}
-
-/**
- * __devlink_snapshot_id_insert - Insert a specific snapshot ID
- * @devlink: devlink instance
- * @id: the snapshot id
- *
- * Mark the given snapshot id as used by inserting a zero value into the
- * snapshot xarray.
- *
- * This must be called while holding the devlink instance lock. Unlike
- * devlink_region_snapshot_id_get(), the initial reference count is
- * zero, not one. The id is expected to be used immediately, before
- * the devlink instance lock is released.
- *
- * Returns zero on success, or an error code if the snapshot id could not
- * be inserted.
- */
-static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
-{
- int err;
-
- xa_lock(&devlink->snapshot_ids);
- if (xa_load(&devlink->snapshot_ids, id)) {
- xa_unlock(&devlink->snapshot_ids);
- return -EEXIST;
- }
- err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
- GFP_ATOMIC));
- xa_unlock(&devlink->snapshot_ids);
- return err;
-}
-
-/**
- * __devlink_region_snapshot_id_get - get snapshot ID
- * @devlink: devlink instance
- * @id: storage to return snapshot id
- *
- * Allocates a new snapshot id. Returns zero on success, or a negative
- * error on failure. Must be called while holding the devlink instance
- * lock.
- *
- * Snapshot IDs are tracked using an xarray which stores the number of
- * users of the snapshot id.
- *
- * Note that the caller of this function counts as a 'user', in order to
- * avoid race conditions. The caller must release its hold on the
- * snapshot by using devlink_region_snapshot_id_put.
- */
-static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
-{
- return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
- xa_limit_32b, GFP_KERNEL);
-}
-
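Taken together, the helpers above keep an id-to-user-count map: an allocation starts the count at one (or zero for explicitly inserted ids), every snapshot using the id increments it, and the id is recycled once the count falls back to zero. A toy model of that accounting, with a flat array standing in for the xarray and all names invented:

#include <stdint.h>

#define MAX_IDS 32

static uint32_t users[MAX_IDS];	/* 0 = id is free */

static int id_get(uint32_t *id)		/* ~__devlink_region_snapshot_id_get() */
{
	for (uint32_t i = 0; i < MAX_IDS; i++) {
		if (!users[i]) {
			users[i] = 1;	/* the caller counts as a user */
			*id = i;
			return 0;
		}
	}
	return -1;			/* no free ids */
}

static void id_increment(uint32_t id)	/* ~__devlink_snapshot_id_increment() */
{
	users[id]++;
}

static void id_decrement(uint32_t id)	/* ~__devlink_snapshot_id_decrement() */
{
	if (users[id])
		users[id]--;		/* hitting 0 frees the id for re-use */
}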
-/**
- * __devlink_region_snapshot_create - create a new snapshot
- * @region: devlink region of the snapshot
- * @data: snapshot data
- * @snapshot_id: snapshot id to be created
- *
- * This will add a new snapshot of a region. The snapshot
- * will be stored on the region struct and can be accessed
- * from devlink. This is useful for future analyses of snapshots.
- * Multiple snapshots can be created on a region.
- * The @snapshot_id should be obtained using the getter function.
- *
- * Must be called only while holding the region snapshot lock.
- */
-static int
-__devlink_region_snapshot_create(struct devlink_region *region,
- u8 *data, u32 snapshot_id)
-{
- struct devlink *devlink = region->devlink;
- struct devlink_snapshot *snapshot;
- int err;
-
- lockdep_assert_held(&region->snapshot_lock);
-
- /* check if region can hold one more snapshot */
- if (region->cur_snapshots == region->max_snapshots)
- return -ENOSPC;
-
- if (devlink_region_snapshot_get_by_id(region, snapshot_id))
- return -EEXIST;
-
- snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
- if (!snapshot)
- return -ENOMEM;
-
- err = __devlink_snapshot_id_increment(devlink, snapshot_id);
- if (err)
- goto err_snapshot_id_increment;
-
- snapshot->id = snapshot_id;
- snapshot->region = region;
- snapshot->data = data;
-
- list_add_tail(&snapshot->list, &region->snapshot_list);
-
- region->cur_snapshots++;
-
- devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
- return 0;
-
-err_snapshot_id_increment:
- kfree(snapshot);
- return err;
-}
-
-static void devlink_region_snapshot_del(struct devlink_region *region,
- struct devlink_snapshot *snapshot)
-{
- struct devlink *devlink = region->devlink;
-
- lockdep_assert_held(&region->snapshot_lock);
-
- devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
- region->cur_snapshots--;
- list_del(&snapshot->list);
- region->ops->destructor(snapshot->data);
- __devlink_snapshot_id_decrement(devlink, snapshot->id);
- kfree(snapshot);
-}
-
-static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *port = NULL;
- struct devlink_region *region;
- const char *region_name;
- struct sk_buff *msg;
- unsigned int index;
- int err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
- return -EINVAL;
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port)
- return -ENODEV;
- }
-
- region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
- info->snd_portid, info->snd_seq, 0,
- region);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb,
- struct devlink_port *port,
- int *idx,
- int start)
-{
- struct devlink_region *region;
- int err = 0;
-
- list_for_each_entry(region, &port->region_list, list) {
- if (*idx < start) {
- (*idx)++;
- continue;
- }
- err = devlink_nl_region_fill(msg, port->devlink,
- DEVLINK_CMD_REGION_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
- if (err)
- goto out;
- (*idx)++;
- }
-
-out:
- return err;
-}
-
-static int
-devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_region *region;
- struct devlink_port *port;
- unsigned long port_index;
- int idx = 0;
- int err;
-
- list_for_each_entry(region, &devlink->region_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_region_fill(msg, devlink,
- DEVLINK_CMD_REGION_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
- if (err) {
- state->idx = idx;
- return err;
- }
- idx++;
- }
-
- xa_for_each(&devlink->ports, port_index, port) {
- err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
- state->idx);
- if (err) {
- state->idx = idx;
- return err;
- }
- }
-
- return 0;
-}
-
-const struct devlink_cmd devl_cmd_region_get = {
- .dump_one = devlink_nl_cmd_region_get_dump_one,
-};
-
-static int devlink_nl_cmd_region_del(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_snapshot *snapshot;
- struct devlink_port *port = NULL;
- struct devlink_region *region;
- const char *region_name;
- unsigned int index;
- u32 snapshot_id;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
- GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
- return -EINVAL;
-
- region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
- snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port)
- return -ENODEV;
- }
-
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region)
- return -EINVAL;
-
- mutex_lock(&region->snapshot_lock);
- snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot) {
- mutex_unlock(&region->snapshot_lock);
- return -EINVAL;
- }
-
- devlink_region_snapshot_del(region, snapshot);
- mutex_unlock(&region->snapshot_lock);
- return 0;
-}
-
-static int
-devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_snapshot *snapshot;
- struct devlink_port *port = NULL;
- struct nlattr *snapshot_id_attr;
- struct devlink_region *region;
- const char *region_name;
- unsigned int index;
- u32 snapshot_id;
- u8 *data;
- int err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
- NL_SET_ERR_MSG(info->extack, "No region name provided");
- return -EINVAL;
- }
-
- region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port)
- return -ENODEV;
- }
-
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region) {
- NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
- return -EINVAL;
- }
-
- if (!region->ops->snapshot) {
- NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
- return -EOPNOTSUPP;
- }
-
- mutex_lock(&region->snapshot_lock);
-
- if (region->cur_snapshots == region->max_snapshots) {
- NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
- err = -ENOSPC;
- goto unlock;
- }
-
- snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
- if (snapshot_id_attr) {
- snapshot_id = nla_get_u32(snapshot_id_attr);
-
- if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
- NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
- err = -EEXIST;
- goto unlock;
- }
-
- err = __devlink_snapshot_id_insert(devlink, snapshot_id);
- if (err)
- goto unlock;
- } else {
- err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
- if (err) {
- NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
- goto unlock;
- }
- }
-
- if (port)
- err = region->port_ops->snapshot(port, region->port_ops,
- info->extack, &data);
- else
- err = region->ops->snapshot(devlink, region->ops,
- info->extack, &data);
- if (err)
- goto err_snapshot_capture;
-
- err = __devlink_region_snapshot_create(region, data, snapshot_id);
- if (err)
- goto err_snapshot_create;
-
- if (!snapshot_id_attr) {
- struct sk_buff *msg;
-
- snapshot = devlink_region_snapshot_get_by_id(region,
- snapshot_id);
- if (WARN_ON(!snapshot)) {
- err = -EINVAL;
- goto unlock;
- }
-
- msg = devlink_nl_region_notify_build(region, snapshot,
- DEVLINK_CMD_REGION_NEW,
- info->snd_portid,
- info->snd_seq);
- err = PTR_ERR_OR_ZERO(msg);
- if (err)
- goto err_notify;
-
- err = genlmsg_reply(msg, info);
- if (err)
- goto err_notify;
- }
-
- mutex_unlock(&region->snapshot_lock);
- return 0;
-
-err_snapshot_create:
- region->ops->destructor(data);
-err_snapshot_capture:
- __devlink_snapshot_id_decrement(devlink, snapshot_id);
- mutex_unlock(&region->snapshot_lock);
- return err;
-
-err_notify:
- devlink_region_snapshot_del(region, snapshot);
-unlock:
- mutex_unlock(&region->snapshot_lock);
- return err;
-}
-
-static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
- u8 *chunk, u32 chunk_size,
- u64 addr)
-{
- struct nlattr *chunk_attr;
- int err;
-
- chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
- if (!chunk_attr)
- return -EINVAL;
-
- err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
- DEVLINK_ATTR_PAD);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(msg, chunk_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, chunk_attr);
- return err;
-}
-
-#define DEVLINK_REGION_READ_CHUNK_SIZE 256
-
-typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset,
- struct netlink_ext_ack *extack);
-
-static int
-devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
- void *cb_priv, u64 start_offset, u64 end_offset,
- u64 *new_offset, struct netlink_ext_ack *extack)
-{
- u64 curr_offset = start_offset;
- int err = 0;
- u8 *data;
-
- /* Allocate and re-use a single buffer */
- data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- *new_offset = start_offset;
-
- while (curr_offset < end_offset) {
- u32 data_size;
-
- data_size = min_t(u32, end_offset - curr_offset,
- DEVLINK_REGION_READ_CHUNK_SIZE);
-
- err = cb(cb_priv, data, data_size, curr_offset, extack);
- if (err)
- break;
-
- err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
- if (err)
- break;
-
- curr_offset += data_size;
- }
- *new_offset = curr_offset;
-
- kfree(data);
-
- return err;
-}
-
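The fill loop above always works in DEVLINK_REGION_READ_CHUNK_SIZE pieces and truncates the last chunk to the remaining length. The offset arithmetic in isolation, as a sketch with invented names:

#include <stdint.h>
#include <stdio.h>

#define CHUNK 256	/* stands in for DEVLINK_REGION_READ_CHUNK_SIZE */

static void walk_chunks(uint64_t start, uint64_t end)
{
	uint64_t off = start;

	while (off < end) {
		uint32_t n = end - off < CHUNK ? (uint32_t)(end - off) : CHUNK;

		printf("read %u bytes at offset %llu\n", n,
		       (unsigned long long)off);
		off += n;
	}
}

int main(void)
{
	walk_chunks(0, 600);	/* prints 256, 256, 88 */
	return 0;
}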
-static int
-devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset,
- struct netlink_ext_ack __always_unused *extack)
-{
- struct devlink_snapshot *snapshot = cb_priv;
-
- memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
-
- return 0;
-}
-
-static int
-devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset, struct netlink_ext_ack *extack)
-{
- struct devlink_region *region = cb_priv;
-
- return region->port_ops->read(region->port, region->port_ops, extack,
- curr_offset, chunk_size, chunk);
-}
-
-static int
-devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset, struct netlink_ext_ack *extack)
-{
- struct devlink_region *region = cb_priv;
-
- return region->ops->read(region->devlink, region->ops, extack,
- curr_offset, chunk_size, chunk);
-}
-
-static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
- u64 ret_offset, start_offset, end_offset = U64_MAX;
- struct nlattr **attrs = info->attrs;
- struct devlink_port *port = NULL;
- devlink_chunk_fill_t *region_cb;
- struct devlink_region *region;
- const char *region_name;
- struct devlink *devlink;
- unsigned int index;
- void *region_cb_priv;
- void *hdr;
- int err;
-
- start_offset = state->start_offset;
-
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink))
- return PTR_ERR(devlink);
-
- if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
- NL_SET_ERR_MSG(cb->extack, "No region name provided");
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port) {
- err = -ENODEV;
- goto out_unlock;
- }
- }
-
- region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
- region_name = nla_data(region_attr);
-
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region) {
- NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
- err = -EINVAL;
- goto out_unlock;
- }
-
- snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
- if (!snapshot_attr) {
- if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
- NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (!region->ops->read) {
- NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
- err = -EOPNOTSUPP;
- goto out_unlock;
- }
-
- if (port)
- region_cb = &devlink_region_port_direct_fill;
- else
- region_cb = &devlink_region_direct_fill;
- region_cb_priv = region;
- } else {
- struct devlink_snapshot *snapshot;
- u32 snapshot_id;
-
- if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
- NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
- err = -EINVAL;
- goto out_unlock;
- }
-
- snapshot_id = nla_get_u32(snapshot_attr);
- snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot) {
- NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
- err = -EINVAL;
- goto out_unlock;
- }
- region_cb = &devlink_region_snapshot_fill;
- region_cb_priv = snapshot;
- }
-
- if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
- attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
- if (!start_offset)
- start_offset =
- nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
-
- end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
- end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
- }
-
- if (end_offset > region->size)
- end_offset = region->size;
-
- /* return 0 if there is no further data to read */
- if (start_offset == end_offset) {
- err = 0;
- goto out_unlock;
- }
-
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
- DEVLINK_CMD_REGION_READ);
- if (!hdr) {
- err = -EMSGSIZE;
- goto out_unlock;
- }
-
- err = devlink_nl_put_handle(skb, devlink);
- if (err)
- goto nla_put_failure;
-
- if (region->port) {
- err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
- region->port->index);
- if (err)
- goto nla_put_failure;
- }
-
- err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
- if (err)
- goto nla_put_failure;
-
- chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
- if (!chunks_attr) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
- start_offset, end_offset, &ret_offset,
- cb->extack);
-
- if (err && err != -EMSGSIZE)
- goto nla_put_failure;
-
- /* Check that some progress was made, to prevent an infinite loop */
- if (ret_offset == start_offset) {
- err = -EINVAL;
- goto nla_put_failure;
- }
-
- state->start_offset = ret_offset;
-
- nla_nest_end(skb, chunks_attr);
- genlmsg_end(skb, hdr);
- devl_unlock(devlink);
- devlink_put(devlink);
- return skb->len;
-
-nla_put_failure:
- genlmsg_cancel(skb, hdr);
-out_unlock:
- devl_unlock(devlink);
- devlink_put(devlink);
- return err;
-}
-
-struct devlink_stats {
- u64_stats_t rx_bytes;
- u64_stats_t rx_packets;
- struct u64_stats_sync syncp;
-};
-
-/**
- * struct devlink_trap_policer_item - Packet trap policer attributes.
- * @policer: Immutable packet trap policer attributes.
- * @rate: Rate in packets / sec.
- * @burst: Burst size in packets.
- * @list: trap_policer_list member.
- *
- * Describes packet trap policer attributes. Created by devlink during trap
- * policer registration.
- */
-struct devlink_trap_policer_item {
- const struct devlink_trap_policer *policer;
- u64 rate;
- u64 burst;
- struct list_head list;
-};
-
-/**
- * struct devlink_trap_group_item - Packet trap group attributes.
- * @group: Immutable packet trap group attributes.
- * @policer_item: Associated policer item. Can be NULL.
- * @list: trap_group_list member.
- * @stats: Trap group statistics.
- *
- * Describes packet trap group attributes. Created by devlink during trap
- * group registration.
- */
-struct devlink_trap_group_item {
- const struct devlink_trap_group *group;
- struct devlink_trap_policer_item *policer_item;
- struct list_head list;
- struct devlink_stats __percpu *stats;
-};
-
-/**
- * struct devlink_trap_item - Packet trap attributes.
- * @trap: Immutable packet trap attributes.
- * @group_item: Associated group item.
- * @list: trap_list member.
- * @action: Trap action.
- * @stats: Trap statistics.
- * @priv: Driver private information.
- *
- * Describes both mutable and immutable packet trap attributes. Created by
- * devlink during trap registration and used for all trap related operations.
- */
-struct devlink_trap_item {
- const struct devlink_trap *trap;
- struct devlink_trap_group_item *group_item;
- struct list_head list;
- enum devlink_trap_action action;
- struct devlink_stats __percpu *stats;
- void *priv;
-};
-
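The three structures above form a hierarchy: a trap belongs to a group, and a group may be bound to a policer. A hypothetical helper, not part of this file, that resolves a trap's effective policer through that chain could read:

static const struct devlink_trap_policer *
devlink_trap_item_policer(const struct devlink_trap_item *trap_item)
{
	const struct devlink_trap_group_item *group_item =
		trap_item->group_item;

	return group_item->policer_item ? group_item->policer_item->policer
					: NULL;
}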
-static struct devlink_trap_policer_item *
-devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
-{
- struct devlink_trap_policer_item *policer_item;
-
- list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
- if (policer_item->policer->id == id)
- return policer_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_item *
-devlink_trap_item_lookup(struct devlink *devlink, const char *name)
-{
- struct devlink_trap_item *trap_item;
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (!strcmp(trap_item->trap->name, name))
- return trap_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_item *
-devlink_trap_item_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- struct nlattr *attr;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
- return NULL;
- attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
-
- return devlink_trap_item_lookup(devlink, nla_data(attr));
-}
-
-static int
-devlink_trap_action_get_from_info(struct genl_info *info,
- enum devlink_trap_action *p_trap_action)
-{
- u8 val;
-
- val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
- switch (val) {
- case DEVLINK_TRAP_ACTION_DROP:
- case DEVLINK_TRAP_ACTION_TRAP:
- case DEVLINK_TRAP_ACTION_MIRROR:
- *p_trap_action = val;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int devlink_trap_metadata_put(struct sk_buff *msg,
- const struct devlink_trap *trap)
-{
- struct nlattr *attr;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
- if (!attr)
- return -EMSGSIZE;
-
- if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
- goto nla_put_failure;
- if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
- struct devlink_stats *stats)
-{
- int i;
-
- memset(stats, 0, sizeof(*stats));
- for_each_possible_cpu(i) {
- struct devlink_stats *cpu_stats;
- u64 rx_packets, rx_bytes;
- unsigned int start;
-
- cpu_stats = per_cpu_ptr(trap_stats, i);
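-		/* retry if the writer bumped the sequence count while this
-		 * CPU's pair of counters was being read
-		 */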
- do {
- start = u64_stats_fetch_begin(&cpu_stats->syncp);
- rx_packets = u64_stats_read(&cpu_stats->rx_packets);
- rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
- } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
-
- u64_stats_add(&stats->rx_packets, rx_packets);
- u64_stats_add(&stats->rx_bytes, rx_bytes);
- }
-}
-
-static int
-devlink_trap_group_stats_put(struct sk_buff *msg,
- struct devlink_stats __percpu *trap_stats)
-{
- struct devlink_stats stats;
- struct nlattr *attr;
-
- devlink_trap_stats_read(trap_stats, &stats);
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
- if (!attr)
- return -EMSGSIZE;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_item *trap_item)
-{
- struct devlink_stats stats;
- struct nlattr *attr;
- u64 drops = 0;
- int err;
-
- if (devlink->ops->trap_drop_counter_get) {
- err = devlink->ops->trap_drop_counter_get(devlink,
- trap_item->trap,
- &drops);
- if (err)
- return err;
- }
-
- devlink_trap_stats_read(trap_item->stats, &stats);
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
- if (!attr)
- return -EMSGSIZE;
-
- if (devlink->ops->trap_drop_counter_get &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_item *trap_item,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags)
-{
- struct devlink_trap_group_item *group_item = trap_item->group_item;
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
- group_item->group->name))
- goto nla_put_failure;
-
- if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
- goto nla_put_failure;
-
- if (trap_item->trap->generic &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
- goto nla_put_failure;
-
- err = devlink_trap_metadata_put(msg, trap_item->trap);
- if (err)
- goto nla_put_failure;
-
- err = devlink_trap_stats_put(msg, devlink, trap_item);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
-
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_item *trap_item;
- struct sk_buff *msg;
- int err;
-
- if (list_empty(&devlink->trap_list))
- return -EOPNOTSUPP;
-
- trap_item = devlink_trap_item_get_from_info(devlink, info);
- if (!trap_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap");
- return -ENOENT;
- }
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_trap_fill(msg, devlink, trap_item,
- DEVLINK_CMD_TRAP_NEW, info->snd_portid,
- info->snd_seq, 0);
- if (err)
- goto err_trap_fill;
-
- return genlmsg_reply(msg, info);
-
-err_trap_fill:
- nlmsg_free(msg);
- return err;
-}
-
-static int
-devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_trap_item *trap_item;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_fill(msg, devlink, trap_item,
- DEVLINK_CMD_TRAP_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_trap_get = {
- .dump_one = devlink_nl_cmd_trap_get_dump_one,
-};
-
-static int __devlink_trap_action_set(struct devlink *devlink,
- struct devlink_trap_item *trap_item,
- enum devlink_trap_action trap_action,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (trap_item->action != trap_action &&
- trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
- NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
- return 0;
- }
-
- err = devlink->ops->trap_action_set(devlink, trap_item->trap,
- trap_action, extack);
- if (err)
- return err;
-
- trap_item->action = trap_action;
-
- return 0;
-}
-
-static int devlink_trap_action_set(struct devlink *devlink,
- struct devlink_trap_item *trap_item,
- struct genl_info *info)
-{
- enum devlink_trap_action trap_action;
- int err;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
- return 0;
-
- err = devlink_trap_action_get_from_info(info, &trap_action);
- if (err) {
- NL_SET_ERR_MSG(info->extack, "Invalid trap action");
- return -EINVAL;
- }
-
- return __devlink_trap_action_set(devlink, trap_item, trap_action,
- info->extack);
-}
-
-static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_item *trap_item;
-
- if (list_empty(&devlink->trap_list))
- return -EOPNOTSUPP;
-
- trap_item = devlink_trap_item_get_from_info(devlink, info);
- if (!trap_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap");
- return -ENOENT;
- }
-
- return devlink_trap_action_set(devlink, trap_item, info);
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
-{
- struct devlink_trap_group_item *group_item;
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list) {
- if (!strcmp(group_item->group->name, name))
- return group_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id)
-{
- struct devlink_trap_group_item *group_item;
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list) {
- if (group_item->group->id == id)
- return group_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- char *name;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
- return NULL;
- name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
-
- return devlink_trap_group_item_lookup(devlink, name);
-}
-
-static int
-devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_group_item *group_item,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags)
-{
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
- group_item->group->name))
- goto nla_put_failure;
-
- if (group_item->group->generic &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
- goto nla_put_failure;
-
- if (group_item->policer_item &&
- nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
- group_item->policer_item->policer->id))
- goto nla_put_failure;
-
- err = devlink_trap_group_stats_put(msg, group_item->stats);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
-
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_group_item *group_item;
- struct sk_buff *msg;
- int err;
-
- if (list_empty(&devlink->trap_group_list))
- return -EOPNOTSUPP;
-
- group_item = devlink_trap_group_item_get_from_info(devlink, info);
- if (!group_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap group");
- return -ENOENT;
- }
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_trap_group_fill(msg, devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err)
- goto err_trap_group_fill;
-
- return genlmsg_reply(msg, info);
-
-err_trap_group_fill:
- nlmsg_free(msg);
- return err;
-}
-
-static int
-devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_trap_group_item *group_item;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_group_fill(msg, devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_trap_group_get = {
- .dump_one = devlink_nl_cmd_trap_group_get_dump_one,
-};
-
-static int
-__devlink_trap_group_action_set(struct devlink *devlink,
- struct devlink_trap_group_item *group_item,
- enum devlink_trap_action trap_action,
- struct netlink_ext_ack *extack)
-{
- const char *group_name = group_item->group->name;
- struct devlink_trap_item *trap_item;
- int err;
-
- if (devlink->ops->trap_group_action_set) {
- err = devlink->ops->trap_group_action_set(devlink, group_item->group,
- trap_action, extack);
- if (err)
- return err;
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (strcmp(trap_item->group_item->group->name, group_name))
- continue;
- if (trap_item->action != trap_action &&
- trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
- continue;
- trap_item->action = trap_action;
- }
-
- return 0;
- }
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (strcmp(trap_item->group_item->group->name, group_name))
- continue;
- err = __devlink_trap_action_set(devlink, trap_item,
- trap_action, extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int
-devlink_trap_group_action_set(struct devlink *devlink,
- struct devlink_trap_group_item *group_item,
- struct genl_info *info, bool *p_modified)
-{
- enum devlink_trap_action trap_action;
- int err;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
- return 0;
-
- err = devlink_trap_action_get_from_info(info, &trap_action);
- if (err) {
- NL_SET_ERR_MSG(info->extack, "Invalid trap action");
- return -EINVAL;
- }
-
- err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
- info->extack);
- if (err)
- return err;
-
- *p_modified = true;
-
- return 0;
-}
-
-static int devlink_trap_group_set(struct devlink *devlink,
- struct devlink_trap_group_item *group_item,
- struct genl_info *info)
-{
- struct devlink_trap_policer_item *policer_item;
- struct netlink_ext_ack *extack = info->extack;
- const struct devlink_trap_policer *policer;
- struct nlattr **attrs = info->attrs;
- u32 policer_id;
- int err;
-
- if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
- return 0;
-
- if (!devlink->ops->trap_group_set)
- return -EOPNOTSUPP;
-
- policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
- policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
- if (policer_id && !policer_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
- policer = policer_item ? policer_item->policer : NULL;
-
- err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
- extack);
- if (err)
- return err;
-
- group_item->policer_item = policer_item;
-
- return 0;
-}
-
-static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_group_item *group_item;
- bool modified = false;
- int err;
-
- if (list_empty(&devlink->trap_group_list))
- return -EOPNOTSUPP;
-
- group_item = devlink_trap_group_item_get_from_info(devlink, info);
- if (!group_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap group");
- return -ENOENT;
- }
-
- err = devlink_trap_group_action_set(devlink, group_item, info,
- &modified);
- if (err)
- return err;
-
- err = devlink_trap_group_set(devlink, group_item, info);
- if (err)
- goto err_trap_group_set;
-
- return 0;
-
-err_trap_group_set:
- if (modified)
- NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
- return err;
-}
-
-static struct devlink_trap_policer_item *
-devlink_trap_policer_item_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- u32 id;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
- return NULL;
- id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
-
- return devlink_trap_policer_item_lookup(devlink, id);
-}
-
-static int
-devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_policer *policer)
-{
- struct nlattr *attr;
- u64 drops;
- int err;
-
- if (!devlink->ops->trap_policer_counter_get)
- return 0;
-
- err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
- if (err)
- return err;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
- if (!attr)
- return -EMSGSIZE;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static int
-devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_policer_item *policer_item,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags)
-{
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
- policer_item->policer->id))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
- policer_item->rate, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
- policer_item->burst, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- err = devlink_trap_policer_stats_put(msg, devlink,
- policer_item->policer);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
-
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_trap_policer_item *policer_item;
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- if (list_empty(&devlink->trap_policer_list))
- return -EOPNOTSUPP;
-
- policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
- if (!policer_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err)
- goto err_trap_policer_fill;
-
- return genlmsg_reply(msg, info);
-
-err_trap_policer_fill:
- nlmsg_free(msg);
- return err;
-}
-
-static int
-devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_trap_policer_item *policer_item;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_trap_policer_get = {
- .dump_one = devlink_nl_cmd_trap_policer_get_dump_one,
-};
-
-static int
-devlink_trap_policer_set(struct devlink *devlink,
- struct devlink_trap_policer_item *policer_item,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct nlattr **attrs = info->attrs;
- u64 rate, burst;
- int err;
-
- rate = policer_item->rate;
- burst = policer_item->burst;
-
- if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
- rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
-
- if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
- burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
-
- if (rate < policer_item->policer->min_rate) {
- NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
- return -EINVAL;
- }
-
- if (rate > policer_item->policer->max_rate) {
- NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
- return -EINVAL;
- }
-
- if (burst < policer_item->policer->min_burst) {
- NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
- return -EINVAL;
- }
-
- if (burst > policer_item->policer->max_burst) {
- NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
- return -EINVAL;
- }
-
- err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
- rate, burst, info->extack);
- if (err)
- return err;
-
- policer_item->rate = rate;
- policer_item->burst = burst;
-
- return 0;
-}
-
-static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_trap_policer_item *policer_item;
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
-
- if (list_empty(&devlink->trap_policer_list))
- return -EOPNOTSUPP;
-
- if (!devlink->ops->trap_policer_set)
- return -EOPNOTSUPP;
-
- policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
- if (!policer_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
-
- return devlink_trap_policer_set(devlink, policer_item, info);
-}
-
-const struct genl_small_ops devlink_nl_ops[56] = {
- {
- .cmd = DEVLINK_CMD_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_RATE_GET,
- .doit = devlink_nl_cmd_rate_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_RATE_SET,
- .doit = devlink_nl_cmd_rate_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
- },
- {
- .cmd = DEVLINK_CMD_RATE_NEW,
- .doit = devlink_nl_cmd_rate_new_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RATE_DEL,
- .doit = devlink_nl_cmd_rate_del_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
- },
- {
- .cmd = DEVLINK_CMD_PORT_SPLIT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_split_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_PORT_UNSPLIT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_unsplit_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_PORT_NEW,
- .doit = devlink_nl_cmd_port_new_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_DEL,
- .doit = devlink_nl_cmd_port_del_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_LINECARD_GET,
- .doit = devlink_nl_cmd_linecard_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_LINECARD_SET,
- .doit = devlink_nl_cmd_linecard_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- },
- {
- .cmd = DEVLINK_CMD_SB_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_POOL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_pool_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_POOL_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_pool_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_port_pool_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_port_pool_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_ESWITCH_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_eswitch_get_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_ESWITCH_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_eswitch_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_table_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_entries_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_headers_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_table_counters_set,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RESOURCE_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_resource_set,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RESOURCE_DUMP,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_resource_dump,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_RELOAD,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_reload,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PARAM_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_param_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PARAM_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_param_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_PARAM_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_param_get_doit,
- .dumpit = devlink_nl_cmd_port_param_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_PARAM_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_param_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_REGION_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_NEW,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_new,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_DEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_del,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_READ,
- .validate = GENL_DONT_VALIDATE_STRICT |
- GENL_DONT_VALIDATE_DUMP_STRICT,
- .dumpit = devlink_nl_cmd_region_read_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_INFO_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_info_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_recover_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
- .validate = GENL_DONT_VALIDATE_STRICT |
- GENL_DONT_VALIDATE_DUMP_STRICT,
- .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_test_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_FLASH_UPDATE,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_flash_update,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GET,
- .doit = devlink_nl_cmd_trap_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_TRAP_SET,
- .doit = devlink_nl_cmd_trap_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
- .doit = devlink_nl_cmd_trap_group_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
- .doit = devlink_nl_cmd_trap_group_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
- .doit = devlink_nl_cmd_trap_policer_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
- .doit = devlink_nl_cmd_trap_policer_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SELFTESTS_GET,
- .doit = devlink_nl_cmd_selftests_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SELFTESTS_RUN,
- .doit = devlink_nl_cmd_selftests_run,
- .flags = GENL_ADMIN_PERM,
- },
- /* -- No new ops here! Use split ops going forward! -- */
-};
-
-static void
-devlink_trap_policer_notify(struct devlink *devlink,
- const struct devlink_trap_policer_item *policer_item,
- enum devlink_command cmd);
-static void
-devlink_trap_group_notify(struct devlink *devlink,
- const struct devlink_trap_group_item *group_item,
- enum devlink_command cmd);
-static void devlink_trap_notify(struct devlink *devlink,
- const struct devlink_trap_item *trap_item,
- enum devlink_command cmd);
-
-void devlink_notify_register(struct devlink *devlink)
-{
- struct devlink_trap_policer_item *policer_item;
- struct devlink_trap_group_item *group_item;
- struct devlink_param_item *param_item;
- struct devlink_trap_item *trap_item;
- struct devlink_port *devlink_port;
- struct devlink_linecard *linecard;
- struct devlink_rate *rate_node;
- struct devlink_region *region;
- unsigned long port_index;
- unsigned long param_id;
-
- devlink_notify(devlink, DEVLINK_CMD_NEW);
- list_for_each_entry(linecard, &devlink->linecard_list, list)
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-
- xa_for_each(&devlink->ports, port_index, devlink_port)
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-
- list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW);
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list)
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW);
-
- list_for_each_entry(trap_item, &devlink->trap_list, list)
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-
- list_for_each_entry(rate_node, &devlink->rate_list, list)
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
-
- list_for_each_entry(region, &devlink->region_list, list)
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
- xa_for_each(&devlink->params, param_id, param_item)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
-}
-
-void devlink_notify_unregister(struct devlink *devlink)
-{
- struct devlink_trap_policer_item *policer_item;
- struct devlink_trap_group_item *group_item;
- struct devlink_param_item *param_item;
- struct devlink_trap_item *trap_item;
- struct devlink_port *devlink_port;
- struct devlink_rate *rate_node;
- struct devlink_region *region;
- unsigned long port_index;
- unsigned long param_id;
-
- xa_for_each(&devlink->params, param_id, param_item)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
-
- list_for_each_entry_reverse(region, &devlink->region_list, list)
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
-
- list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
-
- list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
-
- list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_DEL);
-
-	list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
-				     list)
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_DEL);
-
- xa_for_each(&devlink->ports, port_index, devlink_port)
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
-}
-
-static void devlink_port_type_warn(struct work_struct *work)
-{
- struct devlink_port *port = container_of(to_delayed_work(work),
- struct devlink_port,
- type_warn_dw);
- dev_warn(port->devlink->dev, "Type was not set for devlink port.");
-}
-
-static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
-{
- /* Ignore CPU and DSA flavours. */
- return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
-}
-
-#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
-
-static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
-{
- if (!devlink_port_type_should_warn(devlink_port))
- return;
-	/* Schedule a work item to warn in case the driver does not set
-	 * the port type within the timeout.
-	 */
- schedule_delayed_work(&devlink_port->type_warn_dw,
- DEVLINK_PORT_TYPE_WARN_TIMEOUT);
-}
-
-static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
-{
- if (!devlink_port_type_should_warn(devlink_port))
- return;
- cancel_delayed_work_sync(&devlink_port->type_warn_dw);
-}
-
-/**
- * devlink_port_init() - Init devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- *
- * Initialize the essential fields needed by functions
- * that may be called before devlink port registration.
- * Calling this function is optional and not needed
- * in case the driver does not use such functions.
- */
-void devlink_port_init(struct devlink *devlink,
- struct devlink_port *devlink_port)
-{
- if (devlink_port->initialized)
- return;
- devlink_port->devlink = devlink;
- INIT_LIST_HEAD(&devlink_port->region_list);
- devlink_port->initialized = true;
-}
-EXPORT_SYMBOL_GPL(devlink_port_init);
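-
-/* Usage sketch (illustrative; the foo_* names are hypothetical): a driver
- * that needs pre-registration calls, such as port region creation, inits
- * the port first and registers it later:
- *
- *	devlink_port_init(devlink, &foo->dl_port);
- *	... pre-registration calls on &foo->dl_port ...
- *	err = devl_port_register_with_ops(devlink, &foo->dl_port,
- *					  foo->hw_port_index, &foo_port_ops);
- */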
-
-/**
- * devlink_port_fini() - Deinitialize devlink port
- *
- * @devlink_port: devlink port
- *
- * Deinitialize the essential fields used by functions
- * that may be called after devlink port unregistration.
- * Calling this function is optional and not needed
- * in case the driver does not use such functions.
- */
-void devlink_port_fini(struct devlink_port *devlink_port)
-{
- WARN_ON(!list_empty(&devlink_port->region_list));
-}
-EXPORT_SYMBOL_GPL(devlink_port_fini);
-
-static const struct devlink_port_ops devlink_port_dummy_ops = {};
-
-/**
- * devl_port_register_with_ops() - Register devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- * @port_index: driver-specific numerical identifier of the port
- * @ops: port ops
- *
- * Register devlink port with the provided port index. The caller can
- * use any indexing scheme, even a hw-related one. The devlink_port
- * structure is convenient to embed inside the driver's private
- * structure. Note that the caller should take care of zeroing the
- * devlink_port structure.
- */
-int devl_port_register_with_ops(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index,
- const struct devlink_port_ops *ops)
-{
- int err;
-
- devl_assert_locked(devlink);
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- devlink_port_init(devlink, devlink_port);
- devlink_port->registered = true;
- devlink_port->index = port_index;
- devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
- spin_lock_init(&devlink_port->type_lock);
- INIT_LIST_HEAD(&devlink_port->reporter_list);
- err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
- if (err)
- return err;
-
- INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
- devlink_port_type_warn_schedule(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
-
-/**
- * devlink_port_register_with_ops - Register devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- * @port_index: driver-specific numerical identifier of the port
- * @ops: port ops
- *
- * Register devlink port with the provided port index. The caller can
- * use any indexing scheme, even a hw-related one. The devlink_port
- * structure is convenient to embed inside the driver's private
- * structure. Note that the caller should take care of zeroing the
- * devlink_port structure.
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-int devlink_port_register_with_ops(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index,
- const struct devlink_port_ops *ops)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_port_register_with_ops(devlink, devlink_port,
- port_index, ops);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
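-
-/* Usage sketch (illustrative; struct foo and the foo_* names are
- * hypothetical): the devlink_port is typically embedded in the driver's
- * private structure, which the caller must zero before registration:
- *
- *	struct foo {
- *		struct devlink_port dl_port;
- *	};
- *
- *	memset(&foo->dl_port, 0, sizeof(foo->dl_port));
- *	err = devlink_port_register_with_ops(devlink, &foo->dl_port,
- *					     foo->hw_port_index,
- *					     &foo_port_ops);
- *	if (err)
- *		return err;
- */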
-
-/**
- * devl_port_unregister() - Unregister devlink port
- *
- * @devlink_port: devlink port
- */
-void devl_port_unregister(struct devlink_port *devlink_port)
-{
- lockdep_assert_held(&devlink_port->devlink->lock);
- WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
-
- devlink_port_type_warn_cancel(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- xa_erase(&devlink_port->devlink->ports, devlink_port->index);
- WARN_ON(!list_empty(&devlink_port->reporter_list));
- devlink_port->registered = false;
-}
-EXPORT_SYMBOL_GPL(devl_port_unregister);
-
-/**
- * devlink_port_unregister - Unregister devlink port
- *
- * @devlink_port: devlink port
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_port_unregister(struct devlink_port *devlink_port)
-{
- struct devlink *devlink = devlink_port->devlink;
-
- devl_lock(devlink);
- devl_port_unregister(devlink_port);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_port_unregister);
-
-static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
- struct net_device *netdev)
-{
- const struct net_device_ops *ops = netdev->netdev_ops;
-
- /* If driver registers devlink port, it should set devlink port
- * attributes accordingly so the compat functions are called
- * and the original ops are not used.
- */
- if (ops->ndo_get_phys_port_name) {
-		/* Some drivers use the same set of ndos for netdevs
-		 * that have devlink_port registered and also for
-		 * those that don't. Make sure that ndo_get_phys_port_name
-		 * returns -EOPNOTSUPP here in case it is defined.
-		 * Warn if not.
-		 */
- char name[IFNAMSIZ];
- int err;
-
- err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
- WARN_ON(err != -EOPNOTSUPP);
- }
- if (ops->ndo_get_port_parent_id) {
-		/* Some drivers use the same set of ndos for netdevs
-		 * that have devlink_port registered and also for
-		 * those that don't. Make sure that ndo_get_port_parent_id
-		 * returns -EOPNOTSUPP here in case it is defined.
-		 * Warn if not.
-		 */
- struct netdev_phys_item_id ppid;
- int err;
-
- err = ops->ndo_get_port_parent_id(netdev, &ppid);
- WARN_ON(err != -EOPNOTSUPP);
- }
-}
-
-static void __devlink_port_type_set(struct devlink_port *devlink_port,
- enum devlink_port_type type,
- void *type_dev)
-{
- struct net_device *netdev = type_dev;
-
- ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
-
- if (type == DEVLINK_PORT_TYPE_NOTSET) {
- devlink_port_type_warn_schedule(devlink_port);
- } else {
- devlink_port_type_warn_cancel(devlink_port);
- if (type == DEVLINK_PORT_TYPE_ETH && netdev)
- devlink_port_type_netdev_checks(devlink_port, netdev);
- }
-
- spin_lock_bh(&devlink_port->type_lock);
- devlink_port->type = type;
- switch (type) {
- case DEVLINK_PORT_TYPE_ETH:
- devlink_port->type_eth.netdev = netdev;
- if (netdev) {
- ASSERT_RTNL();
- devlink_port->type_eth.ifindex = netdev->ifindex;
- BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) !=
- sizeof(netdev->name));
- strcpy(devlink_port->type_eth.ifname, netdev->name);
- }
- break;
- case DEVLINK_PORT_TYPE_IB:
- devlink_port->type_ib.ibdev = type_dev;
- break;
- default:
- break;
- }
- spin_unlock_bh(&devlink_port->type_lock);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-}
-
-/**
- * devlink_port_type_eth_set - Set port type to Ethernet
- *
- * @devlink_port: devlink port
- *
- * If a driver is calling this, it is most likely doing something wrong.
- */
-void devlink_port_type_eth_set(struct devlink_port *devlink_port)
-{
- dev_warn(devlink_port->devlink->dev,
- "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
- devlink_port->index);
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
-
-/**
- * devlink_port_type_ib_set - Set port type to InfiniBand
- *
- * @devlink_port: devlink port
- * @ibdev: related IB device
- */
-void devlink_port_type_ib_set(struct devlink_port *devlink_port,
- struct ib_device *ibdev)
-{
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
-
-/**
- * devlink_port_type_clear - Clear port type
- *
- * @devlink_port: devlink port
- *
- * If a driver is calling this to clear the Ethernet type, it is most
- * likely doing something wrong.
- */
-void devlink_port_type_clear(struct devlink_port *devlink_port)
-{
- if (devlink_port->type == DEVLINK_PORT_TYPE_ETH)
- dev_warn(devlink_port->devlink->dev,
- "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n",
- devlink_port->index);
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_clear);
-
-int devlink_port_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- struct devlink_port *devlink_port = netdev->devlink_port;
- struct devlink *devlink;
-
- if (!devlink_port)
- return NOTIFY_OK;
- devlink = devlink_port->devlink;
-
- switch (event) {
- case NETDEV_POST_INIT:
-		/* Set the type but not the netdev pointer. It is going to be
-		 * set later on by the NETDEV_REGISTER event. Happens once
-		 * during netdevice registration.
-		 */
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
- NULL);
- break;
- case NETDEV_REGISTER:
- case NETDEV_CHANGENAME:
- if (devlink_net(devlink) != dev_net(netdev))
- return NOTIFY_OK;
-		/* Set the netdev on top of the previously set type. Note that
-		 * this event also happens during a net namespace change, so
-		 * here we take into account the netdev pointer appearing in
-		 * this namespace.
-		 */
- __devlink_port_type_set(devlink_port, devlink_port->type,
- netdev);
- break;
- case NETDEV_UNREGISTER:
- if (devlink_net(devlink) != dev_net(netdev))
- return NOTIFY_OK;
-		/* Clear the netdev pointer, but not the type. This event also
-		 * happens during a net namespace change, so we need to clear
-		 * the pointer to the netdev that is moving to another net
-		 * namespace.
-		 */
- __devlink_port_type_set(devlink_port, devlink_port->type,
- NULL);
- break;
- case NETDEV_PRE_UNINIT:
-		/* Clear the type and the netdev pointer. Happens once during
-		 * netdevice unregistration.
-		 */
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
- NULL);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
-
- devlink_port->attrs_set = true;
- attrs->flavour = flavour;
- if (attrs->switch_id.id_len) {
- devlink_port->switch_port = true;
- if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
- attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
- } else {
- devlink_port->switch_port = false;
- }
- return 0;
-}
-
-/**
- * devlink_port_attrs_set - Set port attributes
- *
- * @devlink_port: devlink port
- * @attrs: devlink port attrs
- */
-void devlink_port_attrs_set(struct devlink_port *devlink_port,
- struct devlink_port_attrs *attrs)
-{
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- devlink_port->attrs = *attrs;
- ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
- if (ret)
- return;
- WARN_ON(attrs->splittable && attrs->split);
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
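-
-/* Usage sketch (illustrative): attributes are set before the port is
- * registered, e.g. for a physical front-panel port:
- *
- *	struct devlink_port_attrs attrs = {};
- *
- *	attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- *	attrs.phys.port_number = port_number;
- *	devlink_port_attrs_set(&foo->dl_port, &attrs);
- */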
-
-/**
- * devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
- *
- * @devlink_port: devlink port
- * @controller: associated controller number for the devlink port instance
- * @pf: associated PF for the devlink port instance
- * @external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, bool external)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_PF);
- if (ret)
- return;
- attrs->pci_pf.controller = controller;
- attrs->pci_pf.pf = pf;
- attrs->pci_pf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
-
-/**
- * devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
- *
- * @devlink_port: devlink port
- * @controller: associated controller number for the devlink port instance
- * @pf: associated PF for the devlink port instance
- * @vf: associated VF of a PF for the devlink port instance
- * @external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, u16 vf, bool external)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_VF);
- if (ret)
- return;
- attrs->pci_vf.controller = controller;
- attrs->pci_vf.pf = pf;
- attrs->pci_vf.vf = vf;
- attrs->pci_vf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
-
-/**
- * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
- *
- * @devlink_port: devlink port
- * @controller: associated controller number for the devlink port instance
- * @pf: associated PF for the devlink port instance
- * @sf: associated SF of a PF for the devlink port instance
- * @external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, u32 sf, bool external)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_SF);
- if (ret)
- return;
- attrs->pci_sf.controller = controller;
- attrs->pci_sf.pf = pf;
- attrs->pci_sf.sf = sf;
- attrs->pci_sf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
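-
-/* Usage sketch (illustrative values): SF 3 of PF 0 on the local (not
- * external) controller 0 would be described before registration as:
- *
- *	devlink_port_attrs_pci_sf_set(&sf->dl_port, 0, 0, 3, false);
- */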
-
-/**
- * devl_rate_node_create - create devlink rate node
- * @devlink: devlink instance
- * @priv: driver private data
- * @node_name: name of the resulting node
- * @parent: parent devlink_rate struct
- *
- * Create devlink rate object of type node
- */
-struct devlink_rate *
-devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
- struct devlink_rate *parent)
-{
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_by_name(devlink, node_name);
- if (!IS_ERR(rate_node))
- return ERR_PTR(-EEXIST);
-
- rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
- if (!rate_node)
- return ERR_PTR(-ENOMEM);
-
- if (parent) {
- rate_node->parent = parent;
- refcount_inc(&rate_node->parent->refcnt);
- }
-
- rate_node->type = DEVLINK_RATE_TYPE_NODE;
- rate_node->devlink = devlink;
- rate_node->priv = priv;
-
- rate_node->name = kstrdup(node_name, GFP_KERNEL);
- if (!rate_node->name) {
- kfree(rate_node);
- return ERR_PTR(-ENOMEM);
- }
-
- refcount_set(&rate_node->refcnt, 1);
- list_add(&rate_node->list, &devlink->rate_list);
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
- return rate_node;
-}
-EXPORT_SYMBOL_GPL(devl_rate_node_create);
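-
-/* Usage sketch (illustrative): create a named node under the instance lock
- * and check the result with IS_ERR(), as the function returns ERR_PTR()
- * encoded errors:
- *
- *	struct devlink_rate *node;
- *
- *	node = devl_rate_node_create(devlink, priv, "group0", NULL);
- *	if (IS_ERR(node))
- *		return PTR_ERR(node);
- */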
-
-/**
- * devl_rate_leaf_create - create devlink rate leaf
- * @devlink_port: devlink port object to create rate object on
- * @priv: driver private data
- * @parent: parent devlink_rate struct
- *
- * Create devlink rate object of type leaf on provided @devlink_port.
- */
-int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
- struct devlink_rate *parent)
-{
- struct devlink *devlink = devlink_port->devlink;
- struct devlink_rate *devlink_rate;
-
- devl_assert_locked(devlink_port->devlink);
-
- if (WARN_ON(devlink_port->devlink_rate))
- return -EBUSY;
-
- devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
- if (!devlink_rate)
- return -ENOMEM;
-
- if (parent) {
- devlink_rate->parent = parent;
- refcount_inc(&devlink_rate->parent->refcnt);
- }
-
- devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
- devlink_rate->devlink = devlink;
- devlink_rate->devlink_port = devlink_port;
- devlink_rate->priv = priv;
- list_add_tail(&devlink_rate->list, &devlink->rate_list);
- devlink_port->devlink_rate = devlink_rate;
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
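-
-/* Usage sketch (illustrative): attach a rate leaf to a registered port,
- * optionally parented to a previously created node:
- *
- *	err = devl_rate_leaf_create(&foo->dl_port, priv, node);
- *	if (err)
- *		return err;
- */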
-
-/**
- * devl_rate_leaf_destroy - destroy devlink rate leaf
- *
- * @devlink_port: devlink port linked to the rate object
- *
- * Destroy the devlink rate object of type leaf on provided @devlink_port.
- */
-void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
-{
- struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
-
- devl_assert_locked(devlink_port->devlink);
- if (!devlink_rate)
- return;
-
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
- if (devlink_rate->parent)
- refcount_dec(&devlink_rate->parent->refcnt);
- list_del(&devlink_rate->list);
- devlink_port->devlink_rate = NULL;
- kfree(devlink_rate);
-}
-EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
-
-/**
- * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
- * @devlink: devlink instance
- *
- * Unset parent for all rate objects and destroy all rate nodes
- * on specified device.
- */
-void devl_rate_nodes_destroy(struct devlink *devlink)
-{
-	struct devlink_rate *devlink_rate, *tmp;
- const struct devlink_ops *ops = devlink->ops;
-
- devl_assert_locked(devlink);
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- if (!devlink_rate->parent)
- continue;
-
- refcount_dec(&devlink_rate->parent->refcnt);
- if (devlink_rate_is_leaf(devlink_rate))
- ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
- NULL, NULL);
- else if (devlink_rate_is_node(devlink_rate))
- ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
- NULL, NULL);
- }
- list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
- if (devlink_rate_is_node(devlink_rate)) {
- ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
- list_del(&devlink_rate->list);
- kfree(devlink_rate->name);
- kfree(devlink_rate);
- }
- }
-}
-EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
-
-/**
- * devlink_port_linecard_set - Link port with a linecard
- *
- * @devlink_port: devlink port
- * @linecard: devlink linecard
- */
-void devlink_port_linecard_set(struct devlink_port *devlink_port,
- struct devlink_linecard *linecard)
-{
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- devlink_port->linecard = linecard;
-}
-EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
-
-static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
- char *name, size_t len)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int n = 0;
-
- if (!devlink_port->attrs_set)
- return -EOPNOTSUPP;
-
- switch (attrs->flavour) {
- case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- if (devlink_port->linecard)
- n = snprintf(name, len, "l%u",
- devlink_port->linecard->index);
- if (n < len)
- n += snprintf(name + n, len - n, "p%u",
- attrs->phys.port_number);
- if (n < len && attrs->split)
- n += snprintf(name + n, len - n, "s%u",
- attrs->phys.split_subport_number);
- break;
- case DEVLINK_PORT_FLAVOUR_CPU:
- case DEVLINK_PORT_FLAVOUR_DSA:
- case DEVLINK_PORT_FLAVOUR_UNUSED:
-		/* As CPU and DSA ports do not have a netdevice associated
-		 * with them, this case should never happen.
-		 */
- WARN_ON(1);
- return -EINVAL;
- case DEVLINK_PORT_FLAVOUR_PCI_PF:
- if (attrs->pci_pf.external) {
- n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
- if (n >= len)
- return -EINVAL;
- len -= n;
- name += n;
- }
- n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_VF:
- if (attrs->pci_vf.external) {
- n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
- if (n >= len)
- return -EINVAL;
- len -= n;
- name += n;
- }
- n = snprintf(name, len, "pf%uvf%u",
- attrs->pci_vf.pf, attrs->pci_vf.vf);
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_SF:
- if (attrs->pci_sf.external) {
- n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
- if (n >= len)
- return -EINVAL;
- len -= n;
- name += n;
- }
- n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
- attrs->pci_sf.sf);
- break;
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
- return -EOPNOTSUPP;
- }
-
- if (n >= len)
- return -EINVAL;
-
- return 0;
-}
-
-static int devlink_linecard_types_init(struct devlink_linecard *linecard)
-{
- struct devlink_linecard_type *linecard_type;
- unsigned int count;
- int i;
-
- count = linecard->ops->types_count(linecard, linecard->priv);
- linecard->types = kmalloc_array(count, sizeof(*linecard_type),
- GFP_KERNEL);
- if (!linecard->types)
- return -ENOMEM;
- linecard->types_count = count;
-
- for (i = 0; i < count; i++) {
- linecard_type = &linecard->types[i];
- linecard->ops->types_get(linecard, linecard->priv, i,
- &linecard_type->type,
- &linecard_type->priv);
- }
- return 0;
-}
-
-static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
-{
- kfree(linecard->types);
-}
-
-/**
- * devl_linecard_create - Create devlink linecard
- *
- * @devlink: devlink
- * @linecard_index: driver-specific numerical identifier of the linecard
- * @ops: linecard ops
- * @priv: user priv pointer
- *
- * Create devlink linecard instance with the provided linecard index.
- * The caller can use any indexing scheme, even a hw-related one.
- *
- * Return: Line card structure or an ERR_PTR() encoded error code.
- */
-struct devlink_linecard *
-devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
- const struct devlink_linecard_ops *ops, void *priv)
-{
- struct devlink_linecard *linecard;
- int err;
-
- if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
- !ops->types_count || !ops->types_get))
- return ERR_PTR(-EINVAL);
-
- if (devlink_linecard_index_exists(devlink, linecard_index))
- return ERR_PTR(-EEXIST);
-
- linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
- if (!linecard)
- return ERR_PTR(-ENOMEM);
-
- linecard->devlink = devlink;
- linecard->index = linecard_index;
- linecard->ops = ops;
- linecard->priv = priv;
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- mutex_init(&linecard->state_lock);
-
- err = devlink_linecard_types_init(linecard);
- if (err) {
- mutex_destroy(&linecard->state_lock);
- kfree(linecard);
- return ERR_PTR(err);
- }
-
- list_add_tail(&linecard->list, &devlink->linecard_list);
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- return linecard;
-}
-EXPORT_SYMBOL_GPL(devl_linecard_create);
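-
-/* Usage sketch (illustrative; the foo_lc_* callbacks are hypothetical):
- * all four mandatory ops must be set, otherwise the call WARNs and
- * returns ERR_PTR(-EINVAL):
- *
- *	static const struct devlink_linecard_ops foo_lc_ops = {
- *		.provision = foo_lc_provision,
- *		.unprovision = foo_lc_unprovision,
- *		.types_count = foo_lc_types_count,
- *		.types_get = foo_lc_types_get,
- *	};
- *
- *	linecard = devl_linecard_create(devlink, slot_index, &foo_lc_ops,
- *					foo);
- *	if (IS_ERR(linecard))
- *		return PTR_ERR(linecard);
- */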
-
-/**
- * devl_linecard_destroy - Destroy devlink linecard
- *
- * @linecard: devlink linecard
- */
-void devl_linecard_destroy(struct devlink_linecard *linecard)
-{
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
- list_del(&linecard->list);
- devlink_linecard_types_fini(linecard);
- mutex_destroy(&linecard->state_lock);
- kfree(linecard);
-}
-EXPORT_SYMBOL_GPL(devl_linecard_destroy);
-
-/**
- * devlink_linecard_provision_set - Set provisioning on linecard
- *
- * @linecard: devlink linecard
- * @type: linecard type
- *
- * This is either called directly from the provision() op call or
- * as a result of the provision() op call asynchronously.
- */
-void devlink_linecard_provision_set(struct devlink_linecard *linecard,
- const char *type)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->type && strcmp(linecard->type, type));
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
- linecard->type = type;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
-
-/**
- * devlink_linecard_provision_clear - Clear provisioning on linecard
- *
- * @linecard: devlink linecard
- *
- * This is either called directly from the unprovision() op call or
- * as a result of the unprovision() op call asynchronously.
- */
-void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
-
-/**
- * devlink_linecard_provision_fail - Fail provisioning on linecard
- *
- * @linecard: devlink linecard
- *
- * This is either called directly from the provision() op call or
- * as a result of the provision() op call asynchronously.
- */
-void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
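-
-/* Flow sketch (illustrative; foo_lc_apply_type() is hypothetical): a
- * provision() op that completes synchronously reports its outcome through
- * these helpers:
- *
- *	err = foo_lc_apply_type(foo, type);
- *	if (err) {
- *		devlink_linecard_provision_fail(linecard);
- *		return err;
- *	}
- *	devlink_linecard_provision_set(linecard, type);
- *	return 0;
- */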
-
-/**
- * devlink_linecard_activate - Set linecard active
- *
- * @linecard: devlink linecard
- */
-void devlink_linecard_activate(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
- linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_activate);
-
-/**
- * devlink_linecard_deactivate - Set linecard inactive
- *
- * @linecard: devlink linecard
- */
-void devlink_linecard_deactivate(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- switch (linecard->state) {
- case DEVLINK_LINECARD_STATE_ACTIVE:
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- break;
- case DEVLINK_LINECARD_STATE_UNPROVISIONING:
- /* Line card is being deactivated as part
- * of the unprovisioning flow.
- */
- break;
- default:
- WARN_ON(1);
- break;
- }
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
-
-/**
- * devlink_linecard_nested_dl_set - Attach/detach nested devlink
- * instance to linecard.
- *
- * @linecard: devlink linecard
- * @nested_devlink: devlink instance to attach or NULL to detach
- */
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink)
-{
- mutex_lock(&linecard->state_lock);
- linecard->nested_devlink = nested_devlink;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
-
-int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
- u32 size, u16 ingress_pools_count,
- u16 egress_pools_count, u16 ingress_tc_count,
- u16 egress_tc_count)
-{
- struct devlink_sb *devlink_sb;
-
- lockdep_assert_held(&devlink->lock);
-
- if (devlink_sb_index_exists(devlink, sb_index))
- return -EEXIST;
-
- devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
- if (!devlink_sb)
- return -ENOMEM;
- devlink_sb->index = sb_index;
- devlink_sb->size = size;
- devlink_sb->ingress_pools_count = ingress_pools_count;
- devlink_sb->egress_pools_count = egress_pools_count;
- devlink_sb->ingress_tc_count = ingress_tc_count;
- devlink_sb->egress_tc_count = egress_tc_count;
- list_add_tail(&devlink_sb->list, &devlink->sb_list);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_sb_register);
-
-int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
- u32 size, u16 ingress_pools_count,
- u16 egress_pools_count, u16 ingress_tc_count,
- u16 egress_tc_count)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
- egress_pools_count, ingress_tc_count,
- egress_tc_count);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_sb_register);
-
-void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
-{
- struct devlink_sb *devlink_sb;
-
- lockdep_assert_held(&devlink->lock);
-
- devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
- WARN_ON(!devlink_sb);
- list_del(&devlink_sb->list);
- kfree(devlink_sb);
-}
-EXPORT_SYMBOL_GPL(devl_sb_unregister);
-
-void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
-{
- devl_lock(devlink);
- devl_sb_unregister(devlink, sb_index);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_sb_unregister);
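-
-/* Illustrative sketch with made-up sizes (the "foo" name and all numbers
- * are assumed): register a single shared buffer while holding the
- * instance lock, as devl_sb_register() requires.
- */
-static int foo_sb_init(struct devlink *devlink)
-{
-	/* SB index 0, 16 MB, 4 ingress/4 egress pools, 8 TCs each way. */
-	return devl_sb_register(devlink, 0, 16 * 1024 * 1024, 4, 4, 8, 8);
-}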
-
-/**
- * devl_dpipe_headers_register - register dpipe headers
- *
- * @devlink: devlink
- * @dpipe_headers: dpipe header array
- *
- * Register the headers supported by hardware.
- */
-void devl_dpipe_headers_register(struct devlink *devlink,
- struct devlink_dpipe_headers *dpipe_headers)
-{
- lockdep_assert_held(&devlink->lock);
-
- devlink->dpipe_headers = dpipe_headers;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
-
-/**
- * devl_dpipe_headers_unregister - unregister dpipe headers
- *
- * @devlink: devlink
- *
- * Unregister the headers supported by hardware.
- */
-void devl_dpipe_headers_unregister(struct devlink *devlink)
-{
- lockdep_assert_held(&devlink->lock);
-
- devlink->dpipe_headers = NULL;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
-
-/**
- * devlink_dpipe_table_counter_enabled - check if counter allocation
- * is required
- * @devlink: devlink
- * @table_name: table name
- *
- * Used by a driver to check if counter allocation is required.
- * After counter allocation is turned on, the table entries
- * are updated to include counter statistics.
- *
- * From that point on, the driver must respect the counter
- * state so that each entry added to the table is added
- * with a counter.
- */
-bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
- const char *table_name)
-{
- struct devlink_dpipe_table *table;
- bool enabled;
-
- rcu_read_lock();
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- enabled = false;
- if (table)
- enabled = table->counters_enabled;
- rcu_read_unlock();
- return enabled;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
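-
-/* Illustrative sketch (the "foo_host" table name and the helper are
- * assumed): a dump path consults devlink_dpipe_table_counter_enabled()
- * and attaches a counter to each entry only when counters are on.
- */
-static void foo_entry_counter_fill(struct devlink *devlink,
-				   struct devlink_dpipe_entry *entry,
-				   u64 hw_counter)
-{
-	entry->counter_valid =
-		devlink_dpipe_table_counter_enabled(devlink, "foo_host");
-	if (entry->counter_valid)
-		entry->counter = hw_counter;
-}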
-
-/**
- * devl_dpipe_table_register - register dpipe table
- *
- * @devlink: devlink
- * @table_name: table name
- * @table_ops: table ops
- * @priv: priv
- * @counter_control_extern: external control for counters
- */
-int devl_dpipe_table_register(struct devlink *devlink,
- const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
- void *priv, bool counter_control_extern)
-{
- struct devlink_dpipe_table *table;
-
- lockdep_assert_held(&devlink->lock);
-
- if (WARN_ON(!table_ops->size_get))
- return -EINVAL;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
- devlink))
- return -EEXIST;
-
- table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
- table->name = table_name;
- table->table_ops = table_ops;
- table->priv = priv;
- table->counter_control_extern = counter_control_extern;
-
- list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
-
-/**
- * devl_dpipe_table_unregister - unregister dpipe table
- *
- * @devlink: devlink
- * @table_name: table name
- */
-void devl_dpipe_table_unregister(struct devlink *devlink,
- const char *table_name)
-{
- struct devlink_dpipe_table *table;
-
- lockdep_assert_held(&devlink->lock);
-
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return;
- list_del_rcu(&table->list);
- kfree_rcu(table, rcu);
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
-
-/**
- * devl_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- */
-int devl_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- struct devlink_resource *resource;
- struct list_head *resource_list;
- bool top_hierarchy;
-
- lockdep_assert_held(&devlink->lock);
-
- top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (resource)
- return -EINVAL;
-
- resource = kzalloc(sizeof(*resource), GFP_KERNEL);
- if (!resource)
- return -ENOMEM;
-
- if (top_hierarchy) {
- resource_list = &devlink->resource_list;
- } else {
- struct devlink_resource *parent_resource;
-
- parent_resource = devlink_resource_find(devlink, NULL,
- parent_resource_id);
- if (parent_resource) {
- resource_list = &parent_resource->resource_list;
- resource->parent = parent_resource;
- } else {
- kfree(resource);
- return -EINVAL;
- }
- }
-
- resource->name = resource_name;
- resource->size = resource_size;
- resource->size_new = resource_size;
- resource->id = resource_id;
- resource->size_valid = true;
- memcpy(&resource->size_params, size_params,
- sizeof(resource->size_params));
- INIT_LIST_HEAD(&resource->resource_list);
- list_add_tail(&resource->list, resource_list);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_resource_register);
-
-/**
- * devlink_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-int devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_resource_register(devlink, resource_name, resource_size,
- resource_id, parent_resource_id, size_params);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_resource_register);
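-
-/* Illustrative sketch (IDs, names and sizes are assumed): one top-level
- * resource with a nested child. devlink_resource_size_params_init()
- * fills the min/max/granularity limits used to validate user resizes.
- */
-enum { FOO_RES_KVD = 1, FOO_RES_KVD_LINEAR };
-
-static int foo_resources_register(struct devlink *devlink)
-{
-	struct devlink_resource_size_params params;
-	int err;
-
-	devlink_resource_size_params_init(&params, 0, 1 << 20, 1 << 12,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devl_resource_register(devlink, "kvd", 1 << 20, FOO_RES_KVD,
-				     DEVLINK_RESOURCE_ID_PARENT_TOP, &params);
-	if (err)
-		return err;
-	return devl_resource_register(devlink, "kvd_linear", 1 << 18,
-				      FOO_RES_KVD_LINEAR, FOO_RES_KVD,
-				      &params);
-}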
-
-static void devlink_resource_unregister(struct devlink *devlink,
- struct devlink_resource *resource)
-{
- struct devlink_resource *tmp, *child_resource;
-
- list_for_each_entry_safe(child_resource, tmp, &resource->resource_list,
- list) {
- devlink_resource_unregister(devlink, child_resource);
- list_del(&child_resource->list);
- kfree(child_resource);
- }
-}
-
-/**
- * devl_resources_unregister - free all resources
- *
- * @devlink: devlink
- */
-void devl_resources_unregister(struct devlink *devlink)
-{
- struct devlink_resource *tmp, *child_resource;
-
- lockdep_assert_held(&devlink->lock);
-
- list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
- list) {
- devlink_resource_unregister(devlink, child_resource);
- list_del(&child_resource->list);
- kfree(child_resource);
- }
-}
-EXPORT_SYMBOL_GPL(devl_resources_unregister);
-
-/**
- * devlink_resources_unregister - free all resources
- *
- * @devlink: devlink
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_resources_unregister(struct devlink *devlink)
-{
- devl_lock(devlink);
- devl_resources_unregister(devlink);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resources_unregister);
-
-/**
- * devl_resource_size_get - get and update size
- *
- * @devlink: devlink
- * @resource_id: the requested resource id
- * @p_resource_size: ptr to update
- */
-int devl_resource_size_get(struct devlink *devlink,
- u64 resource_id,
- u64 *p_resource_size)
-{
- struct devlink_resource *resource;
-
- lockdep_assert_held(&devlink->lock);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (!resource)
- return -EINVAL;
- *p_resource_size = resource->size_new;
- resource->size = resource->size_new;
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_resource_size_get);
-
-/**
- * devl_dpipe_table_resource_set - set the resource id
- *
- * @devlink: devlink
- * @table_name: table name
- * @resource_id: resource id
- * @resource_units: number of resource's units consumed per table's entry
- */
-int devl_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units)
-{
- struct devlink_dpipe_table *table;
-
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return -EINVAL;
-
- table->resource_id = resource_id;
- table->resource_units = resource_units;
- table->resource_valid = true;
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
-
-/**
- * devl_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- */
-void devl_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
- struct devlink_resource *resource;
-
- lockdep_assert_held(&devlink->lock);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (WARN_ON(!resource))
- return;
- WARN_ON(resource->occ_get);
-
- resource->occ_get = occ_get;
- resource->occ_get_priv = occ_get_priv;
-}
-EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
-
-/**
- * devlink_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
- devl_lock(devlink);
- devl_resource_occ_get_register(devlink, resource_id,
- occ_get, occ_get_priv);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
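-
-/* Illustrative sketch (driver fields assumed): the occupancy getter is a
- * plain callback returning how much of the resource is currently in use;
- * @priv is whatever was passed at registration time, e.g.:
- *
- *	devl_resource_occ_get_register(devlink, FOO_RES_KVD,
- *				       foo_kvd_occ_get, foo);
- */
-static u64 foo_kvd_occ_get(void *priv)
-{
-	struct foo_dev *foo = priv;
-
-	return atomic64_read(&foo->kvd_in_use);
-}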
-
-/**
- * devl_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- */
-void devl_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
- struct devlink_resource *resource;
-
- lockdep_assert_held(&devlink->lock);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (WARN_ON(!resource))
- return;
- WARN_ON(!resource->occ_get);
-
- resource->occ_get = NULL;
- resource->occ_get_priv = NULL;
-}
-EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
-
-/**
- * devlink_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
- devl_lock(devlink);
- devl_resource_occ_get_unregister(devlink, resource_id);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
-
-static int devlink_param_verify(const struct devlink_param *param)
-{
- if (!param || !param->name || !param->supported_cmodes)
- return -EINVAL;
- if (param->generic)
- return devlink_param_generic_verify(param);
- else
- return devlink_param_driver_verify(param);
-}
-
-static int devlink_param_register(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
- int err;
-
- WARN_ON(devlink_param_verify(param));
- WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
-
- if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
- WARN_ON(param->get || param->set);
- else
- WARN_ON(!param->get || !param->set);
-
- param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
- if (!param_item)
- return -ENOMEM;
-
- param_item->param = param;
-
- err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
- if (err)
- goto err_xa_insert;
-
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
- return 0;
-
-err_xa_insert:
- kfree(param_item);
- return err;
-}
-
-static void devlink_param_unregister(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink->params, param->id);
- if (WARN_ON(!param_item))
- return;
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
- xa_erase(&devlink->params, param->id);
- kfree(param_item);
-}
-
-/**
- * devl_params_register - register configuration parameters
- *
- * @devlink: devlink
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- *
- * Register the configuration parameters supported by the driver.
- */
-int devl_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- const struct devlink_param *param = params;
- int i, err;
-
- lockdep_assert_held(&devlink->lock);
-
- for (i = 0; i < params_count; i++, param++) {
- err = devlink_param_register(devlink, param);
- if (err)
- goto rollback;
- }
- return 0;
-
-rollback:
- if (!i)
- return err;
-
- for (param--; i > 0; i--, param--)
- devlink_param_unregister(devlink, param);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_params_register);
-
-int devlink_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_params_register(devlink, params, params_count);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_params_register);
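-
-/* Illustrative sketch, assuming the DEVLINK_PARAM_GENERIC() initializer
- * from include/net/devlink.h: a driverinit-only generic parameter, which
- * per the WARN_ONs in devlink_param_register() must have NULL get/set.
- * Registered with:
- *	devl_params_register(devlink, foo_params, ARRAY_SIZE(foo_params));
- */
-static const struct devlink_param foo_params[] = {
-	DEVLINK_PARAM_GENERIC(MAX_MACS,
-			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
-			      NULL, NULL, NULL),
-};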
-
-/**
- * devl_params_unregister - unregister configuration parameters
- * @devlink: devlink
- * @params: configuration parameters to unregister
- * @params_count: number of parameters provided
- */
-void devl_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- const struct devlink_param *param = params;
- int i;
-
- lockdep_assert_held(&devlink->lock);
-
- for (i = 0; i < params_count; i++, param++)
- devlink_param_unregister(devlink, param);
-}
-EXPORT_SYMBOL_GPL(devl_params_unregister);
-
-void devlink_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- devl_lock(devlink);
- devl_params_unregister(devlink, params, params_count);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_params_unregister);
-
-/**
- * devl_param_driverinit_value_get - get configuration parameter
- * value for driver initialization
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @val: pointer to store the value of parameter in driverinit
- * configuration mode
- *
- * This function should be used by the driver to get the driverinit
- * configuration for initialization after a reload command.
- *
- * Note that a lockless call of this function relies on the
- * driver to maintain the following basic sane behavior:
- * 1) Driver ensures a call to this function cannot race with
- * registering/unregistering the parameter with the same parameter ID.
- * 2) Driver ensures a call to this function cannot race with
- * devl_param_driverinit_value_set() call with the same parameter ID.
- * 3) Driver ensures a call to this function cannot race with
- * reload operation.
- * If the driver is not able to comply, it has to take the devlink->lock
- * while calling this.
- */
-int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *val)
-{
- struct devlink_param_item *param_item;
-
- if (WARN_ON(!devlink_reload_supported(devlink->ops)))
- return -EOPNOTSUPP;
-
- param_item = devlink_param_find_by_id(&devlink->params, param_id);
- if (!param_item)
- return -EINVAL;
-
- if (!param_item->driverinit_value_valid)
- return -EOPNOTSUPP;
-
- if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT)))
- return -EOPNOTSUPP;
-
- *val = param_item->driverinit_value;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
-
-/**
- * devl_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @init_val: value of parameter to set for driverinit configuration mode
- *
- * This function should be used by the driver to set the default value
- * for driverinit configuration mode.
- */
-void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
- union devlink_param_value init_val)
-{
- struct devlink_param_item *param_item;
-
- devl_assert_locked(devlink);
-
- param_item = devlink_param_find_by_id(&devlink->params, param_id);
- if (WARN_ON(!param_item))
- return;
-
- if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT)))
- return;
-
- param_item->driverinit_value = init_val;
- param_item->driverinit_value_valid = true;
-
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
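-
-/* Illustrative sketch (the foo_dev field is assumed): after reload the
- * driver picks up the driverinit value, keeping its built-in default
- * when no value was ever set.
- */
-static void foo_max_macs_load(struct devlink *devlink, struct foo_dev *foo)
-{
-	union devlink_param_value val;
-
-	if (!devl_param_driverinit_value_get(devlink,
-					     DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
-					     &val))
-		foo->max_macs = val.vu32;
-}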
-
-void devlink_params_driverinit_load_new(struct devlink *devlink)
-{
- struct devlink_param_item *param_item;
- unsigned long param_id;
-
- xa_for_each(&devlink->params, param_id, param_item) {
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT) ||
- !param_item->driverinit_value_new_valid)
- continue;
- param_item->driverinit_value = param_item->driverinit_value_new;
- param_item->driverinit_value_valid = true;
- param_item->driverinit_value_new_valid = false;
- }
-}
-
-/**
- * devl_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
- *
- * @devlink: devlink
- * @param_id: parameter ID
- *
- * This function should be used by the driver to notify devlink of a value
- * change, excluding driverinit configuration mode.
- * For driverinit configuration mode, the driver should use
- * devl_param_driverinit_value_set() instead.
- */
-void devl_param_value_changed(struct devlink *devlink, u32 param_id)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink->params, param_id);
- WARN_ON(!param_item);
-
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devl_param_value_changed);
-
-/**
- * devl_region_create - create a new address region
- *
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- */
-struct devlink_region *devl_region_create(struct devlink *devlink,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots,
- u64 region_size)
-{
- struct devlink_region *region;
-
- devl_assert_locked(devlink);
-
- if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
- return ERR_PTR(-EINVAL);
-
- if (devlink_region_get_by_name(devlink, ops->name))
- return ERR_PTR(-EEXIST);
-
- region = kzalloc(sizeof(*region), GFP_KERNEL);
- if (!region)
- return ERR_PTR(-ENOMEM);
-
- region->devlink = devlink;
- region->max_snapshots = region_max_snapshots;
- region->ops = ops;
- region->size = region_size;
- INIT_LIST_HEAD(&region->snapshot_list);
- mutex_init(&region->snapshot_lock);
- list_add_tail(&region->list, &devlink->region_list);
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
- return region;
-}
-EXPORT_SYMBOL_GPL(devl_region_create);
-
-/**
- * devlink_region_create - create a new address region
- *
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-struct devlink_region *
-devlink_region_create(struct devlink *devlink,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
-{
- struct devlink_region *region;
-
- devl_lock(devlink);
- region = devl_region_create(devlink, ops, region_max_snapshots,
- region_size);
- devl_unlock(devlink);
- return region;
-}
-EXPORT_SYMBOL_GPL(devlink_region_create);
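-
-/* Illustrative sketch (region name and sizes assumed; caller holds the
- * instance lock): the mandatory destructor must pair with however the
- * snapshot data is allocated - kvfree() matches kvzalloc()'d buffers.
- */
-static const struct devlink_region_ops foo_fw_region_ops = {
-	.name = "fw-health",
-	.destructor = kvfree,
-};
-
-static struct devlink_region *foo_region_init(struct devlink *devlink)
-{
-	/* Up to 8 snapshots of 64 KB each. */
-	return devl_region_create(devlink, &foo_fw_region_ops, 8, 0x10000);
-}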
-
-/**
- * devlink_port_region_create - create a new address region for a port
- *
- * @port: devlink port
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-struct devlink_region *
-devlink_port_region_create(struct devlink_port *port,
- const struct devlink_port_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
-{
- struct devlink *devlink = port->devlink;
- struct devlink_region *region;
- int err = 0;
-
- ASSERT_DEVLINK_PORT_INITIALIZED(port);
-
- if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
- return ERR_PTR(-EINVAL);
-
- devl_lock(devlink);
-
- if (devlink_port_region_get_by_name(port, ops->name)) {
- err = -EEXIST;
- goto unlock;
- }
-
- region = kzalloc(sizeof(*region), GFP_KERNEL);
- if (!region) {
- err = -ENOMEM;
- goto unlock;
- }
-
- region->devlink = devlink;
- region->port = port;
- region->max_snapshots = region_max_snapshots;
- region->port_ops = ops;
- region->size = region_size;
- INIT_LIST_HEAD(&region->snapshot_list);
- mutex_init(&region->snapshot_lock);
- list_add_tail(&region->list, &port->region_list);
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
- devl_unlock(devlink);
- return region;
-
-unlock:
- devl_unlock(devlink);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(devlink_port_region_create);
-
-/**
- * devl_region_destroy - destroy address region
- *
- * @region: devlink region to destroy
- */
-void devl_region_destroy(struct devlink_region *region)
-{
- struct devlink *devlink = region->devlink;
- struct devlink_snapshot *snapshot, *ts;
-
- devl_assert_locked(devlink);
-
- /* Free all snapshots of region */
- mutex_lock(&region->snapshot_lock);
- list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
- devlink_region_snapshot_del(region, snapshot);
- mutex_unlock(&region->snapshot_lock);
-
- list_del(&region->list);
- mutex_destroy(&region->snapshot_lock);
-
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
- kfree(region);
-}
-EXPORT_SYMBOL_GPL(devl_region_destroy);
-
-/**
- * devlink_region_destroy - destroy address region
- *
- * @region: devlink region to destroy
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_region_destroy(struct devlink_region *region)
-{
- struct devlink *devlink = region->devlink;
-
- devl_lock(devlink);
- devl_region_destroy(region);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_region_destroy);
-
-/**
- * devlink_region_snapshot_id_get - get snapshot ID
- *
- * This function should be called when adding a new snapshot.
- * The driver should use the same ID for multiple snapshots taken
- * on multiple regions at the same time/by the same trigger.
- *
- * The caller of this function must use devlink_region_snapshot_id_put
- * when finished creating snapshots with this ID.
- *
- * Returns zero on success, or a negative error code on failure.
- *
- * @devlink: devlink
- * @id: storage to return id
- */
-int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
-{
- return __devlink_region_snapshot_id_get(devlink, id);
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
-
-/**
- * devlink_region_snapshot_id_put - put snapshot ID reference
- *
- * This should be called by a driver after finishing creating snapshots
- * with an id. Doing so ensures that the ID can later be released in the
- * event that all snapshots using it have been destroyed.
- *
- * @devlink: devlink
- * @id: id to release reference on
- */
-void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
-{
- __devlink_snapshot_id_decrement(devlink, id);
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
-
-/**
- * devlink_region_snapshot_create - create a new snapshot
- *
- * This will add a new snapshot of a region. The snapshot
- * will be stored on the region struct and can be accessed
- * from devlink. This is useful for future analyses of snapshots.
- * Multiple snapshots can be created on a region.
- * The @snapshot_id should be obtained using the getter function.
- *
- * @region: devlink region of the snapshot
- * @data: snapshot data
- * @snapshot_id: snapshot id to be created
- */
-int devlink_region_snapshot_create(struct devlink_region *region,
- u8 *data, u32 snapshot_id)
-{
- int err;
-
- mutex_lock(&region->snapshot_lock);
- err = __devlink_region_snapshot_create(region, data, snapshot_id);
- mutex_unlock(&region->snapshot_lock);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
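-
-/* Illustrative sketch (the trigger and kvzalloc()'d @data are assumed):
- * one snapshot ID covers every region captured for the same event; on
- * success the region takes ownership of the buffer and later frees it
- * through the region ops destructor, otherwise the caller frees it.
- */
-static void foo_crash_capture(struct devlink *devlink,
-			      struct devlink_region *region, u8 *data)
-{
-	u32 snapshot_id;
-
-	if (devlink_region_snapshot_id_get(devlink, &snapshot_id)) {
-		kvfree(data);
-		return;
-	}
-	if (devlink_region_snapshot_create(region, data, snapshot_id))
-		kvfree(data);
-	devlink_region_snapshot_id_put(devlink, snapshot_id);
-}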
-
-#define DEVLINK_TRAP(_id, _type) \
- { \
- .type = DEVLINK_TRAP_TYPE_##_type, \
- .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
- .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
- }
-
-static const struct devlink_trap devlink_trap_generic[] = {
- DEVLINK_TRAP(SMAC_MC, DROP),
- DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
- DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
- DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
- DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
- DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
- DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
- DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
- DEVLINK_TRAP(TAIL_DROP, DROP),
- DEVLINK_TRAP(NON_IP_PACKET, DROP),
- DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
- DEVLINK_TRAP(DIP_LB, DROP),
- DEVLINK_TRAP(SIP_MC, DROP),
- DEVLINK_TRAP(SIP_LB, DROP),
- DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
- DEVLINK_TRAP(IPV4_SIP_BC, DROP),
- DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
- DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
- DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
- DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
- DEVLINK_TRAP(RPF, EXCEPTION),
- DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
- DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
- DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
- DEVLINK_TRAP(NON_ROUTABLE, DROP),
- DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
- DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
- DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
- DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
- DEVLINK_TRAP(STP, CONTROL),
- DEVLINK_TRAP(LACP, CONTROL),
- DEVLINK_TRAP(LLDP, CONTROL),
- DEVLINK_TRAP(IGMP_QUERY, CONTROL),
- DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
- DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
- DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
- DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
- DEVLINK_TRAP(MLD_QUERY, CONTROL),
- DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
- DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
- DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
- DEVLINK_TRAP(IPV4_DHCP, CONTROL),
- DEVLINK_TRAP(IPV6_DHCP, CONTROL),
- DEVLINK_TRAP(ARP_REQUEST, CONTROL),
- DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
- DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
- DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
- DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
- DEVLINK_TRAP(IPV4_BFD, CONTROL),
- DEVLINK_TRAP(IPV6_BFD, CONTROL),
- DEVLINK_TRAP(IPV4_OSPF, CONTROL),
- DEVLINK_TRAP(IPV6_OSPF, CONTROL),
- DEVLINK_TRAP(IPV4_BGP, CONTROL),
- DEVLINK_TRAP(IPV6_BGP, CONTROL),
- DEVLINK_TRAP(IPV4_VRRP, CONTROL),
- DEVLINK_TRAP(IPV6_VRRP, CONTROL),
- DEVLINK_TRAP(IPV4_PIM, CONTROL),
- DEVLINK_TRAP(IPV6_PIM, CONTROL),
- DEVLINK_TRAP(UC_LB, CONTROL),
- DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
- DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
- DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
- DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
- DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
- DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
- DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
- DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
- DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
- DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
- DEVLINK_TRAP(PTP_EVENT, CONTROL),
- DEVLINK_TRAP(PTP_GENERAL, CONTROL),
- DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
- DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
- DEVLINK_TRAP(EARLY_DROP, DROP),
- DEVLINK_TRAP(VXLAN_PARSING, DROP),
- DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
- DEVLINK_TRAP(VLAN_PARSING, DROP),
- DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
- DEVLINK_TRAP(MPLS_PARSING, DROP),
- DEVLINK_TRAP(ARP_PARSING, DROP),
- DEVLINK_TRAP(IP_1_PARSING, DROP),
- DEVLINK_TRAP(IP_N_PARSING, DROP),
- DEVLINK_TRAP(GRE_PARSING, DROP),
- DEVLINK_TRAP(UDP_PARSING, DROP),
- DEVLINK_TRAP(TCP_PARSING, DROP),
- DEVLINK_TRAP(IPSEC_PARSING, DROP),
- DEVLINK_TRAP(SCTP_PARSING, DROP),
- DEVLINK_TRAP(DCCP_PARSING, DROP),
- DEVLINK_TRAP(GTP_PARSING, DROP),
- DEVLINK_TRAP(ESP_PARSING, DROP),
- DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
- DEVLINK_TRAP(DMAC_FILTER, DROP),
- DEVLINK_TRAP(EAPOL, CONTROL),
- DEVLINK_TRAP(LOCKED_PORT, DROP),
-};
-
-#define DEVLINK_TRAP_GROUP(_id) \
- { \
- .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
- .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
- }
-
-static const struct devlink_trap_group devlink_trap_group_generic[] = {
- DEVLINK_TRAP_GROUP(L2_DROPS),
- DEVLINK_TRAP_GROUP(L3_DROPS),
- DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
- DEVLINK_TRAP_GROUP(BUFFER_DROPS),
- DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
- DEVLINK_TRAP_GROUP(ACL_DROPS),
- DEVLINK_TRAP_GROUP(STP),
- DEVLINK_TRAP_GROUP(LACP),
- DEVLINK_TRAP_GROUP(LLDP),
- DEVLINK_TRAP_GROUP(MC_SNOOPING),
- DEVLINK_TRAP_GROUP(DHCP),
- DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
- DEVLINK_TRAP_GROUP(BFD),
- DEVLINK_TRAP_GROUP(OSPF),
- DEVLINK_TRAP_GROUP(BGP),
- DEVLINK_TRAP_GROUP(VRRP),
- DEVLINK_TRAP_GROUP(PIM),
- DEVLINK_TRAP_GROUP(UC_LB),
- DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
- DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
- DEVLINK_TRAP_GROUP(IPV6),
- DEVLINK_TRAP_GROUP(PTP_EVENT),
- DEVLINK_TRAP_GROUP(PTP_GENERAL),
- DEVLINK_TRAP_GROUP(ACL_SAMPLE),
- DEVLINK_TRAP_GROUP(ACL_TRAP),
- DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
- DEVLINK_TRAP_GROUP(EAPOL),
-};
-
-static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-{
- if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
- return -EINVAL;
-
- if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
- return -EINVAL;
-
- if (trap->type != devlink_trap_generic[trap->id].type)
- return -EINVAL;
-
- return 0;
-}
-
-static int devlink_trap_driver_verify(const struct devlink_trap *trap)
-{
- int i;
-
- if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
- if (!strcmp(trap->name, devlink_trap_generic[i].name))
- return -EEXIST;
- }
-
- return 0;
-}
-
-static int devlink_trap_verify(const struct devlink_trap *trap)
-{
- if (!trap || !trap->name)
- return -EINVAL;
-
- if (trap->generic)
- return devlink_trap_generic_verify(trap);
- else
- return devlink_trap_driver_verify(trap);
-}
-
-static int
-devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
-{
- if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
- return -EINVAL;
-
- if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
- return -EINVAL;
-
- return 0;
-}
-
-static int
-devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
-{
- int i;
-
- if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
- if (!strcmp(group->name, devlink_trap_group_generic[i].name))
- return -EEXIST;
- }
-
- return 0;
-}
-
-static int devlink_trap_group_verify(const struct devlink_trap_group *group)
-{
- if (group->generic)
- return devlink_trap_group_generic_verify(group);
- else
- return devlink_trap_group_driver_verify(group);
-}
-
-static void
-devlink_trap_group_notify(struct devlink *devlink,
- const struct devlink_trap_group_item *group_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
- cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
- 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_trap_item_group_link(struct devlink *devlink,
- struct devlink_trap_item *trap_item)
-{
- u16 group_id = trap_item->trap->init_group_id;
- struct devlink_trap_group_item *group_item;
-
- group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id);
- if (WARN_ON_ONCE(!group_item))
- return -EINVAL;
-
- trap_item->group_item = group_item;
-
- return 0;
-}
-
-static void devlink_trap_notify(struct devlink *devlink,
- const struct devlink_trap_item *trap_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
- cmd != DEVLINK_CMD_TRAP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_trap_register(struct devlink *devlink,
- const struct devlink_trap *trap, void *priv)
-{
- struct devlink_trap_item *trap_item;
- int err;
-
- if (devlink_trap_item_lookup(devlink, trap->name))
- return -EEXIST;
-
- trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
- if (!trap_item)
- return -ENOMEM;
-
- trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
- if (!trap_item->stats) {
- err = -ENOMEM;
- goto err_stats_alloc;
- }
-
- trap_item->trap = trap;
- trap_item->action = trap->init_action;
- trap_item->priv = priv;
-
- err = devlink_trap_item_group_link(devlink, trap_item);
- if (err)
- goto err_group_link;
-
- err = devlink->ops->trap_init(devlink, trap, trap_item);
- if (err)
- goto err_trap_init;
-
- list_add_tail(&trap_item->list, &devlink->trap_list);
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-
- return 0;
-
-err_trap_init:
-err_group_link:
- free_percpu(trap_item->stats);
-err_stats_alloc:
- kfree(trap_item);
- return err;
-}
-
-static void devlink_trap_unregister(struct devlink *devlink,
- const struct devlink_trap *trap)
-{
- struct devlink_trap_item *trap_item;
-
- trap_item = devlink_trap_item_lookup(devlink, trap->name);
- if (WARN_ON_ONCE(!trap_item))
- return;
-
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
- list_del(&trap_item->list);
- if (devlink->ops->trap_fini)
- devlink->ops->trap_fini(devlink, trap, trap_item);
- free_percpu(trap_item->stats);
- kfree(trap_item);
-}
-
-static void devlink_trap_disable(struct devlink *devlink,
- const struct devlink_trap *trap)
-{
- struct devlink_trap_item *trap_item;
-
- trap_item = devlink_trap_item_lookup(devlink, trap->name);
- if (WARN_ON_ONCE(!trap_item))
- return;
-
- devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
- NULL);
- trap_item->action = DEVLINK_TRAP_ACTION_DROP;
-}
-
-/**
- * devl_traps_register - Register packet traps with devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- * @priv: Driver private information.
- *
- * Return: Non-zero value on failure.
- */
-int devl_traps_register(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count, void *priv)
-{
- int i, err;
-
- if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
- return -EINVAL;
-
- devl_assert_locked(devlink);
- for (i = 0; i < traps_count; i++) {
- const struct devlink_trap *trap = &traps[i];
-
- err = devlink_trap_verify(trap);
- if (err)
- goto err_trap_verify;
-
- err = devlink_trap_register(devlink, trap, priv);
- if (err)
- goto err_trap_register;
- }
-
- return 0;
-
-err_trap_register:
-err_trap_verify:
- for (i--; i >= 0; i--)
- devlink_trap_unregister(devlink, &traps[i]);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_traps_register);
-
-/**
- * devlink_traps_register - Register packet traps with devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- * @priv: Driver private information.
- *
- * Context: Takes and releases devlink->lock <mutex>.
- *
- * Return: Non-zero value on failure.
- */
-int devlink_traps_register(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count, void *priv)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_traps_register(devlink, traps, traps_count, priv);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_traps_register);
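-
-/* Illustrative sketch, assuming the DEVLINK_TRAP_GENERIC() initializer
- * from include/net/devlink.h: one generic L2 drop trap bound to the
- * generic l2_drops group (which must have been registered first via
- * devl_trap_groups_register()). Registered with:
- *	devl_traps_register(devlink, foo_traps, ARRAY_SIZE(foo_traps), foo);
- */
-static const struct devlink_trap foo_traps[] = {
-	DEVLINK_TRAP_GENERIC(DROP, DROP, SMAC_MC,
-			     DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
-			     DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT),
-};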
-
-/**
- * devl_traps_unregister - Unregister packet traps from devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- */
-void devl_traps_unregister(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count)
-{
- int i;
-
- devl_assert_locked(devlink);
- /* Make sure we do not have any packets in-flight while unregistering
- * traps by disabling all of them and waiting for a grace period.
- */
- for (i = traps_count - 1; i >= 0; i--)
- devlink_trap_disable(devlink, &traps[i]);
- synchronize_rcu();
- for (i = traps_count - 1; i >= 0; i--)
- devlink_trap_unregister(devlink, &traps[i]);
-}
-EXPORT_SYMBOL_GPL(devl_traps_unregister);
-
-/**
- * devlink_traps_unregister - Unregister packet traps from devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_traps_unregister(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count)
-{
- devl_lock(devlink);
- devl_traps_unregister(devlink, traps, traps_count);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_traps_unregister);
-
-static void
-devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
- size_t skb_len)
-{
- struct devlink_stats *stats;
-
- stats = this_cpu_ptr(trap_stats);
- u64_stats_update_begin(&stats->syncp);
- u64_stats_add(&stats->rx_bytes, skb_len);
- u64_stats_inc(&stats->rx_packets);
- u64_stats_update_end(&stats->syncp);
-}
-
-static void
-devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
- const struct devlink_trap_item *trap_item,
- struct devlink_port *in_devlink_port,
- const struct flow_action_cookie *fa_cookie)
-{
- metadata->trap_name = trap_item->trap->name;
- metadata->trap_group_name = trap_item->group_item->group->name;
- metadata->fa_cookie = fa_cookie;
- metadata->trap_type = trap_item->trap->type;
-
- spin_lock(&in_devlink_port->type_lock);
- if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
- metadata->input_dev = in_devlink_port->type_eth.netdev;
- spin_unlock(&in_devlink_port->type_lock);
-}
-
-/**
- * devlink_trap_report - Report trapped packet to drop monitor.
- * @devlink: devlink.
- * @skb: Trapped packet.
- * @trap_ctx: Trap context.
- * @in_devlink_port: Input devlink port.
- * @fa_cookie: Flow action cookie. Could be NULL.
- */
-void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
- void *trap_ctx, struct devlink_port *in_devlink_port,
- const struct flow_action_cookie *fa_cookie)
-{
- struct devlink_trap_item *trap_item = trap_ctx;
-
- devlink_trap_stats_update(trap_item->stats, skb->len);
- devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
-
- if (trace_devlink_trap_report_enabled()) {
- struct devlink_trap_metadata metadata = {};
-
- devlink_trap_report_metadata_set(&metadata, trap_item,
- in_devlink_port, fa_cookie);
- trace_devlink_trap_report(devlink, skb, &metadata);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_trap_report);
-
-/**
- * devlink_trap_ctx_priv - Trap context to driver private information.
- * @trap_ctx: Trap context.
- *
- * Return: Driver private information passed during registration.
- */
-void *devlink_trap_ctx_priv(void *trap_ctx)
-{
- struct devlink_trap_item *trap_item = trap_ctx;
-
- return trap_item->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
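-
-/* Illustrative sketch (port/skb plumbing assumed): a driver Rx path
- * recovers its private data with devlink_trap_ctx_priv() and hands the
- * trapped packet to drop monitor via devlink_trap_report().
- */
-static void foo_rx_trap(struct foo_port *port, struct sk_buff *skb,
-			void *trap_ctx)
-{
-	struct foo_dev *foo = devlink_trap_ctx_priv(trap_ctx);
-
-	devlink_trap_report(foo->devlink, skb, trap_ctx,
-			    &port->devlink_port, NULL);
-}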
-
-static int
-devlink_trap_group_item_policer_link(struct devlink *devlink,
- struct devlink_trap_group_item *group_item)
-{
- u32 policer_id = group_item->group->init_policer_id;
- struct devlink_trap_policer_item *policer_item;
-
- if (policer_id == 0)
- return 0;
-
- policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
- if (WARN_ON_ONCE(!policer_item))
- return -EINVAL;
-
- group_item->policer_item = policer_item;
-
- return 0;
-}
-
-static int
-devlink_trap_group_register(struct devlink *devlink,
- const struct devlink_trap_group *group)
-{
- struct devlink_trap_group_item *group_item;
- int err;
-
- if (devlink_trap_group_item_lookup(devlink, group->name))
- return -EEXIST;
-
- group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
- if (!group_item)
- return -ENOMEM;
-
- group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
- if (!group_item->stats) {
- err = -ENOMEM;
- goto err_stats_alloc;
- }
-
- group_item->group = group;
-
- err = devlink_trap_group_item_policer_link(devlink, group_item);
- if (err)
- goto err_policer_link;
-
- if (devlink->ops->trap_group_init) {
- err = devlink->ops->trap_group_init(devlink, group);
- if (err)
- goto err_group_init;
- }
-
- list_add_tail(&group_item->list, &devlink->trap_group_list);
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW);
-
- return 0;
-
-err_group_init:
-err_policer_link:
- free_percpu(group_item->stats);
-err_stats_alloc:
- kfree(group_item);
- return err;
-}
-
-static void
-devlink_trap_group_unregister(struct devlink *devlink,
- const struct devlink_trap_group *group)
-{
- struct devlink_trap_group_item *group_item;
-
- group_item = devlink_trap_group_item_lookup(devlink, group->name);
- if (WARN_ON_ONCE(!group_item))
- return;
-
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_DEL);
- list_del(&group_item->list);
- free_percpu(group_item->stats);
- kfree(group_item);
-}
-
-/**
- * devl_trap_groups_register - Register packet trap groups with devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Return: Non-zero value on failure.
- */
-int devl_trap_groups_register(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- int i, err;
-
- devl_assert_locked(devlink);
- for (i = 0; i < groups_count; i++) {
- const struct devlink_trap_group *group = &groups[i];
-
- err = devlink_trap_group_verify(group);
- if (err)
- goto err_trap_group_verify;
-
- err = devlink_trap_group_register(devlink, group);
- if (err)
- goto err_trap_group_register;
- }
-
- return 0;
-
-err_trap_group_register:
-err_trap_group_verify:
- for (i--; i >= 0; i--)
- devlink_trap_group_unregister(devlink, &groups[i]);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_trap_groups_register);
-
-/**
- * devlink_trap_groups_register - Register packet trap groups with devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Context: Takes and releases devlink->lock <mutex>.
- *
- * Return: Non-zero value on failure.
- */
-int devlink_trap_groups_register(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_trap_groups_register(devlink, groups, groups_count);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
-
-/**
- * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- */
-void devl_trap_groups_unregister(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- int i;
-
- devl_assert_locked(devlink);
- for (i = groups_count - 1; i >= 0; i--)
- devlink_trap_group_unregister(devlink, &groups[i]);
-}
-EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
-
-/**
- * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Context: Takes and releases devlink->lock <mutex>.
- */
-void devlink_trap_groups_unregister(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- devl_lock(devlink);
- devl_trap_groups_unregister(devlink, groups, groups_count);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
-
-static void
-devlink_trap_policer_notify(struct devlink *devlink,
- const struct devlink_trap_policer_item *policer_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
- cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
- 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_trap_policer_register(struct devlink *devlink,
- const struct devlink_trap_policer *policer)
-{
- struct devlink_trap_policer_item *policer_item;
- int err;
-
- if (devlink_trap_policer_item_lookup(devlink, policer->id))
- return -EEXIST;
-
- policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
- if (!policer_item)
- return -ENOMEM;
-
- policer_item->policer = policer;
- policer_item->rate = policer->init_rate;
- policer_item->burst = policer->init_burst;
-
- if (devlink->ops->trap_policer_init) {
- err = devlink->ops->trap_policer_init(devlink, policer);
- if (err)
- goto err_policer_init;
- }
-
- list_add_tail(&policer_item->list, &devlink->trap_policer_list);
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW);
-
- return 0;
-
-err_policer_init:
- kfree(policer_item);
- return err;
-}
-
-static void
-devlink_trap_policer_unregister(struct devlink *devlink,
- const struct devlink_trap_policer *policer)
-{
- struct devlink_trap_policer_item *policer_item;
-
- policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
- if (WARN_ON_ONCE(!policer_item))
- return;
-
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_DEL);
- list_del(&policer_item->list);
- if (devlink->ops->trap_policer_fini)
- devlink->ops->trap_policer_fini(devlink, policer);
- kfree(policer_item);
-}
-
-/**
- * devl_trap_policers_register - Register packet trap policers with devlink.
- * @devlink: devlink.
- * @policers: Packet trap policers.
- * @policers_count: Count of provided packet trap policers.
- *
- * Return: Non-zero value on failure.
- */
-int
-devl_trap_policers_register(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
-{
- int i, err;
-
- devl_assert_locked(devlink);
- for (i = 0; i < policers_count; i++) {
- const struct devlink_trap_policer *policer = &policers[i];
-
- if (WARN_ON(policer->id == 0 ||
- policer->max_rate < policer->min_rate ||
- policer->max_burst < policer->min_burst)) {
- err = -EINVAL;
- goto err_trap_policer_verify;
- }
-
- err = devlink_trap_policer_register(devlink, policer);
- if (err)
- goto err_trap_policer_register;
- }
- return 0;
-
-err_trap_policer_register:
-err_trap_policer_verify:
- for (i--; i >= 0; i--)
- devlink_trap_policer_unregister(devlink, &policers[i]);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_trap_policers_register);
-
-/**
- * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
- * @devlink: devlink.
- * @policers: Packet trap policers.
- * @policers_count: Count of provided packet trap policers.
- */
-void
-devl_trap_policers_unregister(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
-{
- int i;
-
- devl_assert_locked(devlink);
- for (i = policers_count - 1; i >= 0; i--)
- devlink_trap_policer_unregister(devlink, &policers[i]);
-}
-EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
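-
-/* Illustrative sketch with made-up limits: a single policer. The id must
- * be non-zero and the bounds must satisfy the WARN_ON checks in
- * devl_trap_policers_register() above.
- */
-static const struct devlink_trap_policer foo_policers[] = {
-	{
-		.id = 1,
-		.init_rate = 1000,	/* packets per second */
-		.init_burst = 128,
-		.max_rate = 1000000,
-		.min_rate = 1,
-		.max_burst = 4096,
-		.min_burst = 1,
-	},
-};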
-
-int devlink_compat_phys_port_name_get(struct net_device *dev,
- char *name, size_t len)
-{
- struct devlink_port *devlink_port;
-
- /* The RTNL mutex is held here, which ensures that the devlink_port
- * instance cannot disappear in the middle. No need to take
- * any devlink lock as only permanent values are accessed.
- */
- ASSERT_RTNL();
-
- devlink_port = dev->devlink_port;
- if (!devlink_port)
- return -EOPNOTSUPP;
-
- return __devlink_port_phys_port_name_get(devlink_port, name, len);
-}
-
-int devlink_compat_switch_id_get(struct net_device *dev,
- struct netdev_phys_item_id *ppid)
-{
- struct devlink_port *devlink_port;
-
- /* Caller must hold the RTNL mutex or a reference to dev, which ensures
- * that the devlink_port instance cannot disappear in the middle. No need
- * to take any devlink lock as only permanent values are accessed.
- */
- devlink_port = dev->devlink_port;
- if (!devlink_port || !devlink_port->switch_port)
- return -EOPNOTSUPP;
-
- memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
-
- return 0;
-}
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
new file mode 100644
index 000000000000..85c32c314b0f
--- /dev/null
+++ b/net/devlink/linecard.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static struct devlink_linecard *
+devlink_linecard_get_by_index(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ struct devlink_linecard *devlink_linecard;
+
+ list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
+ if (devlink_linecard->index == linecard_index)
+ return devlink_linecard;
+ }
+ return NULL;
+}
+
+static bool devlink_linecard_index_exists(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ return devlink_linecard_get_by_index(devlink, linecard_index);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
+ u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
+ struct devlink_linecard *linecard;
+
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (!linecard)
+ return ERR_PTR(-ENODEV);
+ return linecard;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_linecard_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ struct nlattr *nested_attr;
+
+ nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
+ if (!nested_attr)
+ return -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nested_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, nested_attr);
+ return -EMSGSIZE;
+}
+
+struct devlink_linecard_type {
+ const char *type;
+ const void *priv;
+};
+
+static int devlink_nl_linecard_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_linecard *linecard,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_linecard_type *linecard_type;
+ struct nlattr *attr;
+ void *hdr;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
+ goto nla_put_failure;
+ if (linecard->type &&
+ nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
+ goto nla_put_failure;
+
+ if (linecard->types_count) {
+ attr = nla_nest_start(msg,
+ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
+ if (!attr)
+ goto nla_put_failure;
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
+ linecard_type->type)) {
+ nla_nest_cancel(msg, attr);
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, attr);
+ }
+
+ if (linecard->nested_devlink &&
+ devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_linecard_notify(struct devlink_linecard *linecard,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
+ cmd != DEVLINK_CMD_LINECARD_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
+ NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_linecards_notify_register(struct devlink *devlink)
+{
+ struct devlink_linecard *linecard;
+
+ list_for_each_entry(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+void devlink_linecards_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_linecard *linecard;
+
+ list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+}
+
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_linecard *linecard;
+ struct sk_buff *msg;
+ int err;
+
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard))
+ return PTR_ERR(linecard);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
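+
+/* Userspace view of the doit above (illustrative only; the command form
+ * and output shape assume the standard devlink utility):
+ *
+ *	$ devlink lc show pci/0000:01:00.0 lc 8
+ *	pci/0000:01:00.0:
+ *	  lc 8 state active type 16x100G
+ *	    supported_types:
+ *	      16x100G
+ */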
+
+static int devlink_nl_linecard_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_linecard *linecard;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_linecard_get_dump_one);
+}
+
+static struct devlink_linecard_type *
+devlink_linecard_type_lookup(struct devlink_linecard *linecard,
+ const char *type)
+{
+ struct devlink_linecard_type *linecard_type;
+ int i;
+
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (!strcmp(type, linecard_type->type))
+ return linecard_type;
+ }
+ return NULL;
+}
+
+static int devlink_linecard_type_set(struct devlink_linecard *linecard,
+ const char *type,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_linecard_ops *ops = linecard->ops;
+ struct devlink_linecard_type *linecard_type;
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+
+ linecard_type = devlink_linecard_type_lookup(linecard, type);
+ if (!linecard_type) {
+ NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
+ linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ NL_SET_ERR_MSG(extack, "Line card already provisioned");
+ err = -EBUSY;
+ /* Check whether the line card is already provisioned in the
+ * same way the user asks. If it is, make the operation
+ * return success.
+ */
+ if (ops->same_provision &&
+ ops->same_provision(linecard, linecard->priv,
+ linecard_type->type,
+ linecard_type->priv))
+ err = 0;
+ goto out;
+ }
+
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
+ linecard->type = linecard_type->type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = ops->provision(linecard, linecard->priv, linecard_type->type,
+ linecard_type->priv, extack);
+ if (err) {
+ /* Provisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ err = 0;
+ goto out;
+ }
+
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
+ NL_SET_ERR_MSG(extack, "Line card is not provisioned");
+ err = 0;
+ goto out;
+ }
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = linecard->ops->unprovision(linecard, linecard->priv,
+ extack);
+ if (err) {
+ /* Unprovisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
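+
+/* Provisioning state machine implied by the two helpers above and the
+ * devlink_linecard_provision_set/_fail/_clear() and
+ * devlink_linecard_activate/_deactivate() driver callbacks below
+ * (a summary, not part of the patch):
+ *
+ *	UNPROVISIONED or PROVISIONING_FAILED
+ *	  --type set--> PROVISIONING
+ *	    --provision_set()--> PROVISIONED <--(de)activate--> ACTIVE
+ *	    --provision_fail()--> PROVISIONING_FAILED
+ *	PROVISIONED or ACTIVE
+ *	  --type unset--> UNPROVISIONING
+ *	    --provision_clear()--> UNPROVISIONED
+ *	PROVISIONING_FAILED
+ *	  --type unset--> UNPROVISIONED
+ *
+ * A failed ops->provision() or ops->unprovision() call drops the card
+ * straight back to UNPROVISIONED.
+ */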
+
+int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_linecard *linecard;
+ int err;
+
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard))
+ return PTR_ERR(linecard);
+
+ if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
+ const char *type;
+
+ type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
+ if (strcmp(type, "")) {
+ err = devlink_linecard_type_set(linecard, type, extack);
+ if (err)
+ return err;
+ } else {
+ err = devlink_linecard_type_unset(linecard, extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int devlink_linecard_types_init(struct devlink_linecard *linecard)
+{
+ struct devlink_linecard_type *linecard_type;
+ unsigned int count;
+ int i;
+
+ count = linecard->ops->types_count(linecard, linecard->priv);
+ linecard->types = kmalloc_array(count, sizeof(*linecard_type),
+ GFP_KERNEL);
+ if (!linecard->types)
+ return -ENOMEM;
+ linecard->types_count = count;
+
+ for (i = 0; i < count; i++) {
+ linecard_type = &linecard->types[i];
+ linecard->ops->types_get(linecard, linecard->priv, i,
+ &linecard_type->type,
+ &linecard_type->priv);
+ }
+ return 0;
+}
+
+static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
+{
+ kfree(linecard->types);
+}
+
+/**
+ * devl_linecard_create - Create devlink linecard
+ *
+ * @devlink: devlink
+ * @linecard_index: driver-specific numerical identifier of the linecard
+ * @ops: linecard ops
+ * @priv: user priv pointer
+ *
+ * Create a devlink linecard instance with the provided linecard index.
+ * The caller can use any indexing scheme, even a hardware-related one.
+ *
+ * Return: Line card structure or an ERR_PTR() encoded error code.
+ */
+struct devlink_linecard *
+devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
+{
+ struct devlink_linecard *linecard;
+ int err;
+
+ if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
+ !ops->types_count || !ops->types_get))
+ return ERR_PTR(-EINVAL);
+
+ if (devlink_linecard_index_exists(devlink, linecard_index))
+ return ERR_PTR(-EEXIST);
+
+ linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
+ if (!linecard)
+ return ERR_PTR(-ENOMEM);
+
+ linecard->devlink = devlink;
+ linecard->index = linecard_index;
+ linecard->ops = ops;
+ linecard->priv = priv;
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ mutex_init(&linecard->state_lock);
+
+ err = devlink_linecard_types_init(linecard);
+ if (err) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&linecard->list, &devlink->linecard_list);
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ return linecard;
+}
+EXPORT_SYMBOL_GPL(devl_linecard_create);
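+
+/* A minimal driver-side sketch (hypothetical my_* names, not part of the
+ * patch) of how the mandatory ops fit together with devl_linecard_create().
+ * my_provision()/my_unprovision() are expected to end up calling the
+ * devlink_linecard_provision_set()/_clear() helpers below:
+ *
+ *	static const char * const my_types[] = { "16x100G", "8x200G" };
+ *
+ *	static unsigned int my_types_count(struct devlink_linecard *lc,
+ *					   void *priv)
+ *	{
+ *		return ARRAY_SIZE(my_types);
+ *	}
+ *
+ *	static void my_types_get(struct devlink_linecard *lc, void *priv,
+ *				 unsigned int index, const char **type,
+ *				 const void **type_priv)
+ *	{
+ *		*type = my_types[index];
+ *		*type_priv = NULL;
+ *	}
+ *
+ *	static const struct devlink_linecard_ops my_lc_ops = {
+ *		.provision = my_provision,
+ *		.unprovision = my_unprovision,
+ *		.types_count = my_types_count,
+ *		.types_get = my_types_get,
+ *	};
+ *
+ *	lc = devl_linecard_create(devlink, slot_index, &my_lc_ops, priv);
+ */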
+
+/**
+ * devl_linecard_destroy - Destroy devlink linecard
+ *
+ * @linecard: devlink linecard
+ */
+void devl_linecard_destroy(struct devlink_linecard *linecard)
+{
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+ list_del(&linecard->list);
+ devlink_linecard_types_fini(linecard);
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+}
+EXPORT_SYMBOL_GPL(devl_linecard_destroy);
+
+/**
+ * devlink_linecard_provision_set - Set provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ * @type: linecard type
+ *
+ * This is called either directly from within the provision() op,
+ * or asynchronously as a result of it.
+ */
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+ const char *type)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->type && strcmp(linecard->type, type));
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ linecard->type = type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
+
+/**
+ * devlink_linecard_provision_clear - Clear provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is called either directly from within the unprovision() op,
+ * or asynchronously as a result of it.
+ */
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
+
+/**
+ * devlink_linecard_provision_fail - Fail provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is called either directly from within the provision() op,
+ * or asynchronously as a result of it.
+ */
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
+
+/**
+ * devlink_linecard_activate - Set linecard active
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_activate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
+ linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_activate);
+
+/**
+ * devlink_linecard_deactivate - Set linecard inactive
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_deactivate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ switch (linecard->state) {
+ case DEVLINK_LINECARD_STATE_ACTIVE:
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ break;
+ case DEVLINK_LINECARD_STATE_UNPROVISIONING:
+ /* Line card is being deactivated as part
+ * of the unprovisioning flow.
+ */
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+
+/**
+ * devlink_linecard_nested_dl_set - Attach/detach a nested devlink
+ * instance to a linecard.
+ *
+ * @linecard: devlink linecard
+ * @nested_devlink: devlink instance to attach or NULL to detach
+ */
+void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->nested_devlink = nested_devlink;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index 7a332eb70f70..fc3e7c029a3b 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -82,6 +82,21 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
};
+int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
+{
+ int err;
+
+ if (*msg) {
+ err = genlmsg_reply(*msg, info);
+ if (err)
+ return err;
+ }
+ *msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*msg)
+ return -ENOMEM;
+ return 0;
+}
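+
+/* Intended usage pattern (a sketch; fill_chunk() is a hypothetical
+ * stand-in for an attribute-filling step): doit handlers that may produce
+ * more data than fits into one skb call this between fills, so the current
+ * *msg, if any, is sent as a reply and a fresh skb is allocated for the
+ * next chunk:
+ *
+ *	struct sk_buff *msg = NULL;
+ *	int err;
+ *
+ *	do {
+ *		err = devlink_nl_msg_reply_and_new(&msg, info);
+ *		if (err)
+ *			return err;
+ *		err = fill_chunk(msg);
+ *	} while (err == -EMSGSIZE);
+ *
+ * with a final genlmsg_reply(msg, info) once everything has been written.
+ */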
+
struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
{
@@ -109,10 +124,9 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
return ERR_PTR(-ENODEV);
}
-static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
- struct devlink_linecard *linecard;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -122,42 +136,17 @@ static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
return PTR_ERR(devlink);
info->user_ptr[0] = devlink;
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ if (flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
err = PTR_ERR(devlink_port);
goto unlock;
}
info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ } else if (flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
- struct devlink_rate *devlink_rate;
-
- devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate)) {
- err = PTR_ERR(devlink_rate);
- goto unlock;
- }
- info->user_ptr[1] = devlink_rate;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_from_info(devlink, info);
- if (IS_ERR(rate_node)) {
- err = PTR_ERR(rate_node);
- goto unlock;
- }
- info->user_ptr[1] = rate_node;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = devlink_linecard_get_from_info(devlink, info);
- if (IS_ERR(linecard)) {
- err = PTR_ERR(linecard);
- goto unlock;
- }
- info->user_ptr[1] = linecard;
}
return 0;
@@ -167,8 +156,27 @@ unlock:
return err;
}
-static void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, ops->internal_flags);
+}
+
+int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
+}
+
+int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
+}
+
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink;
@@ -177,42 +185,41 @@ static void devlink_nl_post_doit(const struct genl_split_ops *ops,
devlink_put(devlink);
}
-static const struct devlink_cmd *devl_cmds[] = {
- [DEVLINK_CMD_GET] = &devl_cmd_get,
- [DEVLINK_CMD_PORT_GET] = &devl_cmd_port_get,
- [DEVLINK_CMD_SB_GET] = &devl_cmd_sb_get,
- [DEVLINK_CMD_SB_POOL_GET] = &devl_cmd_sb_pool_get,
- [DEVLINK_CMD_SB_PORT_POOL_GET] = &devl_cmd_sb_port_pool_get,
- [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = &devl_cmd_sb_tc_pool_bind_get,
- [DEVLINK_CMD_PARAM_GET] = &devl_cmd_param_get,
- [DEVLINK_CMD_REGION_GET] = &devl_cmd_region_get,
- [DEVLINK_CMD_INFO_GET] = &devl_cmd_info_get,
- [DEVLINK_CMD_HEALTH_REPORTER_GET] = &devl_cmd_health_reporter_get,
- [DEVLINK_CMD_TRAP_GET] = &devl_cmd_trap_get,
- [DEVLINK_CMD_TRAP_GROUP_GET] = &devl_cmd_trap_group_get,
- [DEVLINK_CMD_TRAP_POLICER_GET] = &devl_cmd_trap_policer_get,
- [DEVLINK_CMD_RATE_GET] = &devl_cmd_rate_get,
- [DEVLINK_CMD_LINECARD_GET] = &devl_cmd_linecard_get,
- [DEVLINK_CMD_SELFTESTS_GET] = &devl_cmd_selftests_get,
-};
+static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb, int flags,
+ devlink_nl_dump_one_func_t *dump_one,
+ struct nlattr **attrs)
+{
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+ err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
+
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+ if (err != -EMSGSIZE)
+ return err;
+ return msg->len;
+}
-int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int devlink_nl_inst_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb, int flags,
+ devlink_nl_dump_one_func_t *dump_one)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- const struct devlink_cmd *cmd;
struct devlink *devlink;
int err = 0;
- cmd = devl_cmds[info->op.cmd];
-
while ((devlink = devlinks_xa_find_get(sock_net(msg->sk),
&state->instance))) {
devl_lock(devlink);
if (devl_is_registered(devlink))
- err = cmd->dump_one(msg, devlink, cb);
+ err = dump_one(msg, devlink, cb, flags);
else
err = 0;
@@ -233,6 +240,272 @@ int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
return msg->len;
}
+int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
+ devlink_nl_dump_one_func_t *dump_one)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct nlattr **attrs = info->attrs;
+ int flags = NLM_F_MULTI;
+
+ if (attrs &&
+ (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME]))
+ return devlink_nl_inst_single_dumpit(msg, cb, flags, dump_one,
+ attrs);
+ else
+ return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one);
+}
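+
+/* The two dump flavours from the userspace side (hypothetical command
+ * forms, assuming a tool that passes the handle attributes on dumps):
+ *
+ *	$ devlink port show                    # no handle: iterate all
+ *	                                       # registered instances
+ *	$ devlink port show pci/0000:01:00.0   # handle given: dump only that
+ *	                                       # instance; replies carry
+ *	                                       # NLM_F_DUMP_FILTERED
+ */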
+
+static const struct genl_small_ops devlink_nl_small_ops[40] = {
+ {
+ .cmd = DEVLINK_CMD_PORT_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_SET,
+ .doit = devlink_nl_cmd_rate_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_NEW,
+ .doit = devlink_nl_cmd_rate_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_DEL,
+ .doit = devlink_nl_cmd_rate_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_SPLIT,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_split_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_UNSPLIT,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_unsplit_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_NEW,
+ .doit = devlink_nl_cmd_port_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_DEL,
+ .doit = devlink_nl_cmd_port_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .doit = devlink_nl_cmd_linecard_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_pool_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_port_pool_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_eswitch_get_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_eswitch_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_table_get,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_entries_get,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_headers_get,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_table_counters_set,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_resource_set,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_resource_dump,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_RELOAD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_reload,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_param_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_param_get_doit,
+ .dumpit = devlink_nl_cmd_port_param_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_param_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_region_new,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_region_del,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_READ,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_cmd_region_read_dumpit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_recover_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_test_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_FLASH_UPDATE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_flash_update,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_SET,
+ .doit = devlink_nl_cmd_trap_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+ .doit = devlink_nl_cmd_trap_group_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
+ .doit = devlink_nl_cmd_trap_policer_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_RUN,
+ .doit = devlink_nl_cmd_selftests_run,
+ .flags = GENL_ADMIN_PERM,
+ },
+ /* -- No new ops here! Use split ops going forward! -- */
+};
+
struct genl_family devlink_nl_family __ro_after_init = {
.name = DEVLINK_GENL_NAME,
.version = DEVLINK_GENL_VERSION,
@@ -243,8 +516,10 @@ struct genl_family devlink_nl_family __ro_after_init = {
.pre_doit = devlink_nl_pre_doit,
.post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
- .small_ops = devlink_nl_ops,
- .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
+ .small_ops = devlink_nl_small_ops,
+ .n_small_ops = ARRAY_SIZE(devlink_nl_small_ops),
+ .split_ops = devlink_nl_ops,
+ .n_split_ops = ARRAY_SIZE(devlink_nl_ops),
.resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
new file mode 100644
index 000000000000..467b7a431de1
--- /dev/null
+++ b/net/devlink/netlink_gen.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netlink_gen.h"
+
+#include <uapi/linux/devlink.h>
+
+/* DEVLINK_CMD_GET - do */
+static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PORT_GET - do */
+static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_GET - dump */
+static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_GET - do */
+static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_SB_GET - dump */
+static const struct nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_POOL_GET - do */
+static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_POOL_GET - dump */
+static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET - do */
+static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */
+static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */
+static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */
+static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PARAM_GET - do */
+static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PARAM_GET - dump */
+static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_REGION_GET - do */
+static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_REGION_GET - dump */
+static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_INFO_GET - do */
+static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */
+static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */
+static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_TRAP_GET - do */
+static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GET - dump */
+static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GROUP_GET - do */
+static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GROUP_GET - dump */
+static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_POLICER_GET - do */
+static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_TRAP_POLICER_GET - dump */
+static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_GET - do */
+static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_GET - dump */
+static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_LINECARD_GET - do */
+static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_LINECARD_GET - dump */
+static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SELFTESTS_GET - do */
+static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* Ops table for devlink */
+const struct genl_split_ops devlink_nl_ops[32] = {
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .dumpit = devlink_nl_port_get_dumpit,
+ .policy = devlink_port_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .dumpit = devlink_nl_sb_get_dumpit,
+ .policy = devlink_sb_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_pool_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_pool_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .dumpit = devlink_nl_sb_pool_get_dumpit,
+ .policy = devlink_sb_pool_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_port_pool_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_port_pool_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .dumpit = devlink_nl_sb_port_pool_get_dumpit,
+ .policy = devlink_sb_port_pool_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_tc_pool_bind_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_tc_pool_bind_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_TC_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .dumpit = devlink_nl_sb_tc_pool_bind_get_dumpit,
+ .policy = devlink_sb_tc_pool_bind_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_param_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_param_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_PARAM_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_GET,
+ .dumpit = devlink_nl_param_get_dumpit,
+ .policy = devlink_param_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_region_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_region_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_REGION_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_GET,
+ .dumpit = devlink_nl_region_get_dumpit,
+ .policy = devlink_region_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_INFO_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_info_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_info_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_INFO_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_info_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .dumpit = devlink_nl_health_reporter_get_dumpit,
+ .policy = devlink_health_reporter_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .dumpit = devlink_nl_trap_get_dumpit,
+ .policy = devlink_trap_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_group_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_group_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_GROUP_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .dumpit = devlink_nl_trap_group_get_dumpit,
+ .policy = devlink_trap_group_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_policer_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_policer_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .dumpit = devlink_nl_trap_policer_get_dumpit,
+ .policy = devlink_trap_policer_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_NODE_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .dumpit = devlink_nl_rate_get_dumpit,
+ .policy = devlink_rate_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_linecard_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_linecard_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_LINECARD_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .dumpit = devlink_nl_linecard_get_dumpit,
+ .policy = devlink_linecard_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_selftests_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_selftests_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_selftests_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
new file mode 100644
index 000000000000..f8bbc93e39be
--- /dev/null
+++ b/net/devlink/netlink_gen.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_DEVLINK_GEN_H
+#define _LINUX_DEVLINK_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/devlink.h>
+
+/* Ops table for devlink */
+extern const struct genl_split_ops devlink_nl_ops[32];
+
+int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info);
+void
+devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+
+int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_info_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+#endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/net/devlink/param.c b/net/devlink/param.c
new file mode 100644
index 000000000000..31275f9d4cb7
--- /dev/null
+++ b/net/devlink/param.c
@@ -0,0 +1,865 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static const struct devlink_param devlink_param_generic[] = {
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+ .name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+ .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+ .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+ .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+ .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+ .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
+ .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
+ },
+};
+
+static int devlink_param_generic_verify(const struct devlink_param *param)
+{
+ /* verify it matches a generic parameter by id and name */
+ if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
+ return -EINVAL;
+ if (strcmp(param->name, devlink_param_generic[param->id].name))
+ return -ENOENT;
+
+ WARN_ON(param->type != devlink_param_generic[param->id].type);
+
+ return 0;
+}
+
+static int devlink_param_driver_verify(const struct devlink_param *param)
+{
+ int i;
+
+ if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
+ return -EINVAL;
+ /* verify no such name in generic params */
+ for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
+ if (!strcmp(param->name, devlink_param_generic[i].name))
+ return -EEXIST;
+
+ return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_name(struct xarray *params, const char *param_name)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(params, param_id, param_item) {
+ if (!strcmp(param_item->param->name, param_name))
+ return param_item;
+ }
+ return NULL;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_id(struct xarray *params, u32 param_id)
+{
+ return xa_load(params, param_id);
+}
+
+static bool
+devlink_param_cmode_is_supported(const struct devlink_param *param,
+ enum devlink_param_cmode cmode)
+{
+ return test_bit(cmode, &param->supported_cmodes);
+}
+
+static int devlink_param_get(struct devlink *devlink,
+ const struct devlink_param *param,
+ struct devlink_param_gset_ctx *ctx)
+{
+ if (!param->get)
+ return -EOPNOTSUPP;
+ return param->get(devlink, param->id, ctx);
+}
+
+static int devlink_param_set(struct devlink *devlink,
+ const struct devlink_param *param,
+ struct devlink_param_gset_ctx *ctx)
+{
+ if (!param->set)
+ return -EOPNOTSUPP;
+ return param->set(devlink, param->id, ctx);
+}
+
+static int
+devlink_param_type_to_nla_type(enum devlink_param_type param_type)
+{
+ switch (param_type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ return NLA_U8;
+ case DEVLINK_PARAM_TYPE_U16:
+ return NLA_U16;
+ case DEVLINK_PARAM_TYPE_U32:
+ return NLA_U32;
+ case DEVLINK_PARAM_TYPE_STRING:
+ return NLA_STRING;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ return NLA_FLAG;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_nl_param_value_fill_one(struct sk_buff *msg,
+ enum devlink_param_type type,
+ enum devlink_param_cmode cmode,
+ union devlink_param_value val)
+{
+ struct nlattr *param_value_attr;
+
+ param_value_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_PARAM_VALUE);
+ if (!param_value_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
+ goto value_nest_cancel;
+
+ switch (type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_U16:
+ if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_U32:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_STRING:
+ if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+ val.vstr))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ if (val.vbool &&
+ nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
+ goto value_nest_cancel;
+ break;
+ }
+
+ nla_nest_end(msg, param_value_attr);
+ return 0;
+
+value_nest_cancel:
+ nla_nest_cancel(msg, param_value_attr);
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+ bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
+ const struct devlink_param *param = param_item->param;
+ struct devlink_param_gset_ctx ctx;
+ struct nlattr *param_values_list;
+ struct nlattr *param_attr;
+ int nla_type;
+ void *hdr;
+ int err;
+ int i;
+
+ /* Gather a value for each supported configuration mode: driverinit
+ * values come from the stored copies, the others from the get() op.
+ */
+ for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+ if (!devlink_param_cmode_is_supported(param, i))
+ continue;
+ if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+ if (param_item->driverinit_value_new_valid)
+ param_value[i] = param_item->driverinit_value_new;
+ else if (param_item->driverinit_value_valid)
+ param_value[i] = param_item->driverinit_value;
+ else
+ return -EOPNOTSUPP;
+ } else {
+ ctx.cmode = i;
+ err = devlink_param_get(devlink, param, &ctx);
+ if (err)
+ return err;
+ param_value[i] = ctx.val;
+ }
+ param_value_set[i] = true;
+ }
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
+ cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
+ cmd == DEVLINK_CMD_PORT_PARAM_DEL)
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
+ goto genlmsg_cancel;
+
+ param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
+ if (!param_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
+ goto param_nest_cancel;
+ if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
+ goto param_nest_cancel;
+
+ nla_type = devlink_param_type_to_nla_type(param->type);
+ if (nla_type < 0)
+ goto param_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
+ goto param_nest_cancel;
+
+ param_values_list = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_PARAM_VALUES_LIST);
+ if (!param_values_list)
+ goto param_nest_cancel;
+
+ for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+ if (!param_value_set[i])
+ continue;
+ err = devlink_nl_param_value_fill_one(msg, param->type,
+ i, param_value[i]);
+ if (err)
+ goto values_list_nest_cancel;
+ }
+
+ nla_nest_end(msg, param_values_list);
+ nla_nest_end(msg, param_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+values_list_nest_cancel:
+ nla_nest_end(msg, param_values_list);
+param_nest_cancel:
+ nla_nest_cancel(msg, param_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
+ cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
+ cmd != DEVLINK_CMD_PORT_PARAM_DEL);
+
+ /* devlink_notify_register() / devlink_notify_unregister()
+ * will replay the notifications if the params are added/removed
+ * outside of the lifetime of the instance.
+ */
+ if (!devl_is_registered(devlink))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+ err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
+ 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_params_notify(struct devlink *devlink,
+ enum devlink_command cmd)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(&devlink->params, param_id, param_item)
+ devlink_param_notify(devlink, 0, param_item, cmd);
+}
+
+void devlink_params_notify_register(struct devlink *devlink)
+{
+ devlink_params_notify(devlink, DEVLINK_CMD_PARAM_NEW);
+}
+
+void devlink_params_notify_unregister(struct devlink *devlink)
+{
+ devlink_params_notify(devlink, DEVLINK_CMD_PARAM_DEL);
+}
+
+static int devlink_nl_param_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+ int err = 0;
+
+ xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
+ err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
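+ /* -EOPNOTSUPP means the param currently has no value to report
+ * (e.g. no driverinit value set yet); skip it instead of failing
+ * the whole dump.
+ */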
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = param_id;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_param_get_dump_one);
+}
+
+static int
+devlink_param_type_get_from_info(struct genl_info *info,
+ enum devlink_param_type *param_type)
+{
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
+ return -EINVAL;
+
+ switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
+ case NLA_U8:
+ *param_type = DEVLINK_PARAM_TYPE_U8;
+ break;
+ case NLA_U16:
+ *param_type = DEVLINK_PARAM_TYPE_U16;
+ break;
+ case NLA_U32:
+ *param_type = DEVLINK_PARAM_TYPE_U32;
+ break;
+ case NLA_STRING:
+ *param_type = DEVLINK_PARAM_TYPE_STRING;
+ break;
+ case NLA_FLAG:
+ *param_type = DEVLINK_PARAM_TYPE_BOOL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+devlink_param_value_get_from_info(const struct devlink_param *param,
+ struct genl_info *info,
+ union devlink_param_value *value)
+{
+ struct nlattr *param_data;
+ int len;
+
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
+ return -EINVAL;
+
+ switch (param->type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
+ break;
+ case DEVLINK_PARAM_TYPE_U16:
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
+ break;
+ case DEVLINK_PARAM_TYPE_U32:
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
+ break;
+ case DEVLINK_PARAM_TYPE_STRING:
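+ /* Reject strings that are not NUL-terminated within the
+ * attribute payload or that do not fit the value buffer.
+ */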
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
+ len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
+ return -EINVAL;
+ strcpy(value->vstr, nla_data(param_data));
+ break;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
+ break;
+ }
+ return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
+{
+ char *param_name;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
+ return NULL;
+
+ param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
+ return devlink_param_find_by_name(params, param_name);
+}
+
+int devlink_nl_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_param_item *param_item;
+ struct sk_buff *msg;
+ int err;
+
+ param_item = devlink_param_get_from_info(&devlink->params, info);
+ if (!param_item)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_GET,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
+ unsigned int port_index,
+ struct xarray *params,
+ struct genl_info *info,
+ enum devlink_command cmd)
+{
+ enum devlink_param_type param_type;
+ struct devlink_param_gset_ctx ctx;
+ enum devlink_param_cmode cmode;
+ struct devlink_param_item *param_item;
+ const struct devlink_param *param;
+ union devlink_param_value value;
+ int err = 0;
+
+ param_item = devlink_param_get_from_info(params, info);
+ if (!param_item)
+ return -EINVAL;
+ param = param_item->param;
+ err = devlink_param_type_get_from_info(info, &param_type);
+ if (err)
+ return err;
+ if (param_type != param->type)
+ return -EINVAL;
+ err = devlink_param_value_get_from_info(param, info, &value);
+ if (err)
+ return err;
+ if (param->validate) {
+ err = param->validate(devlink, param->id, value, info->extack);
+ if (err)
+ return err;
+ }
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
+ return -EINVAL;
+ cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
+ if (!devlink_param_cmode_is_supported(param, cmode))
+ return -EOPNOTSUPP;
+
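+ /* Driverinit values only take effect on the next reload, so stash
+ * the new value until then. Other cmodes are applied immediately
+ * through the driver's set op.
+ */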
+ if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+ param_item->driverinit_value_new = value;
+ param_item->driverinit_value_new_valid = true;
+ } else {
+ if (!param->set)
+ return -EOPNOTSUPP;
+ ctx.val = value;
+ ctx.cmode = cmode;
+ err = devlink_param_set(devlink, param, &ctx);
+ if (err)
+ return err;
+ }
+
+ devlink_param_notify(devlink, port_index, param_item, cmd);
+ return 0;
+}
+
+int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
+ info, DEVLINK_CMD_PARAM_NEW);
+}
+
+int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
+ return msg->len;
+}
+
+int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
+ return -EINVAL;
+}
+
+int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
+ return -EINVAL;
+}
+
+static int devlink_param_verify(const struct devlink_param *param)
+{
+ if (!param || !param->name || !param->supported_cmodes)
+ return -EINVAL;
+ if (param->generic)
+ return devlink_param_generic_verify(param);
+ else
+ return devlink_param_driver_verify(param);
+}
+
+static int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+ int err;
+
+ WARN_ON(devlink_param_verify(param));
+ WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
+
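+ /* Driverinit-only params are accessed through values cached in
+ * devlink, so they must not provide get/set ops; all other params
+ * must provide both.
+ */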
+ if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+ WARN_ON(param->get || param->set);
+ else
+ WARN_ON(!param->get || !param->set);
+
+ param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+ if (!param_item)
+ return -ENOMEM;
+
+ param_item->param = param;
+
+ err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
+ if (err)
+ goto err_xa_insert;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+ return 0;
+
+err_xa_insert:
+ kfree(param_item);
+ return err;
+}
+
+static void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param->id);
+ if (WARN_ON(!param_item))
+ return;
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
+ xa_erase(&devlink->params, param->id);
+ kfree(param_item);
+}
+
+/**
+ * devl_params_register - register configuration parameters
+ *
+ * @devlink: devlink
+ * @params: configuration parameters array
+ * @params_count: number of parameters provided
+ *
+ * Register the configuration parameters supported by the driver.
+ */
+int devl_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ const struct devlink_param *param = params;
+ int i, err;
+
+ lockdep_assert_held(&devlink->lock);
+
+ for (i = 0; i < params_count; i++, param++) {
+ err = devlink_param_register(devlink, param);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
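+ /* Unregister the params that were registered before the failure,
+ * in reverse order.
+ */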
+ if (!i)
+ return err;
+
+ for (param--; i > 0; i--, param--)
+ devlink_param_unregister(devlink, param);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_params_register);
+
+int devlink_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_params_register(devlink, params, params_count);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_params_register);
+
+/**
+ * devl_params_unregister - unregister configuration parameters
+ * @devlink: devlink
+ * @params: configuration parameters to unregister
+ * @params_count: number of parameters provided
+ */
+void devl_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ const struct devlink_param *param = params;
+ int i;
+
+ lockdep_assert_held(&devlink->lock);
+
+ for (i = 0; i < params_count; i++, param++)
+ devlink_param_unregister(devlink, param);
+}
+EXPORT_SYMBOL_GPL(devl_params_unregister);
+
+void devlink_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ devl_lock(devlink);
+ devl_params_unregister(devlink, params, params_count);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
+
+/**
+ * devl_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
+ *
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @val: pointer to store the value of parameter in driverinit
+ * configuration mode
+ *
+ * This function should be used by the driver to get driverinit
+ * configuration for initialization after reload command.
+ *
+ * Note that a lockless call of this function relies on the
+ * driver to maintain the following basic sane behavior:
+ * 1) Driver ensures a call to this function cannot race with
+ * registering/unregistering the parameter with the same parameter ID.
+ * 2) Driver ensures a call to this function cannot race with
+ * devl_param_driverinit_value_set() call with the same parameter ID.
+ * 3) Driver ensures a call to this function cannot race with
+ * reload operation.
+ * If the driver is not able to comply, it has to take the devlink->lock
+ * while calling this.
+ */
+int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *val)
+{
+ struct devlink_param_item *param_item;
+
+ if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+ return -EOPNOTSUPP;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ if (!param_item)
+ return -EINVAL;
+
+ if (!param_item->driverinit_value_valid)
+ return -EOPNOTSUPP;
+
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return -EOPNOTSUPP;
+
+ *val = param_item->driverinit_value;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
+
+/**
+ * devl_param_driverinit_value_set - set value of configuration
+ * parameter for driverinit
+ * configuration mode
+ *
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @init_val: value of parameter to set for driverinit configuration mode
+ *
+ * This function should be used by the driver to set driverinit
+ * configuration mode default value.
+ */
+void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+ union devlink_param_value init_val)
+{
+ struct devlink_param_item *param_item;
+
+ devl_assert_locked(devlink);
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ if (WARN_ON(!param_item))
+ return;
+
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return;
+
+ param_item->driverinit_value = init_val;
+ param_item->driverinit_value_valid = true;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
+
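+ /* Promote the "new" driverinit values staged by userspace to be the
+ * current driverinit values. This is how driverinit parameter changes
+ * take effect across a reload.
+ */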
+void devlink_params_driverinit_load_new(struct devlink *devlink)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(&devlink->params, param_id, param_item) {
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT) ||
+ !param_item->driverinit_value_new_valid)
+ continue;
+ param_item->driverinit_value = param_item->driverinit_value_new;
+ param_item->driverinit_value_valid = true;
+ param_item->driverinit_value_new_valid = false;
+ }
+}
+
+/**
+ * devl_param_value_changed - notify devlink on a parameter's value
+ * change. Should be called by the driver
+ * right after the change.
+ *
+ * @devlink: devlink
+ * @param_id: parameter ID
+ *
+ * This function should be used by the driver to notify devlink on value
+ * change, excluding driverinit configuration mode.
+ * For driverinit configuration mode the driver should use
+ * devl_param_driverinit_value_set() instead.
+ */
+void devl_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+ struct devlink_param_item *param_item;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ WARN_ON(!param_item);
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_value_changed);
diff --git a/net/devlink/port.c b/net/devlink/port.c
new file mode 100644
index 000000000000..4763b42885fb
--- /dev/null
+++ b/net/devlink/port.c
@@ -0,0 +1,1515 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
+static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
+ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+ [DEVLINK_PORT_FN_ATTR_STATE] =
+ NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
+ DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
+};
+
+#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
+ WARN_ON_ONCE(!(devlink_port)->registered)
+#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
+ WARN_ON_ONCE((devlink_port)->registered)
+
+struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+ unsigned int port_index)
+{
+ return xa_load(&devlink->ports, port_index);
+}
+
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (!devlink_port)
+ return ERR_PTR(-ENODEV);
+ return devlink_port;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_port_get_from_attrs(devlink, info->attrs);
+}
+
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+ u32 cap, bool is_enable)
+{
+ caps->selector |= cap;
+ if (is_enable)
+ caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_roce_get)
+ return 0;
+
+ err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
+ extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_migratable_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = devlink_port->ops->port_fn_migratable_get(devlink_port,
+ &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_ipsec_crypto_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_ipsec_crypto_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = devlink_port->ops->port_fn_ipsec_crypto_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_ipsec_packet_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_ipsec_packet_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = devlink_port->ops->port_fn_ipsec_packet_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_PACKET, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_ipsec_crypto_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_ipsec_packet_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
+int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
+{
+ if (devlink_nl_put_handle(msg, devlink_port->devlink))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ return -EMSGSIZE;
+ return 0;
+}
+
+size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+{
+ struct devlink *devlink = devlink_port->devlink;
+
+ return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
+ + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
+ + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
+}
+
+static int devlink_nl_port_attrs_put(struct sk_buff *msg,
+ struct devlink_port *devlink_port)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+ if (!devlink_port->attrs_set)
+ return 0;
+ if (attrs->lanes) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
+ return -EMSGSIZE;
+ }
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
+ return -EMSGSIZE;
+ if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
+ return -EMSGSIZE;
+ switch (devlink_port->attrs.flavour) {
+ case DEVLINK_PORT_FLAVOUR_PCI_PF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_pf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
+ return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_VF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_vf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
+ return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_sf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
+ attrs->pci_sf.pf) ||
+ nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
+ attrs->pci_sf.sf))
+ return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ case DEVLINK_PORT_FLAVOUR_CPU:
+ case DEVLINK_PORT_FLAVOUR_DSA:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (!attrs->split)
+ return 0;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+ attrs->phys.split_subport_number))
+ return -EMSGSIZE;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ u8 hw_addr[MAX_ADDR_LEN];
+ int hw_addr_len;
+ int err;
+
+ if (!port->ops->port_fn_hw_addr_get)
+ return 0;
+
+ err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
+ extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+ if (err)
+ return err;
+ *msg_updated = true;
+ return 0;
+}
+
+static bool
+devlink_port_fn_state_valid(enum devlink_port_fn_state state)
+{
+ return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
+ state == DEVLINK_PORT_FN_STATE_ACTIVE;
+}
+
+static bool
+devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
+{
+ return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
+ opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
+}
+
+static int devlink_port_fn_state_fill(struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ enum devlink_port_fn_opstate opstate;
+ enum devlink_port_fn_state state;
+ int err;
+
+ if (!port->ops->port_fn_state_get)
+ return 0;
+
+ err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ if (!devlink_port_fn_state_valid(state)) {
+ WARN_ON_ONCE(1);
+ NL_SET_ERR_MSG(extack, "Invalid state read from driver");
+ return -EINVAL;
+ }
+ if (!devlink_port_fn_opstate_valid(opstate)) {
+ WARN_ON_ONCE(1);
+ NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
+ return -EINVAL;
+ }
+ if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
+ nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
+ return -EMSGSIZE;
+ *msg_updated = true;
+ return 0;
+}
+
+static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
+ extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
+ extack);
+}
+
+static int
+devlink_port_fn_ipsec_crypto_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_ipsec_crypto_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_ipsec_packet_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_ipsec_packet_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
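+ /* The bitfield32 selector says which capabilities the request
+ * touches; the masked value gives the requested on/off state for
+ * each of them.
+ */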
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ err = devlink_port_fn_mig_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_MIGRATABLE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+ err = devlink_port_fn_ipsec_crypto_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+ err = devlink_port_fn_ipsec_packet_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_IPSEC_PACKET,
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
+devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *function_attr;
+ bool msg_updated = false;
+ int err;
+
+ function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
+ if (!function_attr)
+ return -EMSGSIZE;
+
+ err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
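+ /* Omit the nest entirely if no function attribute was filled in. */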
+out:
+ if (err || !msg_updated)
+ nla_nest_cancel(msg, function_attr);
+ else
+ nla_nest_end(msg, function_attr);
+ return err;
+}
+
+static int devlink_nl_port_fill(struct sk_buff *msg,
+ struct devlink_port *devlink_port,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+
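+ /* The port type and the associated netdev/ibdev pointers may change
+ * concurrently, so hold type_lock while reading them.
+ */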
+ spin_lock_bh(&devlink_port->type_lock);
+ if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+ goto nla_put_failure_type_locked;
+ if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
+ devlink_port->desired_type))
+ goto nla_put_failure_type_locked;
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (devlink_port->type_eth.netdev &&
+ (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+ devlink_port->type_eth.ifindex) ||
+ nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
+ devlink_port->type_eth.ifname)))
+ goto nla_put_failure_type_locked;
+ }
+ if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
+ struct ib_device *ibdev = devlink_port->type_ib.ibdev;
+
+ if (ibdev &&
+ nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
+ ibdev->name))
+ goto nla_put_failure_type_locked;
+ }
+ spin_unlock_bh(&devlink_port->type_lock);
+ if (devlink_nl_port_attrs_put(msg, devlink_port))
+ goto nla_put_failure;
+ if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+ goto nla_put_failure;
+ if (devlink_port->linecard &&
+ nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
+ devlink_port->linecard->index))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure_type_locked:
+ spin_unlock_bh(&devlink_port->type_lock);
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_port_notify(struct devlink_port *devlink_port,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_ports_notify(struct devlink *devlink,
+ enum devlink_command cmd)
+{
+ struct devlink_port *devlink_port;
+ unsigned long port_index;
+
+ xa_for_each(&devlink->ports, port_index, devlink_port)
+ devlink_port_notify(devlink_port, cmd);
+}
+
+void devlink_ports_notify_register(struct devlink *devlink)
+{
+ devlink_ports_notify(devlink, DEVLINK_CMD_PORT_NEW);
+}
+
+void devlink_ports_notify_unregister(struct devlink *devlink)
+{
+ devlink_ports_notify(devlink, DEVLINK_CMD_PORT_DEL);
+}
+
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_port *devlink_port;
+ unsigned long port_index;
+ int err = 0;
+
+ xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
+ err = devlink_nl_port_fill(msg, devlink_port,
+ DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ cb->extack);
+ if (err) {
+ state->idx = port_index;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_port_get_dump_one);
+}
+
+static int devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type port_type)
+
+{
+ int err;
+
+ if (!devlink_port->ops->port_type_set)
+ return -EOPNOTSUPP;
+
+ if (port_type == devlink_port->type)
+ return 0;
+
+ err = devlink_port->ops->port_type_set(devlink_port, port_type);
+ if (err)
+ return err;
+
+ devlink_port->desired_type = port_type;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ const u8 *hw_addr;
+ int hw_addr_len;
+
+ hw_addr = nla_data(attr);
+ hw_addr_len = nla_len(attr);
+ if (hw_addr_len > MAX_ADDR_LEN) {
+ NL_SET_ERR_MSG(extack, "Port function hardware address too long");
+ return -EINVAL;
+ }
+ if (port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (hw_addr_len != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
+ return -EINVAL;
+ }
+ if (!is_unicast_ether_addr(hw_addr)) {
+ NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
+ return -EINVAL;
+ }
+ }
+
+ return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
+ extack);
+}
+
+static int devlink_port_fn_state_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ enum devlink_port_fn_state state;
+
+ state = nla_get_u8(attr);
+ return port->ops->port_fn_state_set(port, state, extack);
+}
+
+static int devlink_port_function_validate(struct devlink_port *devlink_port,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_port_ops *ops = devlink_port->ops;
+ struct nlattr *attr;
+
+ if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
+ !ops->port_fn_hw_addr_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Port doesn't support function attributes");
+ return -EOPNOTSUPP;
+ }
+ if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
+ "Function does not support state setting");
+ return -EOPNOTSUPP;
+ }
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ struct nla_bitfield32 caps;
+
+ caps = nla_get_bitfield32(attr);
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
+ !ops->port_fn_roce_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support RoCE function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ if (!ops->port_fn_migratable_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support migratable function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "migratable function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+ if (!ops->port_fn_ipsec_crypto_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support ipsec_crypto function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "ipsec_crypto function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+ if (!ops->port_fn_ipsec_packet_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support ipsec_packet function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "ipsec_packet function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ return 0;
+}
+
+static int devlink_port_function_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
+ devlink_function_nl_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
+ return err;
+ }
+
+ err = devlink_port_function_validate(port, tb, extack);
+ if (err)
+ return err;
+
+ attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+ if (attr) {
+ err = devlink_port_function_hw_addr_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ err = devlink_port_fn_caps_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
+ /* Keep this as the last function attribute set, so that when
+ * multiple port function attributes are set along with state,
+ * those are applied first, before the state is activated.
+ */
+ attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
+ if (attr)
+ err = devlink_port_fn_state_set(port, attr, extack);
+
+ if (!err)
+ devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
+ return err;
+}
+
+int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ int err;
+
+ if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
+ enum devlink_port_type port_type;
+
+ port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
+ err = devlink_port_type_set(devlink_port, port_type);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
+ struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
+ struct netlink_ext_ack *extack = info->extack;
+
+ err = devlink_port_function_set(devlink_port, attr, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ u32 count;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
+ return -EINVAL;
+ if (!devlink_port->ops->port_split)
+ return -EOPNOTSUPP;
+
+ count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+
+ if (!devlink_port->attrs.splittable) {
+ /* Split ports cannot be split. */
+ if (devlink_port->attrs.split)
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
+ else
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split");
+ return -EINVAL;
+ }
+
+ if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
+ NL_SET_ERR_MSG(info->extack, "Invalid split count");
+ return -EINVAL;
+ }
+
+ return devlink_port->ops->port_split(devlink, devlink_port, count,
+ info->extack);
+}
+
+int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink_port->ops->port_unsplit)
+ return -EOPNOTSUPP;
+ return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
+}
+
+int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink_port_new_attrs new_attrs = {};
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *devlink_port;
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->port_new)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
+ !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
+ NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
+ return -EINVAL;
+ }
+ new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
+ new_attrs.pfnum =
+ nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ /* Port index of the new port being created by driver. */
+ new_attrs.port_index =
+ nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+ new_attrs.port_index_valid = true;
+ }
+ if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
+ new_attrs.controller =
+ nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
+ new_attrs.controller_valid = true;
+ }
+ if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
+ info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
+ new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
+ new_attrs.sfnum_valid = true;
+ }
+
+ err = devlink->ops->port_new(devlink, &new_attrs,
+ extack, &devlink_port);
+ if (err)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto err_out_port_del;
+ }
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0, NULL);
+ if (WARN_ON_ONCE(err))
+ goto err_out_msg_free;
+ err = genlmsg_reply(msg, info);
+ if (err)
+ goto err_out_port_del;
+ return 0;
+
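+ /* If the new port could not be reported back to userspace, delete
+ * it again so the command has no visible side effect.
+ */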
+err_out_msg_free:
+ nlmsg_free(msg);
+err_out_port_del:
+ devlink_port->ops->port_del(devlink, devlink_port, NULL);
+ return err;
+}
+
+int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink_port->ops->port_del)
+ return -EOPNOTSUPP;
+
+ return devlink_port->ops->port_del(devlink, devlink_port, extack);
+}
+
+static void devlink_port_type_warn(struct work_struct *work)
+{
+ struct devlink_port *port = container_of(to_delayed_work(work),
+ struct devlink_port,
+ type_warn_dw);
+ dev_warn(port->devlink->dev, "Type was not set for devlink port.");
+}
+
+static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+{
+ /* Ignore CPU and DSA flavours. */
+ return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
+}
+
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
+
+static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
+{
+ if (!devlink_port_type_should_warn(devlink_port))
+ return;
+ /* Schedule a work to WARN in case driver does not set port
+ * type within timeout.
+ */
+ schedule_delayed_work(&devlink_port->type_warn_dw,
+ DEVLINK_PORT_TYPE_WARN_TIMEOUT);
+}
+
+static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
+{
+ if (!devlink_port_type_should_warn(devlink_port))
+ return;
+ cancel_delayed_work_sync(&devlink_port->type_warn_dw);
+}
+
+/**
+ * devlink_port_init() - Init devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ *
+ * Initialize essential stuff that is needed for functions
+ * that may be called before devlink port registration.
+ * Call to this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_init(struct devlink *devlink,
+ struct devlink_port *devlink_port)
+{
+ if (devlink_port->initialized)
+ return;
+ devlink_port->devlink = devlink;
+ INIT_LIST_HEAD(&devlink_port->region_list);
+ devlink_port->initialized = true;
+}
+EXPORT_SYMBOL_GPL(devlink_port_init);
+
+/**
+ * devlink_port_fini() - Deinitialize devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Deinitialize essential stuff that is in use for functions
+ * that may be called after devlink port unregistration.
+ * Call to this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_fini(struct devlink_port *devlink_port)
+{
+ WARN_ON(!list_empty(&devlink_port->region_list));
+}
+EXPORT_SYMBOL_GPL(devlink_port_fini);
+
+static const struct devlink_port_ops devlink_port_dummy_ops = {};
+
+/**
+ * devl_port_register_with_ops() - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
+ *
+ * Register devlink port with provided port index. The driver may use
+ * any indexing scheme, even a hw-related one. The devlink_port
+ * structure is convenient to embed inside the driver's private
+ * structure. Note that the caller should take care of zeroing the
+ * devlink_port structure.
+ */
+int devl_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops)
+{
+ int err;
+
+ devl_assert_locked(devlink);
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port_init(devlink, devlink_port);
+ devlink_port->registered = true;
+ devlink_port->index = port_index;
+ devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
+ spin_lock_init(&devlink_port->type_lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
+ if (err) {
+ devlink_port->registered = false;
+ return err;
+ }
+
+ INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+ devlink_port_type_warn_schedule(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
+
+/**
+ * devlink_port_register_with_ops - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
+ *
+ * Register devlink port with provided port index. The driver may use
+ * any indexing scheme, even a hw-related one. The devlink_port
+ * structure is convenient to embed inside the driver's private
+ * structure. Note that the caller should take care of zeroing the
+ * devlink_port structure.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+int devlink_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_port_register_with_ops(devlink, devlink_port,
+ port_index, ops);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
+
+/**
+ * devl_port_unregister() - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ */
+void devl_port_unregister(struct devlink_port *devlink_port)
+{
+ lockdep_assert_held(&devlink_port->devlink->lock);
+ WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
+
+ devlink_port_type_warn_cancel(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ xa_erase(&devlink_port->devlink->ports, devlink_port->index);
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ devlink_port->registered = false;
+}
+EXPORT_SYMBOL_GPL(devl_port_unregister);
+
+/**
+ * devlink_port_unregister - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_port_unregister(struct devlink_port *devlink_port)
+{
+ struct devlink *devlink = devlink_port->devlink;
+
+ devl_lock(devlink);
+ devl_port_unregister(devlink_port);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_port_unregister);
+
+static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+ struct net_device *netdev)
+{
+ const struct net_device_ops *ops = netdev->netdev_ops;
+
+ /* If driver registers devlink port, it should set devlink port
+ * attributes accordingly so the compat functions are called
+ * and the original ops are not used.
+ */
+ if (ops->ndo_get_phys_port_name) {
+ /* Some drivers use the same set of ndos for netdevs
+ * that have devlink_port registered and also for
+ * those who don't. Make sure that ndo_get_phys_port_name
+ * returns -EOPNOTSUPP here in case it is defined.
+ * Warn if not.
+ */
+ char name[IFNAMSIZ];
+ int err;
+
+ err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
+ WARN_ON(err != -EOPNOTSUPP);
+ }
+ if (ops->ndo_get_port_parent_id) {
+ /* Some drivers use the same set of ndos for netdevs
+ * that have devlink_port registered and also for
+ * those who don't. Make sure that ndo_get_port_parent_id
+ * returns -EOPNOTSUPP here in case it is defined.
+ * Warn if not.
+ */
+ struct netdev_phys_item_id ppid;
+ int err;
+
+ err = ops->ndo_get_port_parent_id(netdev, &ppid);
+ WARN_ON(err != -EOPNOTSUPP);
+ }
+}
+
+static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type type,
+ void *type_dev)
+{
+ struct net_device *netdev = type_dev;
+
+ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+ if (type == DEVLINK_PORT_TYPE_NOTSET) {
+ devlink_port_type_warn_schedule(devlink_port);
+ } else {
+ devlink_port_type_warn_cancel(devlink_port);
+ if (type == DEVLINK_PORT_TYPE_ETH && netdev)
+ devlink_port_type_netdev_checks(devlink_port, netdev);
+ }
+
+ spin_lock_bh(&devlink_port->type_lock);
+ devlink_port->type = type;
+ switch (type) {
+ case DEVLINK_PORT_TYPE_ETH:
+ devlink_port->type_eth.netdev = netdev;
+ if (netdev) {
+ ASSERT_RTNL();
+ devlink_port->type_eth.ifindex = netdev->ifindex;
+ BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) !=
+ sizeof(netdev->name));
+ strcpy(devlink_port->type_eth.ifname, netdev->name);
+ }
+ break;
+ case DEVLINK_PORT_TYPE_IB:
+ devlink_port->type_ib.ibdev = type_dev;
+ break;
+ default:
+ break;
+ }
+ spin_unlock_bh(&devlink_port->type_lock);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+/**
+ * devlink_port_type_eth_set - Set port type to Ethernet
+ *
+ * @devlink_port: devlink port
+ *
+ * If driver is calling this, most likely it is doing something wrong.
+ */
+void devlink_port_type_eth_set(struct devlink_port *devlink_port)
+{
+ dev_warn(devlink_port->devlink->dev,
+ "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+
+/**
+ * devlink_port_type_ib_set - Set port type to InfiniBand
+ *
+ * @devlink_port: devlink port
+ * @ibdev: related IB device
+ */
+void devlink_port_type_ib_set(struct devlink_port *devlink_port,
+ struct ib_device *ibdev)
+{
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+
+/**
+ * devlink_port_type_clear - Clear port type
+ *
+ * @devlink_port: devlink port
+ *
+ * If driver is calling this for clearing Ethernet type, most likely
+ * it is doing something wrong.
+ */
+void devlink_port_type_clear(struct devlink_port *devlink_port)
+{
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ dev_warn(devlink_port->devlink->dev,
+ "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+int devlink_port_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct devlink_port *devlink_port = netdev->devlink_port;
+ struct devlink *devlink;
+
+ if (!devlink_port)
+ return NOTIFY_OK;
+ devlink = devlink_port->devlink;
+
+ switch (event) {
+ case NETDEV_POST_INIT:
+ /* Set the type but not the netdev pointer. It is going to be set
+ * later on by the NETDEV_REGISTER event. Happens once during
+ * netdevice registration.
+ */
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
+ NULL);
+ break;
+ case NETDEV_REGISTER:
+ case NETDEV_CHANGENAME:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
+ /* Set the netdev on top of previously set type. Note this
+ * event happens also during net namespace change so here
+ * we take into account netdev pointer appearing in this
+ * namespace.
+ */
+ __devlink_port_type_set(devlink_port, devlink_port->type,
+ netdev);
+ break;
+ case NETDEV_UNREGISTER:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
+ /* Clear netdev pointer, but not the type. This event happens
+ * also during net namespace change so we need to clear
+ * pointer to netdev that is going to another net namespace.
+ */
+ __devlink_port_type_set(devlink_port, devlink_port->type,
+ NULL);
+ break;
+ case NETDEV_PRE_UNINIT:
+ /* Clear the type and the netdev pointer. Happens once during
+ * netdevice unregister.
+ */
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
+ NULL);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
+ enum devlink_port_flavour flavour)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+ devlink_port->attrs_set = true;
+ attrs->flavour = flavour;
+ if (attrs->switch_id.id_len) {
+ devlink_port->switch_port = true;
+ if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
+ attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
+ } else {
+ devlink_port->switch_port = false;
+ }
+ return 0;
+}
+
+/**
+ * devlink_port_attrs_set - Set port attributes
+ *
+ * @devlink_port: devlink port
+ * @attrs: devlink port attrs
+ */
+void devlink_port_attrs_set(struct devlink_port *devlink_port,
+ struct devlink_port_attrs *attrs)
+{
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port->attrs = *attrs;
+ ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
+ if (ret)
+ return;
+ WARN_ON(attrs->splittable && attrs->split);
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
+
+/**
+ * devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, bool external)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_PF);
+ if (ret)
+ return;
+ attrs->pci_pf.controller = controller;
+ attrs->pci_pf.pf = pf;
+ attrs->pci_pf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
+
+/**
+ * devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @vf: associated VF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u16 vf, bool external)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_VF);
+ if (ret)
+ return;
+ attrs->pci_vf.controller = controller;
+ attrs->pci_vf.pf = pf;
+ attrs->pci_vf.vf = vf;
+ attrs->pci_vf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
+
+/**
+ * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @sf: associated SF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u32 sf, bool external)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_SF);
+ if (ret)
+ return;
+ attrs->pci_sf.controller = controller;
+ attrs->pci_sf.pf = pf;
+ attrs->pci_sf.sf = sf;
+ attrs->pci_sf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+
+/**
+ * devlink_port_linecard_set - Link port with a linecard
+ *
+ * @devlink_port: devlink port
+ * @linecard: devlink linecard
+ */
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+ struct devlink_linecard *linecard)
+{
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port->linecard = linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
+
+static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
+ char *name, size_t len)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int n = 0;
+
+ if (!devlink_port->attrs_set)
+ return -EOPNOTSUPP;
+
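+ /* Compose the phys_port_name: physical ports get "p<N>"
+ * ("l<L>p<N>" on a line card, with "s<S>" appended for split
+ * ports); PCI ports get "pf<N>", "pf<N>vf<N>" or "pf<N>sf<N>",
+ * prefixed with "c<C>" when on an external controller.
+ */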
+ switch (attrs->flavour) {
+ case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ if (devlink_port->linecard)
+ n = snprintf(name, len, "l%u",
+ devlink_port->linecard->index);
+ if (n < len)
+ n += snprintf(name + n, len - n, "p%u",
+ attrs->phys.port_number);
+ if (n < len && attrs->split)
+ n += snprintf(name + n, len - n, "s%u",
+ attrs->phys.split_subport_number);
+ break;
+ case DEVLINK_PORT_FLAVOUR_CPU:
+ case DEVLINK_PORT_FLAVOUR_DSA:
+ case DEVLINK_PORT_FLAVOUR_UNUSED:
+ /* As CPU and DSA ports do not have a netdevice associated,
+ * this case should never happen.
+ */
+ WARN_ON(1);
+ return -EINVAL;
+ case DEVLINK_PORT_FLAVOUR_PCI_PF:
+ if (attrs->pci_pf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
+ n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_VF:
+ if (attrs->pci_vf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
+ n = snprintf(name, len, "pf%uvf%u",
+ attrs->pci_vf.pf, attrs->pci_vf.vf);
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (attrs->pci_sf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
+ n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
+ attrs->pci_sf.sf);
+ break;
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+ return -EOPNOTSUPP;
+ }
+
+ if (n >= len)
+ return -EINVAL;
+
+ return 0;
+}
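+
+/* Illustrative sketch of the names built above (values hypothetical):
+ *
+ *	char name[16];
+ *
+ *	__devlink_port_phys_port_name_get(port, name, sizeof(name));
+ *	// PHYSICAL port 1                    -> "p1"
+ *	// PHYSICAL port 1, split sub-port 2  -> "p1s2"
+ *	// PHYSICAL port 1 on linecard 3      -> "l3p1"
+ *	// PCI PF 0 on external controller 1  -> "c1pf0"
+ *	// PCI VF 2 of PF 0                   -> "pf0vf2"
+ *	// PCI SF 10 of PF 0                  -> "pf0sf10"
+ */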
+
+int devlink_compat_phys_port_name_get(struct net_device *dev,
+ char *name, size_t len)
+{
+ struct devlink_port *devlink_port;
+
+ /* RTNL mutex is held here which ensures that devlink_port
+ * instance cannot disappear in the middle. No need to take
+ * any devlink lock as only permanent values are accessed.
+ */
+ ASSERT_RTNL();
+
+ devlink_port = dev->devlink_port;
+ if (!devlink_port)
+ return -EOPNOTSUPP;
+
+ return __devlink_port_phys_port_name_get(devlink_port, name, len);
+}
+
+int devlink_compat_switch_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *ppid)
+{
+ struct devlink_port *devlink_port;
+
+ /* Caller must hold RTNL mutex or reference to dev, which ensures that
+ * devlink_port instance cannot disappear in the middle. No need to take
+ * any devlink lock as only permanent values are accessed.
+ */
+ devlink_port = dev->devlink_port;
+ if (!devlink_port || !devlink_port->switch_port)
+ return -EOPNOTSUPP;
+
+ memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
+
+ return 0;
+}
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
new file mode 100644
index 000000000000..dff1593b8406
--- /dev/null
+++ b/net/devlink/rate.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
+}
+
+static inline bool
+devlink_rate_is_node(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate;
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+ if (IS_ERR(devlink_port))
+ return ERR_CAST(devlink_port);
+ devlink_rate = devlink_port->devlink_rate;
+ return devlink_rate ?: ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
+{
+	struct devlink_rate *devlink_rate;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate) &&
+ !strcmp(node_name, devlink_rate->name))
+ return devlink_rate;
+ }
+ return ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ const char *rate_node_name;
+ size_t len;
+
+ if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return ERR_PTR(-EINVAL);
+ rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
+ len = strlen(rate_node_name);
+	/* Name cannot be empty or consist only of decimal digits */
+ if (!len || strspn(rate_node_name, "0123456789") == len)
+ return ERR_PTR(-EINVAL);
+
+ return devlink_rate_node_get_by_name(devlink, rate_node_name);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_rate_node_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX])
+ return devlink_rate_leaf_get_from_info(devlink, info);
+ else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return devlink_rate_node_get_from_info(devlink, info);
+ else
+ return ERR_PTR(-EINVAL);
+}
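+
+/* A request carrying DEVLINK_ATTR_PORT_INDEX resolves to a leaf rate
+ * object, one carrying DEVLINK_ATTR_RATE_NODE_NAME to a node. With
+ * iproute2 this maps to, e.g. (handles hypothetical):
+ *
+ *	devlink port function rate set pci/0000:03:00.0/1 tx_max 100mbit
+ *	devlink port function rate set pci/0000:03:00.0/group1 tx_share 10mbit
+ */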
+
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+ struct devlink_rate *devlink_rate,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+ goto nla_put_failure;
+
+ if (devlink_rate_is_leaf(devlink_rate)) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ devlink_rate->devlink_port->index))
+ goto nla_put_failure;
+ } else if (devlink_rate_is_node(devlink_rate)) {
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
+ devlink_rate->name))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+ devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
+ devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
+ devlink_rate->tx_priority))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
+ devlink_rate->tx_weight))
+ goto nla_put_failure;
+
+	if (devlink_rate->parent &&
+	    nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
+			   devlink_rate->parent->name))
+		goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_rates_notify_register(struct devlink *devlink)
+{
+ struct devlink_rate *rate_node;
+
+ list_for_each_entry(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+}
+
+void devlink_rates_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_rate *rate_node;
+
+ list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+}
+
+static int
+devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_rate *devlink_rate;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
+
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
+ cb->nlh->nlmsg_seq, flags, NULL);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_rate_get_dump_one);
+}
+
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *devlink_rate;
+ struct sk_buff *msg;
+ int err;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate))
+ return PTR_ERR(devlink_rate);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static bool
+devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent)
+{
+ while (parent) {
+ if (parent == devlink_rate)
+ return true;
+ parent = parent->parent;
+ }
+ return false;
+}
+
+static int
+devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
+ struct genl_info *info,
+ struct nlattr *nla_parent)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ const char *parent_name = nla_data(nla_parent);
+ const struct devlink_ops *ops = devlink->ops;
+ size_t len = strlen(parent_name);
+ struct devlink_rate *parent;
+ int err = -EOPNOTSUPP;
+
+ parent = devlink_rate->parent;
+
+ if (parent && !len) {
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ if (err)
+ return err;
+
+ refcount_dec(&parent->refcnt);
+ devlink_rate->parent = NULL;
+ } else if (len) {
+ parent = devlink_rate_node_get_by_name(devlink, parent_name);
+ if (IS_ERR(parent))
+ return -ENODEV;
+
+ if (parent == devlink_rate) {
+ NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
+ return -EINVAL;
+ }
+
+ if (devlink_rate_is_node(devlink_rate) &&
+ devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
+ NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
+ return -EEXIST;
+ }
+
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ if (err)
+ return err;
+
+ if (devlink_rate->parent)
+			/* we're reassigning to another parent in this case */
+ refcount_dec(&devlink_rate->parent->refcnt);
+
+ refcount_inc(&parent->refcnt);
+ devlink_rate->parent = parent;
+ }
+
+ return 0;
+}
+
+static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
+ const struct devlink_ops *ops,
+ struct genl_info *info)
+{
+ struct nlattr *nla_parent, **attrs = info->attrs;
+ int err = -EOPNOTSUPP;
+ u32 priority;
+ u32 weight;
+ u64 rate;
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_share = rate;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_max = rate;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) {
+ priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv,
+ priority, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv,
+ priority, info->extack);
+
+ if (err)
+ return err;
+ devlink_rate->tx_priority = priority;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) {
+ weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv,
+ weight, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv,
+ weight, info->extack);
+
+ if (err)
+ return err;
+ devlink_rate->tx_weight = weight;
+ }
+
+ nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
+ if (nla_parent) {
+ err = devlink_nl_rate_parent_node_set(devlink_rate, info,
+ nla_parent);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
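+
+/* Driver-side sketch of one of the per-attribute ops consumed above;
+ * the signature is inferred from the call sites in devlink_nl_rate_set()
+ * and the foo_* names are hypothetical:
+ *
+ *	static int foo_rate_leaf_tx_max_set(struct devlink_rate *leaf,
+ *					    void *priv, u64 tx_max,
+ *					    struct netlink_ext_ack *extack)
+ *	{
+ *		struct foo_vport *vport = priv;
+ *
+ *		return foo_hw_set_max_rate(vport, tx_max);
+ *	}
+ */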
+
+static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
+ struct genl_info *info,
+ enum devlink_rate_type type)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (type == DEVLINK_RATE_TYPE_LEAF) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_leaf_parent_set) {
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
+ "TX priority set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
+ "TX weight set isn't supported for the leafs");
+ return false;
+ }
+ } else if (type == DEVLINK_RATE_TYPE_NODE) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_node_parent_set) {
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
+ "TX priority set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
+ "TX weight set isn't supported for the nodes");
+ return false;
+ }
+ } else {
+ WARN(1, "Unknown type of rate object");
+ return false;
+ }
+
+ return true;
+}
+
+int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *devlink_rate;
+ const struct devlink_ops *ops;
+ int err;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate))
+ return PTR_ERR(devlink_rate);
+
+ ops = devlink->ops;
+ if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
+ return -EOPNOTSUPP;
+
+ err = devlink_nl_rate_set(devlink_rate, ops, info);
+
+ if (!err)
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ return err;
+}
+
+int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
+ const struct devlink_ops *ops;
+ int err;
+
+ ops = devlink->ops;
+ if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
+ NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
+ return -EOPNOTSUPP;
+
+ rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
+ if (!IS_ERR(rate_node))
+ return -EEXIST;
+ else if (rate_node == ERR_PTR(-EINVAL))
+ return -EINVAL;
+
+ rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+ if (!rate_node)
+ return -ENOMEM;
+
+ rate_node->devlink = devlink;
+ rate_node->type = DEVLINK_RATE_TYPE_NODE;
+ rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
+ if (!rate_node->name) {
+ err = -ENOMEM;
+ goto err_strdup;
+ }
+
+ err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
+ if (err)
+ goto err_node_new;
+
+ err = devlink_nl_rate_set(rate_node, ops, info);
+ if (err)
+ goto err_rate_set;
+
+ refcount_set(&rate_node->refcnt, 1);
+ list_add(&rate_node->list, &devlink->rate_list);
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ return 0;
+
+err_rate_set:
+ ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+err_node_new:
+ kfree(rate_node->name);
+err_strdup:
+ kfree(rate_node);
+ return err;
+}
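+
+/* Minimal sketch of the node ops required by the command above, with
+ * signatures inferred from the call sites in this file (foo_* names
+ * hypothetical):
+ *
+ *	static int foo_rate_node_new(struct devlink_rate *node, void **priv,
+ *				     struct netlink_ext_ack *extack)
+ *	{
+ *		struct foo_sched_node *n = foo_sched_node_alloc();
+ *
+ *		if (!n)
+ *			return -ENOMEM;
+ *		*priv = n;	// stored as rate_node->priv by the core
+ *		return 0;
+ *	}
+ *
+ *	static int foo_rate_node_del(struct devlink_rate *node, void *priv,
+ *				     struct netlink_ext_ack *extack)
+ *	{
+ *		foo_sched_node_free(priv);
+ *		return 0;
+ *	}
+ *
+ *	static const struct devlink_ops foo_devlink_ops = {
+ *		.rate_node_new = foo_rate_node_new,
+ *		.rate_node_del = foo_rate_node_del,
+ *	};
+ */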
+
+int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
+ int err;
+
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node))
+ return PTR_ERR(rate_node);
+
+ if (refcount_read(&rate_node->refcnt) > 1) {
+ NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
+ return -EBUSY;
+ }
+
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+ err = devlink->ops->rate_node_del(rate_node, rate_node->priv,
+ info->extack);
+ if (rate_node->parent)
+ refcount_dec(&rate_node->parent->refcnt);
+ list_del(&rate_node->list);
+ kfree(rate_node->name);
+ kfree(rate_node);
+ return err;
+}
+
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_rate *devlink_rate;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list)
+ if (devlink_rate_is_node(devlink_rate)) {
+ NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/**
+ * devl_rate_node_create - create devlink rate node
+ * @devlink: devlink instance
+ * @priv: driver private data
+ * @node_name: name of the resulting node
+ * @parent: parent devlink_rate struct
+ *
+ * Create devlink rate object of type node
+ */
+struct devlink_rate *
+devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
+ struct devlink_rate *parent)
+{
+ struct devlink_rate *rate_node;
+
+ rate_node = devlink_rate_node_get_by_name(devlink, node_name);
+ if (!IS_ERR(rate_node))
+ return ERR_PTR(-EEXIST);
+
+ rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+ if (!rate_node)
+ return ERR_PTR(-ENOMEM);
+
+ if (parent) {
+ rate_node->parent = parent;
+ refcount_inc(&rate_node->parent->refcnt);
+ }
+
+ rate_node->type = DEVLINK_RATE_TYPE_NODE;
+ rate_node->devlink = devlink;
+ rate_node->priv = priv;
+
+ rate_node->name = kstrdup(node_name, GFP_KERNEL);
+ if (!rate_node->name) {
+ kfree(rate_node);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ refcount_set(&rate_node->refcnt, 1);
+ list_add(&rate_node->list, &devlink->rate_list);
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ return rate_node;
+}
+EXPORT_SYMBOL_GPL(devl_rate_node_create);
+
+/**
+ * devl_rate_leaf_create - create devlink rate leaf
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ * @parent: parent devlink_rate struct
+ *
+ * Create devlink rate object of type leaf on provided @devlink_port.
+ */
+int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
+ struct devlink_rate *parent)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_rate *devlink_rate;
+
+ devl_assert_locked(devlink_port->devlink);
+
+ if (WARN_ON(devlink_port->devlink_rate))
+ return -EBUSY;
+
+ devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
+ if (!devlink_rate)
+ return -ENOMEM;
+
+ if (parent) {
+ devlink_rate->parent = parent;
+ refcount_inc(&devlink_rate->parent->refcnt);
+ }
+
+ devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+ devlink_rate->devlink = devlink;
+ devlink_rate->devlink_port = devlink_port;
+ devlink_rate->priv = priv;
+ list_add_tail(&devlink_rate->list, &devlink->rate_list);
+ devlink_port->devlink_rate = devlink_rate;
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
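+
+/* Typical driver flow under the instance lock, combining the two helpers
+ * above (sketch; the priv pointers and the "group1" name are hypothetical):
+ *
+ *	devl_lock(devlink);
+ *	node = devl_rate_node_create(devlink, node_priv, "group1", NULL);
+ *	if (!IS_ERR(node))
+ *		err = devl_rate_leaf_create(port, leaf_priv, node);
+ *	devl_unlock(devlink);
+ */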
+
+/**
+ * devl_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Destroy the devlink rate object of type leaf on provided @devlink_port.
+ */
+void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+ struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+
+ devl_assert_locked(devlink_port->devlink);
+ if (!devlink_rate)
+ return;
+
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ if (devlink_rate->parent)
+ refcount_dec(&devlink_rate->parent->refcnt);
+ list_del(&devlink_rate->list);
+ devlink_port->devlink_rate = NULL;
+ kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
+
+/**
+ * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
+ * @devlink: devlink instance
+ *
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
+ */
+void devl_rate_nodes_destroy(struct devlink *devlink)
+{
+	struct devlink_rate *devlink_rate, *tmp;
+ const struct devlink_ops *ops = devlink->ops;
+
+ devl_assert_locked(devlink);
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (!devlink_rate->parent)
+ continue;
+
+ refcount_dec(&devlink_rate->parent->refcnt);
+ if (devlink_rate_is_leaf(devlink_rate))
+ ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ else if (devlink_rate_is_node(devlink_rate))
+ ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ }
+ list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate)) {
+ ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
+ list_del(&devlink_rate->list);
+ kfree(devlink_rate->name);
+ kfree(devlink_rate);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
diff --git a/net/devlink/region.c b/net/devlink/region.c
new file mode 100644
index 000000000000..d197cdb662db
--- /dev/null
+++ b/net/devlink/region.c
@@ -0,0 +1,1260 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+struct devlink_region {
+ struct devlink *devlink;
+ struct devlink_port *port;
+ struct list_head list;
+ union {
+ const struct devlink_region_ops *ops;
+ const struct devlink_port_region_ops *port_ops;
+ };
+ struct mutex snapshot_lock; /* protects snapshot_list,
+ * max_snapshots and cur_snapshots
+ * consistency.
+ */
+ struct list_head snapshot_list;
+ u32 max_snapshots;
+ u32 cur_snapshots;
+ u64 size;
+};
+
+struct devlink_snapshot {
+ struct list_head list;
+ struct devlink_region *region;
+ u8 *data;
+ u32 id;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ if (!strcmp(region->ops->name, region_name))
+ return region;
+
+ return NULL;
+}
+
+static struct devlink_region *
+devlink_port_region_get_by_name(struct devlink_port *port,
+ const char *region_name)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &port->region_list, list)
+ if (!strcmp(region->ops->name, region_name))
+ return region;
+
+ return NULL;
+}
+
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+ struct devlink_snapshot *snapshot;
+
+ list_for_each_entry(snapshot, &region->snapshot_list, list)
+ if (snapshot->id == id)
+ return snapshot;
+
+ return NULL;
+}
+
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_snapshot *snapshot)
+{
+ struct nlattr *snap_attr;
+ int err;
+
+ snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+ if (!snap_attr)
+ return -EINVAL;
+
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(msg, snap_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, snap_attr);
+ return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_region *region)
+{
+ struct devlink_snapshot *snapshot;
+ struct nlattr *snapshots_attr;
+ int err;
+
+ snapshots_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_REGION_SNAPSHOTS);
+ if (!snapshots_attr)
+ return -EINVAL;
+
+ list_for_each_entry(snapshot, &region->snapshot_list, list) {
+ err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(msg, snapshots_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, snapshots_attr);
+ return err;
+}
+
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct devlink_region *region)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (region->port) {
+ err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+ region->size,
+ DEVLINK_ATTR_PAD);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
+ region->max_snapshots);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+static struct sk_buff *
+devlink_nl_region_notify_build(struct devlink_region *region,
+ struct devlink_snapshot *snapshot,
+ enum devlink_command cmd, u32 portid, u32 seq)
+{
+ struct devlink *devlink = region->devlink;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto out_free_msg;
+ }
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto out_cancel_msg;
+
+ if (region->port) {
+ err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index);
+ if (err)
+ goto out_cancel_msg;
+ }
+
+ err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+ region->ops->name);
+ if (err)
+ goto out_cancel_msg;
+
+ if (snapshot) {
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+ snapshot->id);
+ if (err)
+ goto out_cancel_msg;
+ } else {
+ err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+ region->size, DEVLINK_ATTR_PAD);
+ if (err)
+ goto out_cancel_msg;
+ }
+ genlmsg_end(msg, hdr);
+
+ return msg;
+
+out_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+out_free_msg:
+ nlmsg_free(msg);
+ return ERR_PTR(err);
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+ struct devlink_snapshot *snapshot,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = region->devlink;
+ struct sk_buff *msg;
+
+ WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
+ if (IS_ERR(msg))
+ return;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_regions_notify_register(struct devlink *devlink)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+}
+
+void devlink_regions_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry_reverse(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+}
+
+/**
+ * __devlink_snapshot_id_increment - Increment number of snapshots using an id
+ * @devlink: devlink instance
+ * @id: the snapshot id
+ *
+ * Track when a new snapshot begins using an id. Load the count for the
+ * given id from the snapshot xarray, increment it, and store it back.
+ *
+ * Called when a new snapshot is created with the given id.
+ *
+ * The id *must* have been previously allocated by
+ * devlink_region_snapshot_id_get().
+ *
+ * Returns 0 on success, or an error on failure.
+ */
+static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
+{
+ unsigned long count;
+ void *p;
+ int err;
+
+ xa_lock(&devlink->snapshot_ids);
+ p = xa_load(&devlink->snapshot_ids, id);
+ if (WARN_ON(!p)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ if (WARN_ON(!xa_is_value(p))) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ count = xa_to_value(p);
+ count++;
+
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC));
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
+}
+
+/**
+ * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id
+ * @devlink: devlink instance
+ * @id: the snapshot id
+ *
+ * Track when a snapshot is deleted and stops using an id. Load the count
+ * for the given id from the snapshot xarray, decrement it, and store it
+ * back.
+ *
+ * If the count reaches zero, erase this id from the xarray, freeing it
+ * up for future re-use by devlink_region_snapshot_id_get().
+ *
+ * Called when a snapshot using the given id is deleted, and when the
+ * initial allocator of the id is finished using it.
+ */
+static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
+{
+ unsigned long count;
+ void *p;
+
+ xa_lock(&devlink->snapshot_ids);
+ p = xa_load(&devlink->snapshot_ids, id);
+ if (WARN_ON(!p))
+ goto unlock;
+
+ if (WARN_ON(!xa_is_value(p)))
+ goto unlock;
+
+ count = xa_to_value(p);
+
+ if (count > 1) {
+ count--;
+ __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC);
+ } else {
+ /* If this was the last user, we can erase this id */
+ __xa_erase(&devlink->snapshot_ids, id);
+ }
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
+}
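+
+/* Reference lifecycle of a snapshot id in the xarray (sketch):
+ *
+ *	__devlink_region_snapshot_id_get(devlink, &id);	// count = 1 (caller)
+ *	__devlink_region_snapshot_create(r1, d1, id);	// count = 2
+ *	__devlink_region_snapshot_create(r2, d2, id);	// count = 3
+ *	devlink_region_snapshot_id_put(devlink, id);	// count = 2
+ *
+ * Deleting both snapshots then drops the count to zero and erases the
+ * id, making it available for reallocation.
+ */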
+
+/**
+ * __devlink_snapshot_id_insert - Insert a specific snapshot ID
+ * @devlink: devlink instance
+ * @id: the snapshot id
+ *
+ * Mark the given snapshot id as used by inserting a zero value into the
+ * snapshot xarray.
+ *
+ * This must be called while holding the devlink instance lock. Unlike
+ * devlink_region_snapshot_id_get(), the initial reference count is zero,
+ * not one.
+ * It is expected that the id will immediately be used before
+ * releasing the devlink instance lock.
+ *
+ * Returns zero on success, or an error code if the snapshot id could not
+ * be inserted.
+ */
+static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
+{
+ int err;
+
+ xa_lock(&devlink->snapshot_ids);
+ if (xa_load(&devlink->snapshot_ids, id)) {
+ xa_unlock(&devlink->snapshot_ids);
+ return -EEXIST;
+ }
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
+ GFP_ATOMIC));
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
+}
+
+/**
+ * __devlink_region_snapshot_id_get - get snapshot ID
+ * @devlink: devlink instance
+ * @id: storage to return snapshot id
+ *
+ * Allocates a new snapshot id. Returns zero on success, or a negative
+ * error on failure. Must be called while holding the devlink instance
+ * lock.
+ *
+ * Snapshot IDs are tracked using an xarray which stores the number of
+ * users of the snapshot id.
+ *
+ * Note that the caller of this function counts as a 'user', in order to
+ * avoid race conditions. The caller must release its hold on the
+ * snapshot by using devlink_region_snapshot_id_put.
+ */
+static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
+{
+ return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
+ xa_limit_32b, GFP_KERNEL);
+}
+
+/**
+ * __devlink_region_snapshot_create - create a new snapshot
+ * @region: devlink region of the snapshot
+ * @data: snapshot data
+ * @snapshot_id: snapshot id to be created
+ *
+ * This will add a new snapshot of a region. The snapshot
+ * will be stored on the region struct and can be accessed
+ * from devlink. This is useful for future analyses of snapshots.
+ * Multiple snapshots can be created on a region.
+ * The @snapshot_id should be obtained using the getter function.
+ *
+ * Must be called only while holding the region snapshot lock.
+ */
+static int
+__devlink_region_snapshot_create(struct devlink_region *region,
+ u8 *data, u32 snapshot_id)
+{
+ struct devlink *devlink = region->devlink;
+ struct devlink_snapshot *snapshot;
+ int err;
+
+ lockdep_assert_held(&region->snapshot_lock);
+
+ /* check if region can hold one more snapshot */
+ if (region->cur_snapshots == region->max_snapshots)
+ return -ENOSPC;
+
+ if (devlink_region_snapshot_get_by_id(region, snapshot_id))
+ return -EEXIST;
+
+ snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+ if (!snapshot)
+ return -ENOMEM;
+
+ err = __devlink_snapshot_id_increment(devlink, snapshot_id);
+ if (err)
+ goto err_snapshot_id_increment;
+
+ snapshot->id = snapshot_id;
+ snapshot->region = region;
+ snapshot->data = data;
+
+ list_add_tail(&snapshot->list, &region->snapshot_list);
+
+ region->cur_snapshots++;
+
+ devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
+ return 0;
+
+err_snapshot_id_increment:
+ kfree(snapshot);
+ return err;
+}
+
+static void devlink_region_snapshot_del(struct devlink_region *region,
+ struct devlink_snapshot *snapshot)
+{
+ struct devlink *devlink = region->devlink;
+
+ lockdep_assert_held(&region->snapshot_lock);
+
+ devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+ region->cur_snapshots--;
+ list_del(&snapshot->list);
+ region->ops->destructor(snapshot->data);
+ __devlink_snapshot_id_decrement(devlink, snapshot->id);
+ kfree(snapshot);
+}
+
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *port = NULL;
+ struct devlink_region *region;
+ const char *region_name;
+ struct sk_buff *msg;
+ unsigned int index;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
+ return -EINVAL;
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+ info->snd_portid, info->snd_seq, 0,
+ region);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct devlink_port *port,
+ int *idx, int start, int flags)
+{
+ struct devlink_region *region;
+ int err = 0;
+
+ list_for_each_entry(region, &port->region_list, list) {
+ if (*idx < start) {
+ (*idx)++;
+ continue;
+ }
+ err = devlink_nl_region_fill(msg, port->devlink,
+ DEVLINK_CMD_REGION_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ flags, region);
+ if (err)
+ goto out;
+ (*idx)++;
+ }
+
+out:
+ return err;
+}
+
+static int devlink_nl_region_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_region *region;
+ struct devlink_port *port;
+ unsigned long port_index;
+ int idx = 0;
+ int err;
+
+ list_for_each_entry(region, &devlink->region_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_region_fill(msg, devlink,
+ DEVLINK_CMD_REGION_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ region);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
+ }
+
+ xa_for_each(&devlink->ports, port_index, port) {
+ err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
+ state->idx, flags);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one);
+}
+
+int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_snapshot *snapshot;
+ struct devlink_port *port = NULL;
+ struct devlink_region *region;
+ const char *region_name;
+ unsigned int index;
+ u32 snapshot_id;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
+ GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
+ return -EINVAL;
+
+ region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+ snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region)
+ return -EINVAL;
+
+ mutex_lock(&region->snapshot_lock);
+ snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+ if (!snapshot) {
+ mutex_unlock(&region->snapshot_lock);
+ return -EINVAL;
+ }
+
+ devlink_region_snapshot_del(region, snapshot);
+ mutex_unlock(&region->snapshot_lock);
+ return 0;
+}
+
+int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_snapshot *snapshot;
+ struct devlink_port *port = NULL;
+ struct nlattr *snapshot_id_attr;
+ struct devlink_region *region;
+ const char *region_name;
+ unsigned int index;
+ u32 snapshot_id;
+ u8 *data;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
+ NL_SET_ERR_MSG(info->extack, "No region name provided");
+ return -EINVAL;
+ }
+
+ region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region) {
+ NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
+ return -EINVAL;
+ }
+
+ if (!region->ops->snapshot) {
+ NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&region->snapshot_lock);
+
+ if (region->cur_snapshots == region->max_snapshots) {
+ NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+ if (snapshot_id_attr) {
+ snapshot_id = nla_get_u32(snapshot_id_attr);
+
+ if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+ NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ err = __devlink_snapshot_id_insert(devlink, snapshot_id);
+ if (err)
+ goto unlock;
+ } else {
+ err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
+ goto unlock;
+ }
+ }
+
+ if (port)
+ err = region->port_ops->snapshot(port, region->port_ops,
+ info->extack, &data);
+ else
+ err = region->ops->snapshot(devlink, region->ops,
+ info->extack, &data);
+ if (err)
+ goto err_snapshot_capture;
+
+ err = __devlink_region_snapshot_create(region, data, snapshot_id);
+ if (err)
+ goto err_snapshot_create;
+
+ if (!snapshot_id_attr) {
+ struct sk_buff *msg;
+
+ snapshot = devlink_region_snapshot_get_by_id(region,
+ snapshot_id);
+ if (WARN_ON(!snapshot)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ msg = devlink_nl_region_notify_build(region, snapshot,
+ DEVLINK_CMD_REGION_NEW,
+ info->snd_portid,
+ info->snd_seq);
+ err = PTR_ERR_OR_ZERO(msg);
+ if (err)
+ goto err_notify;
+
+ err = genlmsg_reply(msg, info);
+ if (err)
+ goto err_notify;
+ }
+
+ mutex_unlock(&region->snapshot_lock);
+ return 0;
+
+err_snapshot_create:
+ region->ops->destructor(data);
+err_snapshot_capture:
+ __devlink_snapshot_id_decrement(devlink, snapshot_id);
+ mutex_unlock(&region->snapshot_lock);
+ return err;
+
+err_notify:
+ devlink_region_snapshot_del(region, snapshot);
+unlock:
+ mutex_unlock(&region->snapshot_lock);
+ return err;
+}
+
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+ u8 *chunk, u32 chunk_size,
+ u64 addr)
+{
+ struct nlattr *chunk_attr;
+ int err;
+
+ chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
+ if (!chunk_attr)
+ return -EINVAL;
+
+ err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+ DEVLINK_ATTR_PAD);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(msg, chunk_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, chunk_attr);
+ return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack *extack);
+
+static int
+devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
+ void *cb_priv, u64 start_offset, u64 end_offset,
+ u64 *new_offset, struct netlink_ext_ack *extack)
+{
+ u64 curr_offset = start_offset;
+ int err = 0;
+ u8 *data;
+
+ /* Allocate and re-use a single buffer */
+ data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ *new_offset = start_offset;
+
+ while (curr_offset < end_offset) {
+ u32 data_size;
+
+ data_size = min_t(u32, end_offset - curr_offset,
+ DEVLINK_REGION_READ_CHUNK_SIZE);
+
+ err = cb(cb_priv, data, data_size, curr_offset, extack);
+ if (err)
+ break;
+
+ err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
+ if (err)
+ break;
+
+ curr_offset += data_size;
+ }
+ *new_offset = curr_offset;
+
+ kfree(data);
+
+ return err;
+}
+
+static int
+devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack __always_unused *extack)
+{
+ struct devlink_snapshot *snapshot = cb_priv;
+
+ memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
+
+ return 0;
+}
+
+static int
+devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->port_ops->read(region->port, region->port_ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
+static int
+devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->ops->read(region->devlink, region->ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
+int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
+ u64 ret_offset, start_offset, end_offset = U64_MAX;
+ struct nlattr **attrs = info->info.attrs;
+ struct devlink_port *port = NULL;
+ devlink_chunk_fill_t *region_cb;
+ struct devlink_region *region;
+ const char *region_name;
+ struct devlink *devlink;
+ unsigned int index;
+ void *region_cb_priv;
+ void *hdr;
+ int err;
+
+ start_offset = state->start_offset;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+
+ if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
+ NL_SET_ERR_MSG(cb->extack, "No region name provided");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
+ region_name = nla_data(region_attr);
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+ if (!snapshot_attr) {
+ if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!region->ops->read) {
+ NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ if (port)
+ region_cb = &devlink_region_port_direct_fill;
+ else
+ region_cb = &devlink_region_direct_fill;
+ region_cb_priv = region;
+ } else {
+ struct devlink_snapshot *snapshot;
+ u32 snapshot_id;
+
+ if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ snapshot_id = nla_get_u32(snapshot_attr);
+ snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+ if (!snapshot) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ region_cb = &devlink_region_snapshot_fill;
+ region_cb_priv = snapshot;
+ }
+
+ if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+ attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+ if (!start_offset)
+ start_offset =
+ nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+ end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+ end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+ }
+
+ if (end_offset > region->size)
+ end_offset = region->size;
+
+ /* return 0 if there is no further data to read */
+ if (start_offset == end_offset) {
+ err = 0;
+ goto out_unlock;
+ }
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+ DEVLINK_CMD_REGION_READ);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto out_unlock;
+ }
+
+ err = devlink_nl_put_handle(skb, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (region->port) {
+ err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+ if (err)
+ goto nla_put_failure;
+
+ chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
+ if (!chunks_attr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
+ start_offset, end_offset, &ret_offset,
+ cb->extack);
+
+ if (err && err != -EMSGSIZE)
+ goto nla_put_failure;
+
+	/* Check that some progress was made, to prevent an infinite loop */
+ if (ret_offset == start_offset) {
+ err = -EINVAL;
+ goto nla_put_failure;
+ }
+
+ state->start_offset = ret_offset;
+
+ nla_nest_end(skb, chunks_attr);
+ genlmsg_end(skb, hdr);
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return skb->len;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+out_unlock:
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return err;
+}
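+
+/* The dumpit above backs "devlink region read"; e.g. (handle hypothetical):
+ *
+ *	devlink region read pci/0000:00:05.0/cr-space snapshot 1 \
+ *		address 0 length 256
+ *
+ * When no snapshot id is given, the DEVLINK_ATTR_REGION_DIRECT flag
+ * selects a direct read, which only regions implementing the read()
+ * op support.
+ */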
+
+/**
+ * devl_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ */
+struct devlink_region *devl_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots,
+ u64 region_size)
+{
+ struct devlink_region *region;
+
+ devl_assert_locked(devlink);
+
+ if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+ return ERR_PTR(-EINVAL);
+
+ if (devlink_region_get_by_name(devlink, ops->name))
+ return ERR_PTR(-EEXIST);
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region)
+ return ERR_PTR(-ENOMEM);
+
+ region->devlink = devlink;
+ region->max_snapshots = region_max_snapshots;
+ region->ops = ops;
+ region->size = region_size;
+ INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
+ list_add_tail(&region->list, &devlink->region_list);
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ return region;
+}
+EXPORT_SYMBOL_GPL(devl_region_create);
+
+/**
+ * devlink_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ *
+ * Context: Takes and releases devlink->lock (a mutex).
+ */
+struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink_region *region;
+
+ devl_lock(devlink);
+ region = devl_region_create(devlink, ops, region_max_snapshots,
+ region_size);
+ devl_unlock(devlink);
+ return region;
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
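+
+/* Minimal region-ops sketch for devl_region_create(); the snapshot op must
+ * return a buffer that the destructor can free, matching the calls in
+ * devlink_nl_cmd_region_new() (foo_* names and size are hypothetical):
+ *
+ *	static int foo_region_snapshot(struct devlink *devlink,
+ *				       const struct devlink_region_ops *ops,
+ *				       struct netlink_ext_ack *extack,
+ *				       u8 **data)
+ *	{
+ *		u8 *buf = kzalloc(FOO_REGION_SIZE, GFP_KERNEL);
+ *
+ *		if (!buf)
+ *			return -ENOMEM;
+ *		foo_hw_read_region(buf, FOO_REGION_SIZE);
+ *		*data = buf;
+ *		return 0;
+ *	}
+ *
+ *	static const struct devlink_region_ops foo_region_ops = {
+ *		.name = "foo-space",
+ *		.snapshot = foo_region_snapshot,
+ *		.destructor = kfree,
+ *	};
+ */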
+
+/**
+ * devlink_port_region_create - create a new address region for a port
+ *
+ * @port: devlink port
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ *
+ * Context: Takes and releases devlink->lock (a mutex).
+ */
+struct devlink_region *
+devlink_port_region_create(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink *devlink = port->devlink;
+ struct devlink_region *region;
+ int err = 0;
+
+ ASSERT_DEVLINK_PORT_INITIALIZED(port);
+
+ if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+ return ERR_PTR(-EINVAL);
+
+ devl_lock(devlink);
+
+ if (devlink_port_region_get_by_name(port, ops->name)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ region->devlink = devlink;
+ region->port = port;
+ region->max_snapshots = region_max_snapshots;
+ region->port_ops = ops;
+ region->size = region_size;
+ INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
+ list_add_tail(&region->list, &port->region_list);
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ devl_unlock(devlink);
+ return region;
+
+unlock:
+ devl_unlock(devlink);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_port_region_create);
+
+/**
+ * devl_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ */
+void devl_region_destroy(struct devlink_region *region)
+{
+ struct devlink *devlink = region->devlink;
+ struct devlink_snapshot *snapshot, *ts;
+
+ devl_assert_locked(devlink);
+
+ /* Free all snapshots of region */
+ mutex_lock(&region->snapshot_lock);
+ list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+ devlink_region_snapshot_del(region, snapshot);
+ mutex_unlock(&region->snapshot_lock);
+
+ list_del(&region->list);
+ mutex_destroy(&region->snapshot_lock);
+
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+ kfree(region);
+}
+EXPORT_SYMBOL_GPL(devl_region_destroy);
+
+/**
+ * devlink_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ *
+ * Context: Takes and releases devlink->lock (a mutex).
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+ struct devlink *devlink = region->devlink;
+
+ devl_lock(devlink);
+ devl_region_destroy(region);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
+/**
+ * devlink_region_snapshot_id_get - get snapshot ID
+ * @devlink: devlink
+ * @id: storage to return id
+ *
+ * This function should be called when adding a new snapshot.
+ * Drivers should use the same id for multiple snapshots taken
+ * on multiple regions at the same time/by the same trigger.
+ *
+ * The caller of this function must use devlink_region_snapshot_id_put
+ * when finished creating regions using this id.
+ *
+ * Returns zero on success, or a negative error code on failure.
+ */
+int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
+{
+ return __devlink_region_snapshot_id_get(devlink, id);
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
+
+/**
+ * devlink_region_snapshot_id_put - put snapshot ID reference
+ * @devlink: devlink
+ * @id: id to release reference on
+ *
+ * This should be called by a driver after finishing creating snapshots
+ * with an id. Doing so ensures that the ID can later be released in the
+ * event that all snapshots using it have been destroyed.
+ */
+void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
+{
+ __devlink_snapshot_id_decrement(devlink, id);
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
+
+/**
+ * devlink_region_snapshot_create - create a new snapshot
+ * @region: devlink region of the snapshot
+ * @data: snapshot data
+ * @snapshot_id: snapshot id to be created
+ *
+ * This will add a new snapshot of a region. The snapshot
+ * will be stored on the region struct and can be accessed
+ * from devlink. This is useful for future analyses of snapshots.
+ * Multiple snapshots can be created on a region.
+ * The @snapshot_id should be obtained using the getter function.
+ */
+int devlink_region_snapshot_create(struct devlink_region *region,
+ u8 *data, u32 snapshot_id)
+{
+ int err;
+
+ mutex_lock(&region->snapshot_lock);
+ err = __devlink_region_snapshot_create(region, data, snapshot_id);
+ mutex_unlock(&region->snapshot_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
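+
+/* End-to-end sketch tying the exported snapshot helpers together
+ * (the fw_dump buffer and its length are hypothetical):
+ *
+ *	u32 id;
+ *	u8 *data;
+ *
+ *	if (devlink_region_snapshot_id_get(devlink, &id))
+ *		return;
+ *	data = kmemdup(fw_dump, fw_dump_len, GFP_KERNEL);
+ *	if (data && devlink_region_snapshot_create(region, data, id))
+ *		kfree(data);	// ownership passes only on success
+ *	devlink_region_snapshot_id_put(devlink, id);
+ */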
diff --git a/net/devlink/resource.c b/net/devlink/resource.c
new file mode 100644
index 000000000000..c8b615e4c385
--- /dev/null
+++ b/net/devlink/resource.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+/**
+ * struct devlink_resource - devlink resource
+ * @name: name of the resource
+ * @id: id, per devlink instance
+ * @size: size of the resource
+ * @size_new: updated size of the resource, reload is needed
+ * @size_valid: valid in case the total size of the resource is valid
+ * including its children
+ * @parent: parent resource
+ * @size_params: size parameters
+ * @list: parent list
+ * @resource_list: list of child resources
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+struct devlink_resource {
+ const char *name;
+ u64 id;
+ u64 size;
+ u64 size_new;
+ bool size_valid;
+ struct devlink_resource *parent;
+ struct devlink_resource_size_params size_params;
+ struct list_head list;
+ struct list_head resource_list;
+ devlink_resource_occ_get_t *occ_get;
+ void *occ_get_priv;
+};
+
+static struct devlink_resource *
+devlink_resource_find(struct devlink *devlink,
+ struct devlink_resource *resource, u64 resource_id)
+{
+ struct list_head *resource_list;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ struct devlink_resource *child_resource;
+
+ if (resource->id == resource_id)
+ return resource;
+
+ child_resource = devlink_resource_find(devlink, resource,
+ resource_id);
+ if (child_resource)
+ return child_resource;
+ }
+ return NULL;
+}
+
+static void
+devlink_resource_validate_children(struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ bool size_valid = true;
+ u64 parts_size = 0;
+
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list)
+ parts_size += child_resource->size_new;
+
+ if (parts_size > resource->size_new)
+ size_valid = false;
+out:
+ resource->size_valid = size_valid;
+}
+
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+	u64 remainder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &remainder);
+ if (remainder) {
+ NL_SET_ERR_MSG(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
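
Concretely, the three checks above admit only sizes inside the min/max window that are whole multiples of the granularity. A hypothetical parameter set for illustration:

    /* Hypothetical size parameters, for illustration only. */
    static const struct devlink_resource_size_params example_params = {
            .size_min = 128,
            .size_max = 1024,
            .size_granularity = 128,
            .unit = DEVLINK_RESOURCE_UNIT_ENTRY,
    };

    /* With these parameters:
     *   size = 256  -> accepted (in [128, 1024] and 256 % 128 == 0)
     *   size = 2048 -> "Size larger than maximum"
     *   size = 64   -> "Size smaller than minimum"
     *   size = 200  -> "Wrong granularity" (200 % 128 != 0)
     */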
+
+int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ u64 resource_id;
+ u64 size;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
+ GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
+ return -EINVAL;
+ resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource)
+ return -EINVAL;
+
+ size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
+ err = devlink_resource_validate_size(resource, size, info->extack);
+ if (err)
+ return err;
+
+ resource->size_new = size;
+ devlink_resource_validate_children(resource);
+ if (resource->parent)
+ devlink_resource_validate_children(resource->parent);
+ return 0;
+}
+
+static int
+devlink_resource_size_params_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ struct devlink_resource_size_params *size_params;
+
+ size_params = &resource->size_params;
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min, DEVLINK_ATTR_PAD) ||
+ nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_resource_occ_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ if (!resource->occ_get)
+ return 0;
+ return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->occ_get(resource->occ_get_priv),
+ DEVLINK_ATTR_PAD);
+}
+
+static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ struct nlattr *child_resource_attr;
+ struct nlattr *resource_attr;
+
+ resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
+ if (!resource_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
+ DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (resource->size != resource->size_new &&
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (devlink_resource_occ_put(resource, skb))
+ goto nla_put_failure;
+ if (devlink_resource_size_params_put(resource, skb))
+ goto nla_put_failure;
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
+ resource->size_valid))
+ goto nla_put_failure;
+
+ child_resource_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_RESOURCE_LIST);
+ if (!child_resource_attr)
+ goto nla_put_failure;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list) {
+ if (devlink_resource_put(devlink, skb, child_resource))
+ goto resource_put_failure;
+ }
+
+ nla_nest_end(skb, child_resource_attr);
+out:
+ nla_nest_end(skb, resource_attr);
+ return 0;
+
+resource_put_failure:
+ nla_nest_cancel(skb, child_resource_attr);
+nla_put_failure:
+ nla_nest_cancel(skb, resource_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_resource_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ struct nlattr *resources_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ resource = list_first_entry(&devlink->resource_list,
+ struct devlink_resource, list);
+start_again:
+ err = devlink_nl_msg_reply_and_new(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+
+ resources_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_RESOURCE_LIST);
+ if (!resources_attr)
+ goto nla_put_failure;
+
+ incomplete = false;
+ i = 0;
+ list_for_each_entry_from(resource, &devlink->resource_list, list) {
+ err = devlink_resource_put(devlink, skb, resource);
+ if (err) {
+ if (!i)
+ goto err_resource_put;
+ incomplete = true;
+ break;
+ }
+ i++;
+ }
+ nla_nest_end(skb, resources_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_nl_msg_reply_and_new(&skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_resource_put:
+ nlmsg_free(skb);
+ return err;
+}
+
+int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->resource_list))
+ return -EOPNOTSUPP;
+
+ return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
+}
+
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
+{
+ struct list_head *resource_list;
+ int err = 0;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ if (!resource->size_valid)
+ return -EINVAL;
+ err = devlink_resources_validate(devlink, resource, info);
+ if (err)
+ return err;
+ }
+ return err;
+}
+
+/**
+ * devl_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ */
+int devl_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ struct devlink_resource *resource;
+ struct list_head *resource_list;
+ bool top_hierarchy;
+
+ lockdep_assert_held(&devlink->lock);
+
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (resource)
+ return -EINVAL;
+
+ resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+ if (!resource)
+ return -ENOMEM;
+
+ if (top_hierarchy) {
+ resource_list = &devlink->resource_list;
+ } else {
+ struct devlink_resource *parent_resource;
+
+ parent_resource = devlink_resource_find(devlink, NULL,
+ parent_resource_id);
+ if (parent_resource) {
+ resource_list = &parent_resource->resource_list;
+ resource->parent = parent_resource;
+ } else {
+ kfree(resource);
+ return -EINVAL;
+ }
+ }
+
+ resource->name = resource_name;
+ resource->size = resource_size;
+ resource->size_new = resource_size;
+ resource->id = resource_id;
+ resource->size_valid = true;
+ memcpy(&resource->size_params, size_params,
+ sizeof(resource->size_params));
+ INIT_LIST_HEAD(&resource->resource_list);
+ list_add_tail(&resource->list, resource_list);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_register);
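
A minimal driver-side sketch of building a two-level hierarchy with this API; the resource ids, names, and sizes are hypothetical, and the caller is assumed to hold devlink->lock (e.g. via devl_lock() on its probe path):

    enum { MY_RES_ID_TABLES = 1, MY_RES_ID_FDB = 2 };  /* hypothetical ids */

    static int my_drv_register_resources(struct devlink *devlink)
    {
            struct devlink_resource_size_params params;
            int err;

            /* min 0, max 1M entries, granularity 128 entries */
            devlink_resource_size_params_init(&params, 0, 1 << 20, 128,
                                              DEVLINK_RESOURCE_UNIT_ENTRY);

            /* Top-level resource: no parent. */
            err = devl_resource_register(devlink, "tables", 1 << 20,
                                         MY_RES_ID_TABLES,
                                         DEVLINK_RESOURCE_ID_PARENT_TOP,
                                         &params);
            if (err)
                    return err;

            /* Child resource, nested under "tables" via the parent id. */
            return devl_resource_register(devlink, "fdb", 1 << 19,
                                          MY_RES_ID_FDB, MY_RES_ID_TABLES,
                                          &params);
    }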
+
+/**
+ * devlink_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_resource_register(devlink, resource_name, resource_size,
+ resource_id, parent_resource_id, size_params);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_register);
+
+static void devlink_resource_unregister(struct devlink *devlink,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *tmp, *child_resource;
+
+ list_for_each_entry_safe(child_resource, tmp, &resource->resource_list,
+ list) {
+ devlink_resource_unregister(devlink, child_resource);
+ list_del(&child_resource->list);
+ kfree(child_resource);
+ }
+}
+
+/**
+ * devl_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ */
+void devl_resources_unregister(struct devlink *devlink)
+{
+ struct devlink_resource *tmp, *child_resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
+ list) {
+ devlink_resource_unregister(devlink, child_resource);
+ list_del(&child_resource->list);
+ kfree(child_resource);
+ }
+}
+EXPORT_SYMBOL_GPL(devl_resources_unregister);
+
+/**
+ * devlink_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_resources_unregister(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_resources_unregister(devlink);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resources_unregister);
+
+/**
+ * devl_resource_size_get - get and update size
+ *
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to store the pending (new) size in; the
+ * resource's current size is updated to this value as well
+ */
+int devl_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource)
+ return -EINVAL;
+ *p_resource_size = resource->size_new;
+ resource->size = resource->size_new;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_size_get);
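
A sketch of the consumer side: during a reload the driver reads back the size the user configured, which this helper also commits as the current size. The resource id and function name are hypothetical:

    static int my_drv_reload_provision(struct devlink *devlink)
    {
            u64 fdb_size;
            int err;

            err = devl_resource_size_get(devlink, MY_RES_ID_FDB, &fdb_size);
            if (err)
                    return err;

            /* ... provision the device with fdb_size entries ... */
            return 0;
    }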
+
+/**
+ * devl_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+void devl_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ return;
+ WARN_ON(resource->occ_get);
+
+ resource->occ_get = occ_get;
+ resource->occ_get_priv = occ_get_priv;
+}
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
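
The registered callback is invoked from the resource dump path above (devlink_resource_occ_put()), so it should simply return the current occupancy. A hypothetical getter and its registration:

    struct my_drv {                         /* hypothetical driver state */
            atomic64_t fdb_entries_in_use;
    };

    static u64 my_fdb_occ_get(void *priv)
    {
            struct my_drv *drv = priv;

            return (u64)atomic64_read(&drv->fdb_entries_in_use);
    }

    /* At init time, with devlink->lock held: */
    devl_resource_occ_get_register(devlink, MY_RES_ID_FDB,
                                   my_fdb_occ_get, drv);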
+
+/**
+ * devlink_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ devl_lock(devlink);
+ devl_resource_occ_get_register(devlink, resource_id,
+ occ_get, occ_get_priv);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ * devl_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ */
+void devl_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ return;
+ WARN_ON(!resource->occ_get);
+
+ resource->occ_get = NULL;
+ resource->occ_get_priv = NULL;
+}
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
+
+/**
+ * devlink_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
+ devl_lock(devlink);
+ devl_resource_occ_get_unregister(devlink, resource_id);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
diff --git a/net/devlink/sb.c b/net/devlink/sb.c
new file mode 100644
index 000000000000..bd677fff5ec8
--- /dev/null
+++ b/net/devlink/sb.c
@@ -0,0 +1,996 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+struct devlink_sb {
+ struct list_head list;
+ unsigned int index;
+ u32 size;
+ u16 ingress_pools_count;
+ u16 egress_pools_count;
+ u16 ingress_tc_count;
+ u16 egress_tc_count;
+};
+
+static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
+{
+ return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
+}
+
+static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ struct devlink_sb *devlink_sb;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (devlink_sb->index == sb_index)
+ return devlink_sb;
+ }
+ return NULL;
+}
+
+static bool devlink_sb_index_exists(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ return devlink_sb_get_by_index(devlink, sb_index);
+}
+
+static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_SB_INDEX]) {
+ u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+ if (!devlink_sb)
+ return ERR_PTR(-ENODEV);
+ return devlink_sb;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_sb_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
+ struct nlattr **attrs,
+ u16 *p_pool_index)
+{
+ u16 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
+ return -EINVAL;
+
+ val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
+ if (val >= devlink_sb_pool_count(devlink_sb))
+ return -EINVAL;
+ *p_pool_index = val;
+ return 0;
+}
+
+static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
+ struct genl_info *info,
+ u16 *p_pool_index)
+{
+ return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
+ p_pool_index);
+}
+
+static int
+devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
+ enum devlink_sb_pool_type *p_pool_type)
+{
+ u8 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
+ return -EINVAL;
+
+ val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
+ if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
+ val != DEVLINK_SB_POOL_TYPE_EGRESS)
+ return -EINVAL;
+ *p_pool_type = val;
+ return 0;
+}
+
+static int
+devlink_sb_pool_type_get_from_info(struct genl_info *info,
+ enum devlink_sb_pool_type *p_pool_type)
+{
+ return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
+}
+
+static int
+devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
+ enum devlink_sb_threshold_type *p_th_type)
+{
+ u8 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
+ return -EINVAL;
+
+ val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
+ if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
+ val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
+ return -EINVAL;
+ *p_th_type = val;
+ return 0;
+}
+
+static int
+devlink_sb_th_type_get_from_info(struct genl_info *info,
+ enum devlink_sb_threshold_type *p_th_type)
+{
+ return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
+}
+
+static int
+devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
+ struct nlattr **attrs,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_tc_index)
+{
+ u16 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
+ return -EINVAL;
+
+ val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
+ if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
+ val >= devlink_sb->ingress_tc_count)
+ return -EINVAL;
+ if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
+ val >= devlink_sb->egress_tc_count)
+ return -EINVAL;
+ *p_tc_index = val;
+ return 0;
+}
+
+static int
+devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
+ struct genl_info *info,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_tc_index)
+{
+ return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
+ pool_type, p_tc_index);
+}
+
+static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
+ devlink_sb->ingress_pools_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
+ devlink_sb->egress_pools_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
+ devlink_sb->ingress_tc_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
+ devlink_sb->egress_tc_count))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_get_dump_one);
+}
+
+static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u16 pool_index, enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_sb_pool_info pool_info;
+ void *hdr;
+ int err;
+
+ err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
+ pool_index, &pool_info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+ pool_info.threshold_type))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
+ pool_info.cell_size))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ u16 pool_index;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops->sb_pool_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
+ DEVLINK_CMD_SB_POOL_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq, int flags)
+{
+ u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ u16 pool_index;
+ int err;
+
+ for (pool_index = 0; pool_index < pool_count; pool_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_pool_fill(msg, devlink,
+ devlink_sb,
+ pool_index,
+ DEVLINK_CMD_SB_POOL_NEW,
+ portid, seq, flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ return 0;
+}
+
+static int
+devlink_nl_sb_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int err = 0;
+ int idx = 0;
+
+ if (!devlink->ops->sb_pool_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_pool_get_dump_one);
+}
+
+static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink->ops;
+
+ if (ops->sb_pool_set)
+ return ops->sb_pool_set(devlink, sb_index, pool_index,
+ size, threshold_type, extack);
+ return -EOPNOTSUPP;
+}
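
For reference, a minimal sketch of the driver ops this thin wrapper dispatches to; the reported values and the hardware plumbing are hypothetical:

    static int my_sb_pool_get(struct devlink *devlink, unsigned int sb_index,
                              u16 pool_index,
                              struct devlink_sb_pool_info *pool_info)
    {
            /* Report the pool's current configuration. */
            pool_info->pool_type = DEVLINK_SB_POOL_TYPE_INGRESS;
            pool_info->size = 1 << 20;              /* example: bytes */
            pool_info->threshold_type = DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC;
            pool_info->cell_size = 96;              /* example: bytes */
            return 0;
    }

    static int my_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
                              u16 pool_index, u32 size,
                              enum devlink_sb_threshold_type threshold_type,
                              struct netlink_ext_ack *extack)
    {
            /* ... push the new size and threshold type to hardware ... */
            return 0;
    }

    static const struct devlink_ops my_devlink_ops = {
            .sb_pool_get = my_sb_pool_get,
            .sb_pool_set = my_sb_pool_set,
    };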
+
+int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_sb_threshold_type threshold_type;
+ struct devlink_sb *devlink_sb;
+ u16 pool_index;
+ u32 size;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ err = devlink_sb_th_type_get_from_info(info, &threshold_type);
+ if (err)
+ return err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
+ return -EINVAL;
+
+ size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
+ return devlink_sb_pool_set(devlink, devlink_sb->index,
+ pool_index, size, threshold_type,
+ info->extack);
+}
+
+static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ struct devlink_sb *devlink_sb,
+ u16 pool_index,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ u32 threshold;
+ void *hdr;
+ int err;
+
+ err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
+ pool_index, &threshold);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+ goto nla_put_failure;
+
+ if (ops->sb_occ_port_pool_get) {
+ u32 cur;
+ u32 max;
+
+ err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
+ pool_index, &cur, &max);
+ if (err && err != -EOPNOTSUPP)
+ goto sb_occ_get_failure;
+ if (!err) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+ goto nla_put_failure;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+sb_occ_get_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ u16 pool_index;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops->sb_port_pool_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
+ devlink_sb, pool_index,
+ DEVLINK_CMD_SB_PORT_POOL_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_port *devlink_port;
+ u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ unsigned long port_index;
+ u16 pool_index;
+ int err;
+
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
+ for (pool_index = 0; pool_index < pool_count; pool_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_port_pool_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ pool_index,
+ DEVLINK_CMD_SB_PORT_POOL_NEW,
+ portid, seq, flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ }
+ return 0;
+}
+
+static int
+devlink_nl_sb_port_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int idx = 0;
+ int err = 0;
+
+ if (!devlink->ops->sb_port_pool_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_port_pool_get_dump_one);
+}
+
+static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ if (ops->sb_port_pool_set)
+ return ops->sb_port_pool_set(devlink_port, sb_index,
+ pool_index, threshold, extack);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
+ u16 pool_index;
+ u32 threshold;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
+ return -EINVAL;
+
+ threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+ return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
+ pool_index, threshold, info->extack);
+}
+
+static int
+devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ struct devlink_sb *devlink_sb, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ u16 pool_index;
+ u32 threshold;
+ void *hdr;
+ int err;
+
+ err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
+ tc_index, pool_type,
+ &pool_index, &threshold);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+ goto nla_put_failure;
+
+ if (ops->sb_occ_tc_port_bind_get) {
+ u32 cur;
+ u32 max;
+
+ err = ops->sb_occ_tc_port_bind_get(devlink_port,
+ devlink_sb->index,
+ tc_index, pool_type,
+ &cur, &max);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ if (!err) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+ goto nla_put_failure;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ enum devlink_sb_pool_type pool_type;
+ u16 tc_index;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+ if (err)
+ return err;
+
+ err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+ pool_type, &tc_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops->sb_tc_pool_bind_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
+ devlink_sb, tc_index, pool_type,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ info->snd_portid,
+ info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+ int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_port *devlink_port;
+ unsigned long port_index;
+ u16 tc_index;
+ int err;
+
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
+ for (tc_index = 0;
+ tc_index < devlink_sb->ingress_tc_count; tc_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ tc_index,
+ DEVLINK_SB_POOL_TYPE_INGRESS,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ portid, seq,
+ flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ for (tc_index = 0;
+ tc_index < devlink_sb->egress_tc_count; tc_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ tc_index,
+ DEVLINK_SB_POOL_TYPE_EGRESS,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ portid, seq,
+ flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ }
+ return 0;
+}
+
+static int devlink_nl_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int idx = 0;
+ int err = 0;
+
+ if (!devlink->ops->sb_tc_pool_bind_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb,
+ devlink_nl_sb_tc_pool_bind_get_dump_one);
+}
+
+static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ if (ops->sb_tc_pool_bind_set)
+ return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold, extack);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_sb_pool_type pool_type;
+ struct devlink_sb *devlink_sb;
+ u16 tc_index;
+ u16 pool_index;
+ u32 threshold;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+ if (err)
+ return err;
+
+ err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+ pool_type, &tc_index);
+ if (err)
+ return err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
+ return -EINVAL;
+
+ threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+ return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
+ tc_index, pool_type,
+ pool_index, threshold, info->extack);
+}
+
+int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ if (ops->sb_occ_snapshot)
+ return ops->sb_occ_snapshot(devlink, devlink_sb->index);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ if (ops->sb_occ_max_clear)
+ return ops->sb_occ_max_clear(devlink, devlink_sb->index);
+ return -EOPNOTSUPP;
+}
+
+int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ struct devlink_sb *devlink_sb;
+
+ lockdep_assert_held(&devlink->lock);
+
+ if (devlink_sb_index_exists(devlink, sb_index))
+ return -EEXIST;
+
+ devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
+ if (!devlink_sb)
+ return -ENOMEM;
+ devlink_sb->index = sb_index;
+ devlink_sb->size = size;
+ devlink_sb->ingress_pools_count = ingress_pools_count;
+ devlink_sb->egress_pools_count = egress_pools_count;
+ devlink_sb->ingress_tc_count = ingress_tc_count;
+ devlink_sb->egress_tc_count = egress_tc_count;
+ list_add_tail(&devlink_sb->list, &devlink->sb_list);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_sb_register);
+
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
+ egress_pools_count, ingress_tc_count,
+ egress_tc_count);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_sb_register);
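
A sketch of probe-time registration with the locked wrapper; the buffer geometry below is hypothetical:

    static int my_drv_sb_init(struct devlink *devlink)
    {
            return devlink_sb_register(devlink, 0,  /* sb_index */
                                       16 << 20,    /* size in bytes */
                                       4, 4,        /* ingress/egress pools */
                                       8, 8);       /* ingress/egress TCs */
    }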
+
+void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ struct devlink_sb *devlink_sb;
+
+ lockdep_assert_held(&devlink->lock);
+
+ devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+ WARN_ON(!devlink_sb);
+ list_del(&devlink_sb->list);
+ kfree(devlink_sb);
+}
+EXPORT_SYMBOL_GPL(devl_sb_unregister);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ devl_lock(devlink);
+ devl_sb_unregister(devlink, sb_index);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_sb_unregister);
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
new file mode 100644
index 000000000000..c26bf9b29bca
--- /dev/null
+++ b/net/devlink/trap.c
@@ -0,0 +1,1861 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <trace/events/devlink.h>
+
+#include "devl_internal.h"
+
+struct devlink_stats {
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_packets;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_policer_item - Packet trap policer attributes.
+ * @policer: Immutable packet trap policer attributes.
+ * @rate: Rate in packets / sec.
+ * @burst: Burst size in packets.
+ * @list: trap_policer_list member.
+ *
+ * Describes packet trap policer attributes. Created by devlink during trap
+ * policer registration.
+ */
+struct devlink_trap_policer_item {
+ const struct devlink_trap_policer *policer;
+ u64 rate;
+ u64 burst;
+ struct list_head list;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @policer_item: Associated policer item. Can be NULL.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * group registration.
+ */
+struct devlink_trap_group_item {
+ const struct devlink_trap_group *group;
+ struct devlink_trap_policer_item *policer_item;
+ struct list_head list;
+ struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+ const struct devlink_trap *trap;
+ struct devlink_trap_group_item *group_item;
+ struct list_head list;
+ enum devlink_trap_action action;
+ struct devlink_stats __percpu *stats;
+ void *priv;
+};
+
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (policer_item->policer->id == id)
+ return policer_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (!strcmp(trap_item->trap->name, name))
+ return trap_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ struct nlattr *attr;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+ return NULL;
+ attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+ return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+ enum devlink_trap_action *p_trap_action)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+ switch (val) {
+ case DEVLINK_TRAP_ACTION_DROP:
+ case DEVLINK_TRAP_ACTION_TRAP:
+ case DEVLINK_TRAP_ACTION_MIRROR:
+ *p_trap_action = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+ const struct devlink_trap *trap)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+ goto nla_put_failure;
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+ struct devlink_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(i) {
+ struct devlink_stats *cpu_stats;
+ u64 rx_packets, rx_bytes;
+ unsigned int start;
+
+ cpu_stats = per_cpu_ptr(trap_stats, i);
+ do {
+ start = u64_stats_fetch_begin(&cpu_stats->syncp);
+ rx_packets = u64_stats_read(&cpu_stats->rx_packets);
+ rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+ u64_stats_add(&stats->rx_packets, rx_packets);
+ u64_stats_add(&stats->rx_bytes, rx_bytes);
+ }
+}
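
The fetch/retry loop above is the reader half of the u64_stats protocol; the writer half runs per-CPU on the trap receive path. A minimal sketch of that update, consistent with how the report path bumps these counters:

    static void my_trap_stats_update(struct devlink_stats __percpu *trap_stats,
                                     size_t skb_len)
    {
            struct devlink_stats *stats;

            stats = this_cpu_ptr(trap_stats);
            u64_stats_update_begin(&stats->syncp);
            u64_stats_add(&stats->rx_bytes, skb_len);
            u64_stats_inc(&stats->rx_packets);
            u64_stats_update_end(&stats->syncp);
    }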
+
+static int
+devlink_trap_group_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+
+ devlink_trap_stats_read(trap_stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+ u64 drops = 0;
+ int err;
+
+ if (devlink->ops->trap_drop_counter_get) {
+ err = devlink->ops->trap_drop_counter_get(devlink,
+ trap_item->trap,
+ &drops);
+ if (err)
+ return err;
+ }
+
+ devlink_trap_stats_read(trap_item->stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (devlink->ops->trap_drop_counter_get &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+ goto nla_put_failure;
+
+ if (trap_item->trap->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+ goto nla_put_failure;
+
+ err = devlink_trap_metadata_put(msg, trap_item->trap);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, devlink, trap_item);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+ info->snd_seq, 0);
+ if (err)
+ goto err_trap_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_trap_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_trap_item *trap_item;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_get_dump_one);
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+ NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
+ return 0;
+ }
+
+ err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+ trap_action, extack);
+ if (err)
+ return err;
+
+ trap_item->action = trap_action;
+
+ return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ return __devlink_trap_action_set(devlink, trap_item, trap_action,
+ info->extack);
+}
+
+int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ return devlink_trap_action_set(devlink, trap_item, info);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (!strcmp(group_item->group->name, name))
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (group_item->group->id == id)
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *name;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+ return NULL;
+ name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+ return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (group_item->group->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (group_item->policer_item &&
+ nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+ group_item->policer_item->policer->id))
+ goto nla_put_failure;
+
+ err = devlink_trap_group_stats_put(msg, group_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_group_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_trap_group_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_trap_group_item *group_item;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_group_get_dump_one);
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ const char *group_name = group_item->group->name;
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink->ops->trap_group_action_set) {
+ err = devlink->ops->trap_group_action_set(devlink, group_item->group,
+ trap_action, extack);
+ if (err)
+ return err;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->group_item->group->name, group_name))
+ continue;
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
+ continue;
+ trap_item->action = trap_action;
+ }
+
+ return 0;
+ }
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->group_item->group->name, group_name))
+ continue;
+ err = __devlink_trap_action_set(devlink, trap_item,
+ trap_action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info, bool *p_modified)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+ info->extack);
+ if (err)
+ return err;
+
+ *p_modified = true;
+
+ return 0;
+}
+
+static int devlink_trap_group_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ const struct devlink_trap_policer *policer;
+ struct nlattr **attrs = info->attrs;
+ u32 policer_id;
+ int err;
+
+ if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+ return 0;
+
+ if (!devlink->ops->trap_group_set)
+ return -EOPNOTSUPP;
+
+ policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+ if (policer_id && !policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+ policer = policer_item ? policer_item->policer : NULL;
+
+ err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
+ extack);
+ if (err)
+ return err;
+
+ group_item->policer_item = policer_item;
+
+ return 0;
+}
+
+int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ bool modified = false;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_group_action_set(devlink, group_item, info,
+ &modified);
+ if (err)
+ return err;
+
+ err = devlink_trap_group_set(devlink, group_item, info);
+ if (err)
+ goto err_trap_group_set;
+
+ return 0;
+
+err_trap_group_set:
+ if (modified)
+ NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
+ return err;
+}
+
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ u32 id;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+ return NULL;
+ id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+
+ return devlink_trap_policer_item_lookup(devlink, id);
+}
+
+static int
+devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct nlattr *attr;
+ u64 drops;
+ int err;
+
+ if (!devlink->ops->trap_policer_counter_get)
+ return 0;
+
+ err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
+ if (err)
+ return err;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int
+devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+ policer_item->policer->id))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
+ policer_item->rate, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
+ policer_item->burst, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ err = devlink_trap_policer_stats_put(msg, devlink,
+ policer_item->policer);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_policer_list))
+ return -EOPNOTSUPP;
+
+ policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+ if (!policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_policer_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_policer_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_trap_policer_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_trap_policer_item *policer_item;
+ int idx = 0;
+ int err = 0;
+
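+ /* Resume the dump from the first policer that did not fit in the
+ * previous message.
+ */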
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_policer_get_dump_one);
+}
+
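+/* Attributes that are omitted keep their current values; the requested
+ * rate and burst are validated against the limits the driver declared
+ * at registration time before the driver callback is invoked.
+ */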
+static int
+devlink_trap_policer_set(struct devlink *devlink,
+ struct devlink_trap_policer_item *policer_item,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct nlattr **attrs = info->attrs;
+ u64 rate, burst;
+ int err;
+
+ rate = policer_item->rate;
+ burst = policer_item->burst;
+
+ if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
+
+ if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
+ burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
+
+ if (rate < policer_item->policer->min_rate) {
+ NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
+ return -EINVAL;
+ }
+
+ if (rate > policer_item->policer->max_rate) {
+ NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
+ return -EINVAL;
+ }
+
+ if (burst < policer_item->policer->min_burst) {
+ NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
+ return -EINVAL;
+ }
+
+ if (burst > policer_item->policer->max_burst) {
+ NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
+ return -EINVAL;
+ }
+
+ err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
+ rate, burst, info->extack);
+ if (err)
+ return err;
+
+ policer_item->rate = rate;
+ policer_item->burst = burst;
+
+ return 0;
+}
+
+int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->trap_policer_list))
+ return -EOPNOTSUPP;
+
+ if (!devlink->ops->trap_policer_set)
+ return -EOPNOTSUPP;
+
+ policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+ if (!policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+
+ return devlink_trap_policer_set(devlink, policer_item, info);
+}
+
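+/* Generic traps are exposed to user space under well-known names, so the
+ * table below must stay in sync with the DEVLINK_TRAP_GENERIC_ID_* and
+ * DEVLINK_TRAP_GENERIC_NAME_* definitions.
+ */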
+#define DEVLINK_TRAP(_id, _type) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap devlink_trap_generic[] = {
+ DEVLINK_TRAP(SMAC_MC, DROP),
+ DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+ DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+ DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+ DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+ DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+ DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+ DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+ DEVLINK_TRAP(TAIL_DROP, DROP),
+ DEVLINK_TRAP(NON_IP_PACKET, DROP),
+ DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
+ DEVLINK_TRAP(DIP_LB, DROP),
+ DEVLINK_TRAP(SIP_MC, DROP),
+ DEVLINK_TRAP(SIP_LB, DROP),
+ DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
+ DEVLINK_TRAP(IPV4_SIP_BC, DROP),
+ DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
+ DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
+ DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
+ DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
+ DEVLINK_TRAP(RPF, EXCEPTION),
+ DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
+ DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(NON_ROUTABLE, DROP),
+ DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
+ DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
+ DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
+ DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
+ DEVLINK_TRAP(STP, CONTROL),
+ DEVLINK_TRAP(LACP, CONTROL),
+ DEVLINK_TRAP(LLDP, CONTROL),
+ DEVLINK_TRAP(IGMP_QUERY, CONTROL),
+ DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
+ DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
+ DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
+ DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
+ DEVLINK_TRAP(MLD_QUERY, CONTROL),
+ DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
+ DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
+ DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
+ DEVLINK_TRAP(IPV4_DHCP, CONTROL),
+ DEVLINK_TRAP(IPV6_DHCP, CONTROL),
+ DEVLINK_TRAP(ARP_REQUEST, CONTROL),
+ DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
+ DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
+ DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
+ DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
+ DEVLINK_TRAP(IPV4_BFD, CONTROL),
+ DEVLINK_TRAP(IPV6_BFD, CONTROL),
+ DEVLINK_TRAP(IPV4_OSPF, CONTROL),
+ DEVLINK_TRAP(IPV6_OSPF, CONTROL),
+ DEVLINK_TRAP(IPV4_BGP, CONTROL),
+ DEVLINK_TRAP(IPV6_BGP, CONTROL),
+ DEVLINK_TRAP(IPV4_VRRP, CONTROL),
+ DEVLINK_TRAP(IPV6_VRRP, CONTROL),
+ DEVLINK_TRAP(IPV4_PIM, CONTROL),
+ DEVLINK_TRAP(IPV6_PIM, CONTROL),
+ DEVLINK_TRAP(UC_LB, CONTROL),
+ DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
+ DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
+ DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
+ DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
+ DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
+ DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
+ DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
+ DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
+ DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
+ DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
+ DEVLINK_TRAP(PTP_EVENT, CONTROL),
+ DEVLINK_TRAP(PTP_GENERAL, CONTROL),
+ DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
+ DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
+ DEVLINK_TRAP(EARLY_DROP, DROP),
+ DEVLINK_TRAP(VXLAN_PARSING, DROP),
+ DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
+ DEVLINK_TRAP(VLAN_PARSING, DROP),
+ DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
+ DEVLINK_TRAP(MPLS_PARSING, DROP),
+ DEVLINK_TRAP(ARP_PARSING, DROP),
+ DEVLINK_TRAP(IP_1_PARSING, DROP),
+ DEVLINK_TRAP(IP_N_PARSING, DROP),
+ DEVLINK_TRAP(GRE_PARSING, DROP),
+ DEVLINK_TRAP(UDP_PARSING, DROP),
+ DEVLINK_TRAP(TCP_PARSING, DROP),
+ DEVLINK_TRAP(IPSEC_PARSING, DROP),
+ DEVLINK_TRAP(SCTP_PARSING, DROP),
+ DEVLINK_TRAP(DCCP_PARSING, DROP),
+ DEVLINK_TRAP(GTP_PARSING, DROP),
+ DEVLINK_TRAP(ESP_PARSING, DROP),
+ DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
+ DEVLINK_TRAP(DMAC_FILTER, DROP),
+ DEVLINK_TRAP(EAPOL, CONTROL),
+ DEVLINK_TRAP(LOCKED_PORT, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id) \
+ { \
+ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+ DEVLINK_TRAP_GROUP(L2_DROPS),
+ DEVLINK_TRAP_GROUP(L3_DROPS),
+ DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
+ DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+ DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
+ DEVLINK_TRAP_GROUP(ACL_DROPS),
+ DEVLINK_TRAP_GROUP(STP),
+ DEVLINK_TRAP_GROUP(LACP),
+ DEVLINK_TRAP_GROUP(LLDP),
+ DEVLINK_TRAP_GROUP(MC_SNOOPING),
+ DEVLINK_TRAP_GROUP(DHCP),
+ DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
+ DEVLINK_TRAP_GROUP(BFD),
+ DEVLINK_TRAP_GROUP(OSPF),
+ DEVLINK_TRAP_GROUP(BGP),
+ DEVLINK_TRAP_GROUP(VRRP),
+ DEVLINK_TRAP_GROUP(PIM),
+ DEVLINK_TRAP_GROUP(UC_LB),
+ DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
+ DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
+ DEVLINK_TRAP_GROUP(IPV6),
+ DEVLINK_TRAP_GROUP(PTP_EVENT),
+ DEVLINK_TRAP_GROUP(PTP_GENERAL),
+ DEVLINK_TRAP_GROUP(ACL_SAMPLE),
+ DEVLINK_TRAP_GROUP(ACL_TRAP),
+ DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
+ DEVLINK_TRAP_GROUP(EAPOL),
+};
+
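+/* A generic trap must carry the well-known name and type matching its ID,
+ * while a driver-specific trap must use an ID above the generic range and
+ * must not reuse a generic name. The same rules apply to trap groups.
+ */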
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+ if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+ return -EINVAL;
+
+ if (trap->type != devlink_trap_generic[trap->id].type)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+ int i;
+
+ if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+ if (!strcmp(trap->name, devlink_trap_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+ if (!trap || !trap->name)
+ return -EINVAL;
+
+ if (trap->generic)
+ return devlink_trap_generic_verify(trap);
+ else
+ return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+ if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+ int i;
+
+ if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+ if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+ if (group->generic)
+ return devlink_trap_group_generic_verify(group);
+ else
+ return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
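+ /* Hold notifications back until the devlink instance is registered;
+ * devlink_trap_groups_notify_register() replays them at that point.
+ */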
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_trap_groups_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+}
+
+void devlink_trap_groups_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ u16 group_id = trap_item->trap->init_group_id;
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id);
+ if (WARN_ON_ONCE(!group_item))
+ return -EINVAL;
+
+ trap_item->group_item = group_item;
+
+ return 0;
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_traps_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+}
+
+void devlink_traps_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+ const struct devlink_trap *trap, void *priv)
+{
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink_trap_item_lookup(devlink, trap->name))
+ return -EEXIST;
+
+ trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+ if (!trap_item)
+ return -ENOMEM;
+
+ trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!trap_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ trap_item->trap = trap;
+ trap_item->action = trap->init_action;
+ trap_item->priv = priv;
+
+ err = devlink_trap_item_group_link(devlink, trap_item);
+ if (err)
+ goto err_group_link;
+
+ err = devlink->ops->trap_init(devlink, trap, trap_item);
+ if (err)
+ goto err_trap_init;
+
+ list_add_tail(&trap_item->list, &devlink->trap_list);
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ return 0;
+
+err_trap_init:
+err_group_link:
+ free_percpu(trap_item->stats);
+err_stats_alloc:
+ kfree(trap_item);
+ return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+ list_del(&trap_item->list);
+ if (devlink->ops->trap_fini)
+ devlink->ops->trap_fini(devlink, trap, trap_item);
+ free_percpu(trap_item->stats);
+ kfree(trap_item);
+}
+
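+/* Used on the unregistration path to stop the device from generating
+ * packets for the trap before its context is freed.
+ */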
+static void devlink_trap_disable(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
+ NULL);
+ trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devl_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
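+ * The trap groups referenced by the traps' init_group_id must have been
+ * registered beforehand, e.g. via devl_trap_groups_register().
+ *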
+ * Return: Non-zero value on failure.
+ */
+int devl_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int i, err;
+
+ if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+ return -EINVAL;
+
+ devl_assert_locked(devlink);
+ for (i = 0; i < traps_count; i++) {
+ const struct devlink_trap *trap = &traps[i];
+
+ err = devlink_trap_verify(trap);
+ if (err)
+ goto err_trap_verify;
+
+ err = devlink_trap_register(devlink, trap, priv);
+ if (err)
+ goto err_trap_register;
+ }
+
+ return 0;
+
+err_trap_register:
+err_trap_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_traps_register);
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_traps_register(devlink, traps, traps_count, priv);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devl_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devl_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ int i;
+
+ devl_assert_locked(devlink);
+ /* Make sure we do not have any packets in-flight while unregistering
+ * traps by disabling all of them and waiting for a grace period.
+ */
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_disable(devlink, &traps[i]);
+ synchronize_rcu();
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+}
+EXPORT_SYMBOL_GPL(devl_traps_unregister);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ devl_lock(devlink);
+ devl_traps_unregister(devlink, traps, traps_count);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
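+/* Trap and trap group statistics are kept per-CPU; the u64_stats syncp
+ * makes the 64-bit counters readable consistently on 32-bit systems.
+ */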
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+ size_t skb_len)
+{
+ struct devlink_stats *stats;
+
+ stats = this_cpu_ptr(trap_stats);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_add(&stats->rx_bytes, skb_len);
+ u64_stats_inc(&stats->rx_packets);
+ u64_stats_update_end(&stats->syncp);
+}
+
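+/* The port type may change concurrently, so the type lock must be held
+ * while reading the netdevice of an Ethernet port.
+ */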
+static void
+devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
+ const struct devlink_trap_item *trap_item,
+ struct devlink_port *in_devlink_port,
+ const struct flow_action_cookie *fa_cookie)
+{
+ metadata->trap_name = trap_item->trap->name;
+ metadata->trap_group_name = trap_item->group_item->group->name;
+ metadata->fa_cookie = fa_cookie;
+ metadata->trap_type = trap_item->trap->type;
+
+ spin_lock(&in_devlink_port->type_lock);
+ if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ metadata->input_dev = in_devlink_port->type_eth.netdev;
+ spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ * @fa_cookie: Flow action cookie. May be NULL.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+ void *trap_ctx, struct devlink_port *in_devlink_port,
+ const struct flow_action_cookie *fa_cookie)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ devlink_trap_stats_update(trap_item->stats, skb->len);
+ devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+ if (tracepoint_enabled(devlink_trap_report)) {
+ struct devlink_trap_metadata metadata = {};
+
+ devlink_trap_report_metadata_set(&metadata, trap_item,
+ in_devlink_port, fa_cookie);
+ trace_devlink_trap_report(devlink, skb, &metadata);
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
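+/* An init_policer_id of zero means the group starts out without a policer. */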
+static int
+devlink_trap_group_item_policer_link(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ u32 policer_id = group_item->group->init_policer_id;
+ struct devlink_trap_policer_item *policer_item;
+
+ if (policer_id == 0)
+ return 0;
+
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+ if (WARN_ON_ONCE(!policer_item))
+ return -EINVAL;
+
+ group_item->policer_item = policer_item;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_register(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ if (devlink_trap_group_item_lookup(devlink, group->name))
+ return -EEXIST;
+
+ group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+ if (!group_item)
+ return -ENOMEM;
+
+ group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!group_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ group_item->group = group;
+
+ err = devlink_trap_group_item_policer_link(devlink, group_item);
+ if (err)
+ goto err_policer_link;
+
+ if (devlink->ops->trap_group_init) {
+ err = devlink->ops->trap_group_init(devlink, group);
+ if (err)
+ goto err_group_init;
+ }
+
+ list_add_tail(&group_item->list, &devlink->trap_group_list);
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ return 0;
+
+err_group_init:
+err_policer_link:
+ free_percpu(group_item->stats);
+err_stats_alloc:
+ kfree(group_item);
+ return err;
+}
+
+static void
+devlink_trap_group_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup(devlink, group->name);
+ if (WARN_ON_ONCE(!group_item))
+ return;
+
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_del(&group_item->list);
+ free_percpu(group_item->stats);
+ kfree(group_item);
+}
+
+/**
+ * devl_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devl_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int i, err;
+
+ devl_assert_locked(devlink);
+ for (i = 0; i < groups_count; i++) {
+ const struct devlink_trap_group *group = &groups[i];
+
+ err = devlink_trap_group_verify(group);
+ if (err)
+ goto err_trap_group_verify;
+
+ err = devlink_trap_group_register(devlink, group);
+ if (err)
+ goto err_trap_group_register;
+ }
+
+ return 0;
+
+err_trap_group_register:
+err_trap_group_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_group_unregister(devlink, &groups[i]);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_register);
+
+/**
+ * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_trap_groups_register(devlink, groups, groups_count);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
+
+/**
+ * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ */
+void devl_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int i;
+
+ devl_assert_locked(devlink);
+ for (i = groups_count - 1; i >= 0; i--)
+ devlink_trap_group_unregister(devlink, &groups[i]);
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
+
+/**
+ * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ devl_lock(devlink);
+ devl_trap_groups_unregister(devlink, groups, groups_count);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
+
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
+ cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
+ 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_trap_policers_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+}
+
+void devlink_trap_policers_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+ list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+}
+
+static int
+devlink_trap_policer_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct devlink_trap_policer_item *policer_item;
+ int err;
+
+ if (devlink_trap_policer_item_lookup(devlink, policer->id))
+ return -EEXIST;
+
+ policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
+ if (!policer_item)
+ return -ENOMEM;
+
+ policer_item->policer = policer;
+ policer_item->rate = policer->init_rate;
+ policer_item->burst = policer->init_burst;
+
+ if (devlink->ops->trap_policer_init) {
+ err = devlink->ops->trap_policer_init(devlink, policer);
+ if (err)
+ goto err_policer_init;
+ }
+
+ list_add_tail(&policer_item->list, &devlink->trap_policer_list);
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+
+ return 0;
+
+err_policer_init:
+ kfree(policer_item);
+ return err;
+}
+
+static void
+devlink_trap_policer_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
+ if (WARN_ON_ONCE(!policer_item))
+ return;
+
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+ list_del(&policer_item->list);
+ if (devlink->ops->trap_policer_fini)
+ devlink->ops->trap_policer_fini(devlink, policer);
+ kfree(policer_item);
+}
+
+/**
+ * devl_trap_policers_register - Register packet trap policers with devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ *
+ * Return: Non-zero value on failure.
+ */
+int
+devl_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
+{
+ int i, err;
+
+ devl_assert_locked(devlink);
+ for (i = 0; i < policers_count; i++) {
+ const struct devlink_trap_policer *policer = &policers[i];
+
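+ /* ID zero is reserved to mean "no policer", and the declared
+ * rate and burst limits must form valid ranges.
+ */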
+ if (WARN_ON(policer->id == 0 ||
+ policer->max_rate < policer->min_rate ||
+ policer->max_burst < policer->min_burst)) {
+ err = -EINVAL;
+ goto err_trap_policer_verify;
+ }
+
+ err = devlink_trap_policer_register(devlink, policer);
+ if (err)
+ goto err_trap_policer_register;
+ }
+ return 0;
+
+err_trap_policer_register:
+err_trap_policer_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_policer_unregister(devlink, &policers[i]);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_policers_register);
+
+/**
+ * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ */
+void
+devl_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
+{
+ int i;
+
+ devl_assert_locked(devlink);
+ for (i = policers_count - 1; i >= 0; i--)
+ devlink_trap_policer_unregister(devlink, &policers[i]);
+}
+EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 0ce8fd311c78..37ab238e8304 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1568,27 +1568,6 @@ static void dsa_port_phylink_validate(struct phylink_config *config,
phylink_generic_validate(config, supported, state);
}
-static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
- int err;
-
- /* Only called for inband modes */
- if (!ds->ops->phylink_mac_link_state) {
- state->link = 0;
- return;
- }
-
- err = ds->ops->phylink_mac_link_state(ds, dp->index, state);
- if (err < 0) {
- dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n",
- dp->index, err);
- state->link = 0;
- }
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1646,17 +1625,6 @@ static int dsa_port_phylink_mac_finish(struct phylink_config *config,
return err;
}
-static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_an_restart)
- return;
-
- ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
@@ -1700,11 +1668,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
- .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
.mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
.mac_finish = dsa_port_phylink_mac_finish,
- .mac_an_restart = dsa_port_phylink_mac_an_restart,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
};
@@ -1720,15 +1686,19 @@ int dsa_port_phylink_create(struct dsa_port *dp)
if (err)
mode = PHY_INTERFACE_MODE_NA;
- /* Presence of phylink_mac_link_state or phylink_mac_an_restart is
- * an indicator of a legacy phylink driver.
- */
- if (ds->ops->phylink_mac_link_state ||
- ds->ops->phylink_mac_an_restart)
- dp->pl_config.legacy_pre_march2020 = true;
-
- if (ds->ops->phylink_get_caps)
+ if (ds->ops->phylink_get_caps) {
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
+ } else {
+ /* For legacy drivers */
+ if (mode != PHY_INTERFACE_MODE_NA) {
+ __set_bit(mode, dp->pl_config.supported_interfaces);
+ } else {
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ dp->pl_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ dp->pl_config.supported_interfaces);
+ }
+ }
pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
mode, &dsa_port_phylink_mac_ops);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 527b1d576460..48db91b33390 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <linux/if_hsr.h>
#include <net/dcbnl.h>
#include <linux/netpoll.h>
+#include <linux/string.h>
#include "dsa.h"
#include "port.h"
@@ -1056,10 +1057,10 @@ static void dsa_slave_get_strings(struct net_device *dev,
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
- strncpy(data, "tx_packets", len);
- strncpy(data + len, "tx_bytes", len);
- strncpy(data + 2 * len, "rx_packets", len);
- strncpy(data + 3 * len, "rx_bytes", len);
+ strscpy_pad(data, "tx_packets", len);
+ strscpy_pad(data + len, "tx_bytes", len);
+ strscpy_pad(data + 2 * len, "rx_packets", len);
+ strscpy_pad(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
ds->ops->get_strings(ds, dp->index, stringset,
data + 4 * len);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index e757c8de06f1..e5ff7c34e577 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -75,10 +75,6 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* Remove QCA tag and recalculate checksum */
- skb_pull_rcsum(skb, QCA_HDR_LEN);
- dsa_strip_etype_header(skb, QCA_HDR_LEN);
-
/* Get source port information */
port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
@@ -86,6 +82,10 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
if (!skb->dev)
return NULL;
+ /* Remove QCA tag and recalculate checksum */
+ skb_pull_rcsum(skb, QCA_HDR_LEN);
+ dsa_strip_etype_header(skb, QCA_HDR_LEN);
+
return skb;
}
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 61c40e889a4d..7b4bbd674bae 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_channels_get_policy[] = {
static int channels_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct channels_reply_data *data = CHANNELS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 01a59ce211c8..83112c1a71ae 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -59,10 +59,9 @@ const struct nla_policy ethnl_coalesce_get_policy[] = {
static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -73,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce,
- &data->kernel_coalesce, extack);
+ &data->kernel_coalesce,
+ info->extack);
ethnl_ops_complete(dev);
return ret;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 5fb19050991e..f5598c5f50de 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -665,9 +665,8 @@ const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
{
- rtnl_lock();
+ ASSERT_RTNL();
ethtool_phy_ops = ops;
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops);
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index e4369769817e..0b2dea56d461 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -23,7 +23,7 @@ const struct nla_policy ethnl_debug_get_policy[] = {
static int debug_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct debug_reply_data *data = DEBUG_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 42104bcb0e47..2853394d06a8 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -26,7 +26,7 @@ const struct nla_policy ethnl_eee_get_policy[] = {
static int eee_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct eee_reply_data *data = EEE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 49c0a2a77f02..6209c3a9c8f7 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -51,8 +51,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
}
static int eeprom_fallback(struct eeprom_req_info *request,
- struct eeprom_reply_data *reply,
- struct genl_info *info)
+ struct eeprom_reply_data *reply)
{
struct net_device *dev = reply->base.dev;
struct ethtool_modinfo modinfo = {0};
@@ -103,7 +102,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
@@ -124,7 +123,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
if (ret)
goto err_free;
- ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL);
+ ret = get_module_eeprom_by_page(dev, &page_data, info->extack);
if (ret < 0)
goto err_ops;
@@ -140,7 +139,7 @@ err_free:
kfree(page_data.data);
if (ret == -EOPNOTSUPP)
- return eeprom_fallback(request, reply, info);
+ return eeprom_fallback(request, reply);
return ret;
}
diff --git a/net/ethtool/features.c b/net/ethtool/features.c
index 55d449a2d3fc..a79af8c25a07 100644
--- a/net/ethtool/features.c
+++ b/net/ethtool/features.c
@@ -35,7 +35,7 @@ static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src)
static int features_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct features_reply_data *data = FEATURES_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 0d9a3d153170..e7d3f2c352a3 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -92,7 +92,7 @@ fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats)
static int fec_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {};
struct fec_reply_data *data = FEC_REPDATA(reply_base);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 4a51e0ec295c..0b0ce4f81c01 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -907,6 +907,38 @@ static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
return 0;
}
+static int ethtool_rxnfc_copy_struct(u32 cmd, struct ethtool_rxnfc *info,
+ size_t *info_size, void __user *useraddr)
+{
+ /* struct ethtool_rxnfc was originally defined for
+ * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+ * members. User-space might still be using that
+ * definition.
+ */
+ if (cmd == ETHTOOL_GRXFH || cmd == ETHTOOL_SRXFH)
+ *info_size = (offsetof(struct ethtool_rxnfc, data) +
+ sizeof(info->data));
+
+ if (ethtool_rxnfc_copy_from_user(info, useraddr, *info_size))
+ return -EFAULT;
+
+ if ((cmd == ETHTOOL_GRXFH || cmd == ETHTOOL_SRXFH) && info->flow_type & FLOW_RSS) {
+ *info_size = sizeof(*info);
+ if (ethtool_rxnfc_copy_from_user(info, useraddr, *info_size))
+ return -EFAULT;
+ /* Since malicious users may modify the original data,
+ * we need to check whether FLOW_RSS is still requested.
+ */
+ if (!(info->flow_type & FLOW_RSS))
+ return -EINVAL;
+ }
+
+ if (info->cmd != cmd)
+ return -EINVAL;
+
+ return 0;
+}
+
static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
const struct ethtool_rxnfc *rxnfc,
size_t size, const u32 *rule_buf)
@@ -944,16 +976,9 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (!dev->ethtool_ops->set_rxnfc)
return -EOPNOTSUPP;
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_SRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
+ rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (rc)
+ return rc;
rc = dev->ethtool_ops->set_rxnfc(dev, &info);
if (rc)
@@ -978,33 +1003,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (!ops->get_rxnfc)
return -EOPNOTSUPP;
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_GRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
-
- /* If FLOW_RSS was requested then user-space must be using the
- * new definition, as FLOW_RSS is newer.
- */
- if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
- info_size = sizeof(info);
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
- /* Since malicious users may modify the original data,
- * we need to check whether FLOW_RSS is still requested.
- */
- if (!(info.flow_type & FLOW_RSS))
- return -EINVAL;
- }
-
- if (info.cmd != cmd)
- return -EINVAL;
+ ret = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (ret)
+ return ret;
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
@@ -3207,7 +3208,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v4_m_spec->ip4src ||
v4_m_spec->ip4dst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
offsetof(struct ethtool_rx_flow_key, ipv4);
}
@@ -3222,7 +3223,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v4_m_spec->psrc ||
v4_m_spec->pdst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
offsetof(struct ethtool_rx_flow_key, tp);
}
@@ -3259,7 +3260,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src) ||
!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
offsetof(struct ethtool_rx_flow_key, ipv6);
}
@@ -3274,7 +3275,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v6_m_spec->psrc ||
v6_m_spec->pdst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
offsetof(struct ethtool_rx_flow_key, tp);
}
@@ -3282,7 +3283,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
match->key.ip.tos = v6_spec->tclass;
match->mask.ip.tos = v6_m_spec->tclass;
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IP);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IP);
match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
offsetof(struct ethtool_rx_flow_key, ip);
}
@@ -3306,7 +3307,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
break;
}
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
offsetof(struct ethtool_rx_flow_key, basic);
@@ -3339,7 +3340,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (ext_m_spec->vlan_etype ||
ext_m_spec->vlan_tci) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_VLAN);
+ BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct ethtool_rx_flow_key, vlan);
}
@@ -3354,7 +3355,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
ETH_ALEN);
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
offsetof(struct ethtool_rx_flow_key, eth_addrs);
}
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 310dfe63292a..5c317d23787b 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -23,7 +23,7 @@ const struct nla_policy ethnl_linkinfo_get_policy[] = {
static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 20165e07ef90..b2591db49f7d 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -27,7 +27,7 @@ const struct nla_policy ethnl_linkmodes_get_policy[] = {
static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 2158c17a0b32..b2de2108b356 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -81,7 +81,7 @@ static int linkstate_get_link_ext_state(struct net_device *dev,
static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
index 4058a557b5a4..2816bb23c3ad 100644
--- a/net/ethtool/mm.c
+++ b/net/ethtool/mm.c
@@ -27,7 +27,7 @@ const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = {
static int mm_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct mm_reply_data *data = MM_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index e0d539b21423..ceb575efc290 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -38,10 +38,9 @@ static int module_get_power_mode(struct net_device *dev,
static int module_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct module_reply_data *data = MODULE_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -49,7 +48,7 @@ static int module_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- ret = module_get_power_mode(dev, data, extack);
+ ret = module_get_power_mode(dev, data, info->extack);
if (ret < 0)
goto out_complete;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 39a459b0111b..3bbd5afb7b31 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
* @ops: request ops of currently processed message type
* @req_info: parsed request header of processed request
* @reply_data: data needed to compose the reply
- * @pos_hash: saved iteration position - hashbucket
- * @pos_idx: saved iteration position - index
+ * @pos_ifindex: saved iteration position - ifindex
*
* These parameters are kept in struct netlink_callback as context preserved
* between iterations. They are initialized by ethnl_default_start() and used
@@ -263,8 +262,7 @@ struct ethnl_dump_ctx {
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
struct ethnl_reply_data *reply_data;
- int pos_hash;
- int pos_idx;
+ unsigned long pos_ifindex;
};
static const struct ethnl_request_ops *
@@ -318,10 +316,8 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
/**
* ethnl_default_parse() - Parse request message
* @req_info: pointer to structure to put data into
- * @tb: parsed attributes
- * @net: request netns
+ * @info: genl_info from the request
* @request_ops: struct request_ops for request type
- * @extack: netlink extack for error reporting
* @require_dev: fail if no device identified in header
*
* Parse universal request header and call request specific ->parse_request()
@@ -330,19 +326,21 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
* Return: 0 on success or negative error code
*/
static int ethnl_default_parse(struct ethnl_req_info *req_info,
- struct nlattr **tb, struct net *net,
+ const struct genl_info *info,
const struct ethnl_request_ops *request_ops,
- struct netlink_ext_ack *extack, bool require_dev)
+ bool require_dev)
{
+ struct nlattr **tb = info->attrs;
int ret;
ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr],
- net, extack, require_dev);
+ genl_info_net(info), info->extack,
+ require_dev);
if (ret < 0)
return ret;
if (request_ops->parse_request) {
- ret = request_ops->parse_request(req_info, tb, extack);
+ ret = request_ops->parse_request(req_info, tb, info->extack);
if (ret < 0)
return ret;
}
@@ -395,8 +393,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
}
- ret = ethnl_default_parse(req_info, info->attrs, genl_info_net(info),
- ops, info->extack, !ops->allow_nodev_do);
+ ret = ethnl_default_parse(req_info, info, ops, !ops->allow_nodev_do);
if (ret < 0)
goto err_dev;
ethnl_init_reply_data(reply_data, ops, req_info->dev);
@@ -447,12 +444,12 @@ err_dev:
static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
const struct ethnl_dump_ctx *ctx,
- struct netlink_callback *cb)
+ const struct genl_info *info)
{
void *ehdr;
int ret;
- ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ ehdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&ethtool_genl_family, NLM_F_MULTI,
ctx->ops->reply_cmd);
if (!ehdr)
@@ -460,7 +457,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
rtnl_lock();
- ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
+ ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info);
rtnl_unlock();
if (ret < 0)
goto out;
@@ -490,55 +487,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
{
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
rtnl_lock();
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
- unsigned int seq;
-
- head = &net->dev_index_head[h];
-
-restart_chain:
- seq = net->dev_base_seq;
- cb->seq = seq;
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- dev_hold(dev);
- rtnl_unlock();
-
- ret = ethnl_default_dump_one(skb, dev, ctx, cb);
- dev_put(dev);
- if (ret < 0) {
- if (ret == -EOPNOTSUPP)
- goto lock_and_cont;
- if (likely(skb->len))
- ret = skb->len;
- goto out;
- }
-lock_and_cont:
- rtnl_lock();
- if (net->dev_base_seq != seq) {
- s_idx = idx + 1;
- goto restart_chain;
- }
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
+ dev_hold(dev);
+ rtnl_unlock();
+
+ ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
+ rtnl_lock();
+ dev_put(dev);
+
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ ret = skb->len;
+ break;
+ }
}
rtnl_unlock();
-out:
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
return ret;
}
@@ -568,8 +537,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
goto free_req_info;
}
- ret = ethnl_default_parse(req_info, info->attrs, sock_net(cb->skb->sk),
- ops, cb->extack, false);
+ ret = ethnl_default_parse(req_info, &info->info, ops, false);
if (req_info->dev) {
/* We ignore device specification in dump requests but as the
* same parser as for non-dump (doit) requests is used, it
@@ -584,8 +552,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
ctx->ops = ops;
ctx->req_info = req_info;
ctx->reply_data = reply_data;
- ctx->pos_hash = 0;
- ctx->pos_idx = 0;
+ ctx->pos_ifindex = 0;
return 0;
@@ -680,11 +647,14 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
struct ethnl_reply_data *reply_data;
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
+ struct genl_info info;
struct sk_buff *skb;
void *reply_payload;
int reply_len;
int ret;
+ genl_info_init_ntf(&info, &ethtool_genl_family, cmd);
+
if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX ||
!ethnl_default_notify_ops[cmd],
"unexpected notification type %u\n", cmd))
@@ -703,7 +673,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
ethnl_init_reply_data(reply_data, ops, dev);
- ret = ops->prepare_data(req_info, reply_data, NULL);
+ ret = ops->prepare_data(req_info, reply_data, &info);
if (ret < 0)
goto err_cleanup;
ret = ops->reply_size(req_info, reply_data);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 79424b34b553..9a333a8d04c1 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -355,7 +355,7 @@ struct ethnl_request_ops {
struct netlink_ext_ack *extack);
int (*prepare_data)(const struct ethnl_req_info *req_info,
struct ethnl_reply_data *reply_data,
- struct genl_info *info);
+ const struct genl_info *info);
int (*reply_size)(const struct ethnl_req_info *req_info,
const struct ethnl_reply_data *reply_data);
int (*fill_reply)(struct sk_buff *skb,
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 6657d0b888d8..f7c847aeb1a2 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -51,10 +51,9 @@ static int pause_parse_request(struct ethnl_req_info *req_base,
static int pause_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct pause_reply_data *data = PAUSE_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
@@ -74,7 +73,7 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
!__ethtool_dev_mm_supported(dev)) {
- NL_SET_ERR_MSG_MOD(extack,
+ NL_SET_ERR_MSG_MOD(info->extack,
"Device does not support MAC merge layer");
ethnl_ops_complete(dev);
return -EOPNOTSUPP;
diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c
index 637b2f5297d5..cadaabed60bd 100644
--- a/net/ethtool/phc_vclocks.c
+++ b/net/ethtool/phc_vclocks.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_phc_vclocks_get_policy[] = {
static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index 5a8cab4df0c9..b238a1afe9ae 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -40,7 +40,7 @@ const struct nla_policy ethnl_plca_get_cfg_policy[] = {
static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -183,7 +183,7 @@ const struct nla_policy ethnl_plca_get_status_policy[] = {
static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c
index 23264a1ebf12..297be6a13ab9 100644
--- a/net/ethtool/privflags.c
+++ b/net/ethtool/privflags.c
@@ -57,7 +57,7 @@ static int ethnl_get_priv_flags_info(struct net_device *dev,
static int privflags_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 530b8b99e6df..cc478af77111 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -53,8 +53,8 @@ static int pse_get_pse_attributes(struct net_device *dev,
}
static int pse_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
{
struct pse_reply_data *data = PSE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- ret = pse_get_pse_attributes(dev, info ? info->extack : NULL, data);
+ ret = pse_get_pse_attributes(dev, info->extack, data);
ethnl_ops_complete(dev);
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index 1c4972526142..fb09f774ea01 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -24,10 +24,9 @@ const struct nla_policy ethnl_rings_get_policy[] = {
static int rings_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct rings_reply_data *data = RINGS_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -39,7 +38,7 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
dev->ethtool_ops->get_ringparam(dev, &data->ringparam,
- &data->kernel_ringparam, extack);
+ &data->kernel_ringparam, info->extack);
ethnl_ops_complete(dev);
return 0;
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index be260ab34e58..5764202e6cb6 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -42,7 +42,8 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
static int
rss_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base, struct genl_info *info)
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
{
struct rss_reply_data *data = RSS_REPDATA(reply_base);
struct rss_req_info *request = RSS_REQINFO(req_base);
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index 010ed19ccc99..912f0c4fff2f 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -114,10 +114,9 @@ static int stats_parse_request(struct ethnl_req_info *req_base,
static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct stats_req_info *req_info = STATS_REQINFO(req_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct stats_reply_data *data = STATS_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
@@ -130,7 +129,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
!__ethtool_dev_mm_supported(dev)) {
- NL_SET_ERR_MSG_MOD(extack,
+ NL_SET_ERR_MSG_MOD(info->extack,
"Device does not support MAC merge layer");
ethnl_ops_complete(dev);
return -EOPNOTSUPP;
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 3f7de54d85fb..c678b484a079 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -274,7 +274,7 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
static int strset_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
struct strset_reply_data *data = STRSET_REPDATA(reply_base);
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 63b5814bd460..9daed0aab162 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -25,7 +25,7 @@ const struct nla_policy ethnl_tsinfo_get_policy[] = {
static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
index 67fb414ca859..b4ce47dd2aa6 100644
--- a/net/ethtool/tunnels.c
+++ b/net/ethtool/tunnels.c
@@ -212,15 +212,14 @@ err_unlock_rtnl:
struct ethnl_tunnel_info_dump_ctx {
struct ethnl_req_info req_info;
- int pos_hash;
- int pos_idx;
+ unsigned long ifindex;
};
int ethnl_tunnel_info_start(struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
int ret;
BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
@@ -243,57 +242,39 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
void *ehdr;
rtnl_lock();
- cb->seq = net->dev_base_seq;
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
-
- head = &net->dev_index_head[h];
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
-
- ehdr = ethnl_dump_put(skb, cb,
- ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
- if (!ehdr) {
- ret = -EMSGSIZE;
- goto out;
- }
-
- ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER);
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- goto out;
- }
-
- ctx->req_info.dev = dev;
- ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
- ctx->req_info.dev = NULL;
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- if (ret == -EOPNOTSUPP)
- goto cont;
- goto out;
- }
- genlmsg_end(skb, ehdr);
-cont:
- idx++;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ehdr = ethnl_dump_put(skb, cb,
+ ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev,
+ ETHTOOL_A_TUNNEL_INFO_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
}
+
+ ctx->req_info.dev = dev;
+ ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
+ ctx->req_info.dev = NULL;
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ if (ret == -EOPNOTSUPP)
+ continue;
+ break;
+ }
+ genlmsg_end(skb, ehdr);
}
-out:
rtnl_unlock();
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
if (ret == -EMSGSIZE && skb->len)
return skb->len;
return ret;
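
/*
 * The rewrite above relies on for_each_netdev_dump(), which walks the
 * per-netns device xarray starting at (and updating) the unsigned long
 * cursor, so an interrupted dump resumes at the right ifindex on the next
 * ->dumpit() call -- replacing the old hash-bucket/index bookkeeping.
 * Hedged sketch of the general shape; the example_* names are hypothetical:
 */
static int example_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct example_dump_ctx *ctx = (void *)cb->ctx;
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	for_each_netdev_dump(net, dev, ctx->ifindex) {
		ret = example_fill_one(skb, cb, dev);
		if (ret < 0)
			break;		/* cursor already points at dev */
	}
	rtnl_unlock();

	if (ret == -EMSGSIZE && skb->len)
		return skb->len;	/* partial skb: resume on next call */
	return ret;
}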
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index a4a43d9e6e9d..0ed56c9ac1bc 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_wol_get_policy[] = {
static int wol_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct wol_reply_data *data = WOL_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -39,7 +39,8 @@ static int wol_prepare_data(const struct ethnl_req_info *req_base,
dev->ethtool_ops->get_wol(dev, &data->wol);
ethnl_ops_complete(dev);
/* do not include password in notifications */
- data->show_sopass = info && (data->wol.supported & WAKE_MAGICSECURE);
+ data->show_sopass = !genl_info_is_ntf(info) &&
+ (data->wol.supported & WAKE_MAGICSECURE);
return 0;
}
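
/*
 * genl_info_is_ntf() distinguishes kernel-generated notifications (built
 * with genl_info_init_ntf(), as in ethnl_default_notify() above) from real
 * user requests, replacing the old "info == NULL" convention.  Hedged
 * sketch of the pairing:
 */
static bool example_is_notification_info(void)
{
	struct genl_info info;

	genl_info_init_ntf(&info, &ethtool_genl_family, ETHTOOL_MSG_WOL_NTF);
	return genl_info_is_ntf(&info);		/* true: no request nlmsghdr */
}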
diff --git a/net/handshake/Makefile b/net/handshake/Makefile
index 247d73c6ff6e..ef4d9a2112bd 100644
--- a/net/handshake/Makefile
+++ b/net/handshake/Makefile
@@ -8,6 +8,6 @@
#
obj-y += handshake.o
-handshake-y := genl.o netlink.o request.o tlshd.o trace.o
+handshake-y := alert.o genl.o netlink.o request.o tlshd.o trace.o
obj-$(CONFIG_NET_HANDSHAKE_KUNIT_TEST) += handshake-test.o
diff --git a/net/handshake/alert.c b/net/handshake/alert.c
new file mode 100644
index 000000000000..329d91984683
--- /dev/null
+++ b/net/handshake/alert.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handle the TLS Alert protocol
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+
+#include <net/sock.h>
+#include <net/handshake.h>
+#include <net/tls.h>
+#include <net/tls_prot.h>
+
+#include "handshake.h"
+
+#include <trace/events/handshake.h>
+
+/**
+ * tls_alert_send - send a TLS Alert on a kTLS socket
+ * @sock: open kTLS socket to send on
+ * @level: TLS Alert level
+ * @description: TLS Alert description
+ *
+ * Returns zero on success or a negative errno.
+ */
+int tls_alert_send(struct socket *sock, u8 level, u8 description)
+{
+ u8 record_type = TLS_RECORD_TYPE_ALERT;
+ u8 buf[CMSG_SPACE(sizeof(record_type))];
+ struct msghdr msg = { 0 };
+ struct cmsghdr *cmsg;
+ struct kvec iov;
+ u8 alert[2];
+ int ret;
+
+ trace_tls_alert_send(sock->sk, level, description);
+
+ alert[0] = level;
+ alert[1] = description;
+ iov.iov_base = alert;
+ iov.iov_len = sizeof(alert);
+
+ memset(buf, 0, sizeof(buf));
+ msg.msg_control = buf;
+ msg.msg_controllen = sizeof(buf);
+ msg.msg_flags = MSG_DONTWAIT;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(record_type));
+ memcpy(CMSG_DATA(cmsg), &record_type, sizeof(record_type));
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, iov.iov_len);
+ ret = sock_sendmsg(sock, &msg);
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * tls_get_record_type - Look for TLS RECORD_TYPE information
+ * @sk: socket (for IP address information)
+ * @cmsg: incoming message to be parsed
+ *
+ * Returns zero or a TLS_RECORD_TYPE value.
+ */
+u8 tls_get_record_type(const struct sock *sk, const struct cmsghdr *cmsg)
+{
+ u8 record_type;
+
+ if (cmsg->cmsg_level != SOL_TLS)
+ return 0;
+ if (cmsg->cmsg_type != TLS_GET_RECORD_TYPE)
+ return 0;
+
+ record_type = *((u8 *)CMSG_DATA(cmsg));
+ trace_tls_contenttype(sk, record_type);
+ return record_type;
+}
+EXPORT_SYMBOL(tls_get_record_type);
+
+/**
+ * tls_alert_recv - Parse TLS Alert messages
+ * @sk: socket (for IP address information)
+ * @msg: incoming message to be parsed
+ * @level: OUT - TLS AlertLevel value
+ * @description: OUT - TLS AlertDescription value
+ *
+ */
+void tls_alert_recv(const struct sock *sk, const struct msghdr *msg,
+ u8 *level, u8 *description)
+{
+ const struct kvec *iov;
+ u8 *data;
+
+ iov = msg->msg_iter.kvec;
+ data = iov->iov_base;
+ *level = data[0];
+ *description = data[1];
+
+ trace_tls_alert_recv(sk, *level, *description);
+}
+EXPORT_SYMBOL(tls_alert_recv);
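
/*
 * Hedged sketch (not part of the patch) of how a kernel consumer might use
 * the receive-side helpers above: recvmsg() on the kTLS socket with a cmsg
 * buffer, identify the record type, and parse alert bytes only for Alert
 * records.  Buffer sizes are illustrative and error handling is elided.
 */
static void example_recv_one_record(struct socket *sock)
{
	u8 buf[128], cbuf[CMSG_SPACE(sizeof(u8))];
	struct kvec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = { .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cmsg;
	u8 level, description;

	if (kernel_recvmsg(sock, &msg, &iov, 1, sizeof(buf), 0) < 2)
		return;

	for_each_cmsghdr(cmsg, &msg) {
		if (tls_get_record_type(sock->sk, cmsg) ==
		    TLS_RECORD_TYPE_ALERT) {
			tls_alert_recv(sock->sk, &msg, &level, &description);
			/* e.g. treat close_notify as EOF */
		}
	}
}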
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
index 4dac965c99df..a48163765a7a 100644
--- a/net/handshake/handshake.h
+++ b/net/handshake/handshake.h
@@ -41,8 +41,11 @@ struct handshake_req {
enum hr_flags_bits {
HANDSHAKE_F_REQ_COMPLETED,
+ HANDSHAKE_F_REQ_SESSION,
};
+struct genl_info;
+
/* Invariants for all handshake requests for one transport layer
* security protocol
*/
@@ -63,6 +66,9 @@ enum hp_flags_bits {
HANDSHAKE_F_PROTO_NOTIFY,
};
+/* alert.c */
+int tls_alert_send(struct socket *sock, u8 level, u8 description);
+
/* netlink.c */
int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
gfp_t flags);
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index b735f5cced2f..bbfb4095ddd6 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -18,6 +18,7 @@
#include <net/sock.h>
#include <net/handshake.h>
#include <net/genetlink.h>
+#include <net/tls_prot.h>
#include <uapi/linux/keyctl.h>
#include <uapi/linux/handshake.h>
@@ -100,6 +101,9 @@ static void tls_handshake_done(struct handshake_req *req,
if (info)
tls_handshake_remote_peerids(treq, info);
+ if (!status)
+ set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags);
+
treq->th_consumer_done(treq->th_consumer_data, -status,
treq->th_peerid[0]);
}
@@ -424,3 +428,22 @@ bool tls_handshake_cancel(struct sock *sk)
return handshake_req_cancel(sk);
}
EXPORT_SYMBOL(tls_handshake_cancel);
+
+/**
+ * tls_handshake_close - send a Closure alert
+ * @sock: an open socket
+ *
+ */
+void tls_handshake_close(struct socket *sock)
+{
+ struct handshake_req *req;
+
+ req = handshake_req_hash_lookup(sock->sk);
+ if (!req)
+ return;
+ if (!test_and_clear_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags))
+ return;
+ tls_alert_send(sock, TLS_ALERT_LEVEL_WARNING,
+ TLS_ALERT_DESC_CLOSE_NOTIFY);
+}
+EXPORT_SYMBOL(tls_handshake_close);
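
/*
 * Hedged sketch of the intended caller pattern: send close_notify before
 * releasing a socket whose handshake completed.  Because the helper tests
 * and clears HANDSHAKE_F_REQ_SESSION, it is idempotent and safe to call
 * unconditionally.
 */
static void example_teardown(struct socket *sock)
{
	tls_handshake_close(sock);	/* sends the Closure alert at most once */
	sock_release(sock);
}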
diff --git a/net/handshake/trace.c b/net/handshake/trace.c
index 1c4d8e27e17a..44432d0857b9 100644
--- a/net/handshake/trace.c
+++ b/net/handshake/trace.c
@@ -8,8 +8,10 @@
*/
#include <linux/types.h>
+#include <linux/ipv6.h>
#include <net/sock.h>
+#include <net/inet_sock.h>
#include <net/netlink.h>
#include <net/genetlink.h>
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
index 501552d9753b..8c99e64e1cea 100644
--- a/net/hsr/hsr_netlink.h
+++ b/net/hsr/hsr_netlink.h
@@ -23,7 +23,5 @@ void __exit hsr_netlink_exit(void);
void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
struct hsr_port *port);
void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]);
-void hsr_nl_framedrop(int dropcount, int dev_idx);
-void hsr_nl_linkdown(int dev_idx);
#endif /* __HSR_NETLINK_H */
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index d610c1886160..1a265a421308 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -262,7 +262,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- info->attrs);
+ info->info.attrs);
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -570,7 +570,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct nl802154_dump_wpan_phy_state *state)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
if (tb[NL802154_ATTR_WPAN_PHY])
state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9b2ca2fcc5a1..3d2e30e20473 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -187,24 +187,13 @@ static int inet_autobind(struct sock *sk)
return 0;
}
-/*
- * Move a socket into listening state.
- */
-int inet_listen(struct socket *sock, int backlog)
+int __inet_listen_sk(struct sock *sk, int backlog)
{
- struct sock *sk = sock->sk;
- unsigned char old_state;
+ unsigned char old_state = sk->sk_state;
int err, tcp_fastopen;
- lock_sock(sk);
-
- err = -EINVAL;
- if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
- goto out;
-
- old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
- goto out;
+ return -EINVAL;
WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
@@ -227,10 +216,27 @@ int inet_listen(struct socket *sock, int backlog)
err = inet_csk_listen_start(sk);
if (err)
- goto out;
+ return err;
+
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
- err = 0;
+ return 0;
+}
+
+/*
+ * Move a socket into listening state.
+ */
+int inet_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+ int err = -EINVAL;
+
+ lock_sock(sk);
+
+ if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
+ goto out;
+
+ err = __inet_listen_sk(sk, backlog);
out:
release_sock(sk);
@@ -325,14 +331,14 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
- inet->nodefrag = 0;
+ inet_clear_bit(NODEFRAG, sk);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
@@ -340,7 +346,7 @@ lookup_protocol:
else
inet->pmtudisc = IP_PMTUDISC_WANT;
- inet->inet_id = 0;
+ atomic_set(&inet->inet_id, 0);
sock_init_data(sock, sk);
@@ -350,9 +356,9 @@ lookup_protocol:
sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
- inet->mc_all = 1;
+ inet_set_bit(MC_ALL, sk);
inet->mc_index = 0;
inet->mc_list = NULL;
inet->rcv_tos = 0;
@@ -431,9 +437,8 @@ int inet_release(struct socket *sock)
}
EXPORT_SYMBOL(inet_release);
-int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
int err;
@@ -454,6 +459,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet_bind(sk, uaddr, addr_len, flags);
}
+
+int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
@@ -519,7 +529,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -646,7 +656,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -EISCONN;
goto out;
case SS_CONNECTING:
- if (inet_sk(sk)->defer_connect)
+ if (inet_test_bit(DEFER_CONNECT, sk))
err = is_sendmsg ? -EINPROGRESS : -EISCONN;
else
err = -EALREADY;
@@ -669,7 +679,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
- if (!err && inet_sk(sk)->defer_connect)
+ if (!err && inet_test_bit(DEFER_CONNECT, sk))
goto out;
/* Just entered SS_CONNECTING state; the only
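
/*
 * The inet_listen() split above lets in-kernel callers that already hold
 * the socket lock and have validated struct socket state drive the listen
 * transition directly.  Hedged sketch of such a caller (example_* is
 * hypothetical):
 */
static int example_listen_locked(struct sock *sk, int backlog)
{
	sock_owned_by_me(sk);		/* caller did lock_sock() */

	return __inet_listen_sk(sk, backlog);
}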
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4406d796cc2f..39dcccf0f174 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset)
return false;
}
-extern struct btf *btf_vmlinux;
-
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 79ae7204e8ed..d048aa833293 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1881,7 +1881,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
old = rcu_dereference_protected(sk_inet->inet_opt,
lockdep_sock_is_held(sk));
- if (sk_inet->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
sk_conn = inet_csk(sk);
if (old)
sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
@@ -2051,7 +2051,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
sk_inet = inet_sk(sk);
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
- if (sk_inet->is_icsk && hdr_delta > 0) {
+ if (inet_test_bit(IS_ICSK, sk) && hdr_delta > 0) {
struct inet_connection_sock *sk_conn = inet_csk(sk);
sk_conn->icsk_ext_hdr_len -= hdr_delta;
sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 4d1af0cd7d99..cb5dbee9e018 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
sk_dst_set(sk, &rt->dst);
err = 0;
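
/*
 * inet->inet_id becomes an atomic_t because connected datagram sockets can
 * have several senders racing on the IP ID counter.  The send side is then
 * expected to advance it roughly like this (hedged sketch of the
 * ip_select_ident_segs() style of update, not the verbatim helper):
 */
static inline u16 example_next_ip_id(struct inet_sock *inet, int segs)
{
	return (u16)(atomic_add_return(segs, &inet->inet_id) - segs);
}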
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 89087844ea6e..9cf64ee47dd2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -509,6 +509,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
return -EEXIST;
}
if (ifa1->ifa_scope != ifa->ifa_scope) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
inet_free_ifa(ifa);
return -EINVAL;
}
@@ -664,6 +665,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
in_dev = inetdev_by_index(net, ifm->ifa_index);
if (!in_dev) {
+ NL_SET_ERR_MSG(extack, "ipv4: Device not found");
err = -ENODEV;
goto errout;
}
@@ -688,6 +690,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+ NL_SET_ERR_MSG(extack, "ipv4: Address not found");
err = -EADDRNOTAVAIL;
errout:
return err;
@@ -839,13 +842,23 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
err = -EINVAL;
- if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
+
+ if (ifm->ifa_prefixlen > 32) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
+ goto errout;
+ }
+
+ if (!tb[IFA_LOCAL]) {
+ NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
goto errout;
+ }
dev = __dev_get_by_index(net, ifm->ifa_index);
err = -ENODEV;
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "ipv4: Device not found");
goto errout;
+ }
in_dev = __in_dev_get_rtnl(dev);
err = -ENOBUFS;
@@ -897,6 +910,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+ NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
err = -EINVAL;
goto errout_free;
}
@@ -954,6 +968,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
int ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
inet_free_ifa(ifa);
return ret;
}
@@ -967,8 +982,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
return -EEXIST;
+ }
ifa = ifa_existing;
if (ifa->ifa_rt_priority != new_metric) {
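
/*
 * These NL_SET_ERR_MSG() strings travel back to userspace in the extended
 * ACK TLV, so tools that request extack (iproute2 does) can print the
 * reason next to the errno.  Illustrative only -- the exact rendering
 * depends on the tool and version:
 *
 *	# ip addr del 192.0.2.1/24 dev dummy0
 *	Error: ipv4: Address not found
 */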
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index ba06ed42e428..2be2d4922557 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x,
err = crypto_aead_setkey(aead, key, keylen);
free_key:
- kfree(key);
+ kfree_sensitive(key);
error:
return err;
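
/*
 * kfree_sensitive() zeroes the buffer (via memzero_explicit()) before
 * freeing it, so key material cannot linger in freed slab memory.  The same
 * pattern applies to any temporary key buffer; minimal hedged sketch:
 */
static int example_set_key(struct crypto_aead *aead, const u8 *src,
			   unsigned int len)
{
	u8 *key;
	int err;

	key = kmemdup(src, len, GFP_KERNEL);
	if (!key)
		return -ENOMEM;

	err = crypto_aead_setkey(aead, key, len);
	kfree_sensitive(key);		/* wipe, then free */
	return err;
}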
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 48ff5f13e797..0c9e768e5628 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2658,7 +2658,7 @@ int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
(sdif && pmc->multi.imr_ifindex == sdif)))
break;
}
- ret = inet->mc_all;
+ ret = inet_test_bit(MC_ALL, sk);
if (!pmc)
goto unlock;
psl = rcu_dereference(pmc->sflist);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0cc19cfbb673..aeebe8816689 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1019,7 +1019,7 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b812eb36f0e3..e13a84433413 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif
- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
goto errout;
if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
@@ -182,17 +182,17 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
r->idiag_inode = sock_i_ino(sk);
memset(&inet_sockopt, 0, sizeof(inet_sockopt));
- inet_sockopt.recverr = inet->recverr;
- inet_sockopt.is_icsk = inet->is_icsk;
- inet_sockopt.freebind = inet->freebind;
- inet_sockopt.hdrincl = inet->hdrincl;
- inet_sockopt.mc_loop = inet->mc_loop;
- inet_sockopt.transparent = inet->transparent;
- inet_sockopt.mc_all = inet->mc_all;
- inet_sockopt.nodefrag = inet->nodefrag;
- inet_sockopt.bind_address_no_port = inet->bind_address_no_port;
- inet_sockopt.recverr_rfc4884 = inet->recverr_rfc4884;
- inet_sockopt.defer_connect = inet->defer_connect;
+ inet_sockopt.recverr = inet_test_bit(RECVERR, sk);
+ inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk);
+ inet_sockopt.freebind = inet_test_bit(FREEBIND, sk);
+ inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk);
+ inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk);
+ inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk);
+ inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk);
+ inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk);
+ inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
+ inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk);
+ inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk);
if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
&inet_sockopt))
goto errout;
@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
if (sk_fullsock(sk))
- entry.mark = sk->sk_mark;
+ entry.mark = READ_ONCE(sk->sk_mark);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else if (sk->sk_state == TCP_TIME_WAIT)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7391bf310a7..7876b7d703cb 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,9 +28,9 @@
#include <net/tcp.h>
#include <net/sock_reuseport.h>
-static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+u32 inet_ehashfn(const struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
{
static u32 inet_ehash_secret __read_mostly;
@@ -39,6 +39,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
return __inet_ehashfn(laddr, lport, faddr, fport,
inet_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet_ehashfn);
/* This function handles inet_sock, but also timewait and request sockets
* for IPv4/IPv6.
@@ -332,20 +333,38 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum)
+/**
+ * inet_lookup_reuseport() - execute reuseport logic on AF_INET socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum,
+ inet_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet_lookup_reuseport);
/*
* Here are some nice properties to exploit here. The BSD API
@@ -369,8 +388,8 @@ static struct sock *inet_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet_ehashfn);
if (result)
return result;
@@ -382,24 +401,23 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum, const int dif)
+struct sock *inet_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum, const int dif,
+ inet_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v4(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum,
+ ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
@@ -417,9 +435,11 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet_ehashfn);
if (result)
goto done;
}
@@ -650,20 +670,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_hashed(osk);
- if (ret) {
- /* Before deleting the node, we insert a new one to make
- * sure that the look-up-sk process would not miss either
- * of them and that at least one node would exist in ehash
- * table all the time. Otherwise there's a tiny chance
- * that lookup process could find nothing in ehash table.
- */
- __sk_nulls_add_node_tail_rcu(sk, list);
- sk_nulls_del_node_init_rcu(osk);
- }
- goto unlock;
- }
- if (found_dup_sk) {
+ ret = sk_nulls_del_node_init_rcu(osk);
+ } else if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -672,7 +680,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
-unlock:
spin_unlock(lock);
return ret;
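
/*
 * With inet_lookup_reuseport() exported and parameterized on the hash
 * function, a UDP lookup path can share the reuseport logic by passing
 * udp_ehashfn (any function matching inet_ehashfn_t).  Hedged sketch of a
 * caller inside a score-based socket walk:
 */
static struct sock *example_udp_reuseport(struct net *net, struct sock *sk,
					  struct sk_buff *skb, int doff,
					  __be32 saddr, __be16 sport,
					  __be32 daddr, unsigned short hnum)
{
	/* returns NULL when sk lacks SO_REUSEPORT; caller keeps scoring */
	return inet_lookup_reuseport(net, sk, skb, doff, saddr, sport,
				     daddr, hnum, udp_ehashfn);
}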
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 40052414c7c7..dd37a5bf6881 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -88,10 +88,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
+static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
{
- hlist_nulls_add_tail_rcu(&tw->tw_node, list);
+ hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -144,7 +144,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
- inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
+ inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
@@ -203,7 +203,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_reuseport = sk->sk_reuseport;
tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
- tw->tw_transparent = inet->transparent;
+ tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 81a1cce1a7d1..22a26d1d29a0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -548,7 +548,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto err_free_skb;
truncate = true;
}
@@ -689,7 +690,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
goto free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto free_skb;
truncate = true;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e70839257f7..43ba4b77b248 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(ip_local_out);
static inline int ip_select_ttl(const struct inet_sock *inet,
const struct dst_entry *dst)
{
- int ttl = inet->uc_ttl;
+ int ttl = READ_ONCE(inet->uc_ttl);
if (ttl < 0)
ttl = ip4_dst_hoplimit(dst);
@@ -184,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
ip_options_build(skb, &opt->opt, daddr, rt);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
if (!skb->mark)
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
/* Send it out. */
return ip_local_out(net, skb->sk, skb);
@@ -216,7 +216,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -236,7 +236,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
- return -EINVAL;
+ return PTR_ERR(neigh);
}
static int ip_finish_output_gso(struct net *net, struct sock *sk,
@@ -528,8 +528,8 @@ packet_routed:
skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
res = ip_local_out(net, sk, skb);
rcu_read_unlock();
@@ -1039,7 +1039,7 @@ static int __ip_append_data(struct sock *sk,
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1158,10 +1158,15 @@ alloc_new_skb:
}
copy = datalen - transhdrlen - fraggap - pagedlen;
+ /* [!] NOTE: copy will be negative if pagedlen>0
+ * because then the equation reduces to -fraggap.
+ */
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1209,6 +1214,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
@@ -1458,7 +1467,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* so the icmphdr may not be in the skb linear region and we cannot get
* icmp_type via icmp_hdr(skb)->type.
*/
- if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ if (sk->sk_type == SOCK_RAW &&
+ !inet_test_bit(HDRINCL, sk))
icmp_type = fl4->fl4_icmp_type;
else
icmp_type = icmp_hdr(skb)->type;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8e97d8d4cc9d..d1c73660b844 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -171,8 +171,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset)
{
- struct inet_sock *inet = inet_sk(sk);
- unsigned int flags = inet->cmsg_flags;
+ unsigned long flags = inet_cmsg_flags(inet_sk(sk));
+
+ if (!flags)
+ return;
/* Ordered by supposed usage frequency */
if (flags & IP_CMSG_PKTINFO) {
@@ -431,7 +433,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
if (skb_pull(skb, payload - skb->data)) {
- if (inet_sk(sk)->recverr_rfc4884)
+ if (inet_test_bit(RECVERR_RFC4884, sk))
ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
@@ -444,12 +446,11 @@ EXPORT_SYMBOL_GPL(ip_icmp_error);
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
- struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
struct iphdr *iph;
struct sk_buff *skb;
- if (!inet->recverr)
+ if (!inet_test_bit(RECVERR, sk))
return;
skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
@@ -568,7 +569,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
@@ -592,7 +593,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
}
if (inet_sk(sk)->tos != val) {
inet_sk(sk)->tos = val;
- sk->sk_priority = rt_tos2priority(val);
+ WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
@@ -607,17 +608,13 @@ EXPORT_SYMBOL(ip_sock_set_tos);
void ip_sock_set_freebind(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->freebind = true;
- release_sock(sk);
+ inet_set_bit(FREEBIND, sk);
}
EXPORT_SYMBOL(ip_sock_set_freebind);
void ip_sock_set_recverr(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->recverr = true;
- release_sock(sk);
+ inet_set_bit(RECVERR, sk);
}
EXPORT_SYMBOL(ip_sock_set_recverr);
@@ -634,9 +631,7 @@ EXPORT_SYMBOL(ip_sock_set_mtu_discover);
void ip_sock_set_pktinfo(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
- release_sock(sk);
+ inet_set_bit(PKTINFO, sk);
}
EXPORT_SYMBOL(ip_sock_set_pktinfo);
@@ -950,6 +945,102 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ inet_assign_bit(PKTINFO, sk, val);
+ return 0;
+ case IP_RECVTTL:
+ inet_assign_bit(TTL, sk, val);
+ return 0;
+ case IP_RECVTOS:
+ inet_assign_bit(TOS, sk, val);
+ return 0;
+ case IP_RECVOPTS:
+ inet_assign_bit(RECVOPTS, sk, val);
+ return 0;
+ case IP_RETOPTS:
+ inet_assign_bit(RETOPTS, sk, val);
+ return 0;
+ case IP_PASSSEC:
+ inet_assign_bit(PASSSEC, sk, val);
+ return 0;
+ case IP_RECVORIGDSTADDR:
+ inet_assign_bit(ORIGDSTADDR, sk, val);
+ return 0;
+ case IP_RECVFRAGSIZE:
+ if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+ return -EINVAL;
+ inet_assign_bit(RECVFRAGSIZE, sk, val);
+ return 0;
+ case IP_RECVERR:
+ inet_assign_bit(RECVERR, sk, val);
+ if (!val)
+ skb_errqueue_purge(&sk->sk_error_queue);
+ return 0;
+ case IP_RECVERR_RFC4884:
+ if (val < 0 || val > 1)
+ return -EINVAL;
+ inet_assign_bit(RECVERR_RFC4884, sk, val);
+ return 0;
+ case IP_FREEBIND:
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(FREEBIND, sk, val);
+ return 0;
+ case IP_HDRINCL:
+ if (sk->sk_type != SOCK_RAW)
+ return -ENOPROTOOPT;
+ inet_assign_bit(HDRINCL, sk, val);
+ return 0;
+ case IP_MULTICAST_LOOP:
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(MC_LOOP, sk, val);
+ return 0;
+ case IP_MULTICAST_ALL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ inet_assign_bit(MC_ALL, sk, val);
+ return 0;
+ case IP_TRANSPARENT:
+ if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(TRANSPARENT, sk, val);
+ return 0;
+ case IP_NODEFRAG:
+ if (sk->sk_type != SOCK_RAW)
+ return -ENOPROTOOPT;
+ inet_assign_bit(NODEFRAG, sk, val);
+ return 0;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val);
+ return 0;
+ case IP_TTL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val != -1 && (val < 1 || val > 255))
+ return -EINVAL;
+ WRITE_ONCE(inet->uc_ttl, val);
+ return 0;
+ case IP_MINTTL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&ip4_min_ttl);
+
+ WRITE_ONCE(inet->min_ttl, val);
+ return 0;
+ }
+
err = 0;
if (needs_rtnl)
rtnl_lock();
@@ -967,7 +1058,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
break;
old = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
- if (inet->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
@@ -989,111 +1080,27 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
kfree_rcu(old, rcu);
break;
}
- case IP_PKTINFO:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PKTINFO;
- else
- inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
- break;
- case IP_RECVTTL:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TTL;
- else
- inet->cmsg_flags &= ~IP_CMSG_TTL;
- break;
- case IP_RECVTOS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TOS;
- else
- inet->cmsg_flags &= ~IP_CMSG_TOS;
- break;
- case IP_RECVOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
- break;
- case IP_RETOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RETOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
- break;
- case IP_PASSSEC:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PASSSEC;
- else
- inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
- break;
- case IP_RECVORIGDSTADDR:
- if (val)
- inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
- else
- inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
- break;
case IP_CHECKSUM:
if (val) {
- if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
+ if (!(inet_test_bit(CHECKSUM, sk))) {
inet_inc_convert_csum(sk);
- inet->cmsg_flags |= IP_CMSG_CHECKSUM;
+ inet_set_bit(CHECKSUM, sk);
}
} else {
- if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
+ if (inet_test_bit(CHECKSUM, sk)) {
inet_dec_convert_csum(sk);
- inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
+ inet_clear_bit(CHECKSUM, sk);
}
}
break;
- case IP_RECVFRAGSIZE:
- if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
- goto e_inval;
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
- break;
case IP_TOS: /* This sets both TOS and Precedence */
__ip_sock_set_tos(sk, val);
break;
- case IP_TTL:
- if (optlen < 1)
- goto e_inval;
- if (val != -1 && (val < 1 || val > 255))
- goto e_inval;
- inet->uc_ttl = val;
- break;
- case IP_HDRINCL:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
- break;
- }
- inet->hdrincl = val ? 1 : 0;
- break;
- case IP_NODEFRAG:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
- break;
- }
- inet->nodefrag = val ? 1 : 0;
- break;
- case IP_BIND_ADDRESS_NO_PORT:
- inet->bind_address_no_port = val ? 1 : 0;
- break;
case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
- case IP_RECVERR:
- inet->recverr = !!val;
- if (!val)
- skb_queue_purge(&sk->sk_error_queue);
- break;
- case IP_RECVERR_RFC4884:
- if (val < 0 || val > 1)
- goto e_inval;
- inet->recverr_rfc4884 = !!val;
- break;
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -1105,11 +1112,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
goto e_inval;
inet->mc_ttl = val;
break;
- case IP_MULTICAST_LOOP:
- if (optlen < 1)
- goto e_inval;
- inet->mc_loop = !!val;
- break;
case IP_UNICAST_IF:
{
struct net_device *dev = NULL;
@@ -1214,7 +1216,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
struct ip_mreqn mreq;
err = -EPROTO;
- if (inet_sk(sk)->is_icsk)
+ if (inet_test_bit(IS_ICSK, sk))
break;
if (optlen < sizeof(struct ip_mreq))
@@ -1325,20 +1327,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
else
err = ip_set_mcast_msfilter(sk, optval, optlen);
break;
- case IP_MULTICAST_ALL:
- if (optlen < 1)
- goto e_inval;
- if (val != 0 && val != 1)
- goto e_inval;
- inet->mc_all = val;
- break;
-
- case IP_FREEBIND:
- if (optlen < 1)
- goto e_inval;
- inet->freebind = !!val;
- break;
-
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
@@ -1347,32 +1335,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IP_TRANSPARENT:
- if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
- err = -EPERM;
- break;
- }
- if (optlen < 1)
- goto e_inval;
- inet->transparent = !!val;
- break;
-
- case IP_MINTTL:
- if (optlen < 1)
- goto e_inval;
- if (val < 0 || val > 255)
- goto e_inval;
-
- if (val)
- static_branch_enable(&ip4_min_ttl);
-
- /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
- * while we are changing it.
- */
- WRITE_ONCE(inet->min_ttl, val);
- break;
-
case IP_LOCAL_PORT_RANGE:
{
const __u16 lo = val;
@@ -1415,7 +1377,7 @@ e_inval:
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
- bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
+ bool prepare = inet_test_bit(PKTINFO, sk) ||
ipv6_sk_rxinfo(sk);
if (prepare && skb_rtable(skb)) {
@@ -1566,6 +1528,72 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < 0)
return -EINVAL;
+ /* Handle options that can be read without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ val = inet_test_bit(PKTINFO, sk);
+ goto copyval;
+ case IP_RECVTTL:
+ val = inet_test_bit(TTL, sk);
+ goto copyval;
+ case IP_RECVTOS:
+ val = inet_test_bit(TOS, sk);
+ goto copyval;
+ case IP_RECVOPTS:
+ val = inet_test_bit(RECVOPTS, sk);
+ goto copyval;
+ case IP_RETOPTS:
+ val = inet_test_bit(RETOPTS, sk);
+ goto copyval;
+ case IP_PASSSEC:
+ val = inet_test_bit(PASSSEC, sk);
+ goto copyval;
+ case IP_RECVORIGDSTADDR:
+ val = inet_test_bit(ORIGDSTADDR, sk);
+ goto copyval;
+ case IP_CHECKSUM:
+ val = inet_test_bit(CHECKSUM, sk);
+ goto copyval;
+ case IP_RECVFRAGSIZE:
+ val = inet_test_bit(RECVFRAGSIZE, sk);
+ goto copyval;
+ case IP_RECVERR:
+ val = inet_test_bit(RECVERR, sk);
+ goto copyval;
+ case IP_RECVERR_RFC4884:
+ val = inet_test_bit(RECVERR_RFC4884, sk);
+ goto copyval;
+ case IP_FREEBIND:
+ val = inet_test_bit(FREEBIND, sk);
+ goto copyval;
+ case IP_HDRINCL:
+ val = inet_test_bit(HDRINCL, sk);
+ goto copyval;
+ case IP_MULTICAST_LOOP:
+ val = inet_test_bit(MC_LOOP, sk);
+ goto copyval;
+ case IP_MULTICAST_ALL:
+ val = inet_test_bit(MC_ALL, sk);
+ goto copyval;
+ case IP_TRANSPARENT:
+ val = inet_test_bit(TRANSPARENT, sk);
+ goto copyval;
+ case IP_NODEFRAG:
+ val = inet_test_bit(NODEFRAG, sk);
+ goto copyval;
+ case IP_BIND_ADDRESS_NO_PORT:
+ val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
+ goto copyval;
+ case IP_TTL:
+ val = READ_ONCE(inet->uc_ttl);
+ if (val < 0)
+ val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl);
+ goto copyval;
+ case IP_MINTTL:
+ val = READ_ONCE(inet->min_ttl);
+ goto copyval;
+ }
+
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -1600,53 +1628,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
- case IP_PKTINFO:
- val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
- break;
- case IP_RECVTTL:
- val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
- break;
- case IP_RECVTOS:
- val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
- break;
- case IP_RECVOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
- break;
- case IP_RETOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
- break;
- case IP_PASSSEC:
- val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
- break;
- case IP_RECVORIGDSTADDR:
- val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
- break;
- case IP_CHECKSUM:
- val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
- break;
- case IP_RECVFRAGSIZE:
- val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
- break;
case IP_TOS:
val = inet->tos;
break;
- case IP_TTL:
- {
- struct net *net = sock_net(sk);
- val = (inet->uc_ttl == -1 ?
- READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
- inet->uc_ttl);
- break;
- }
- case IP_HDRINCL:
- val = inet->hdrincl;
- break;
- case IP_NODEFRAG:
- val = inet->nodefrag;
- break;
- case IP_BIND_ADDRESS_NO_PORT:
- val = inet->bind_address_no_port;
- break;
case IP_MTU_DISCOVER:
val = inet->pmtudisc;
break;
@@ -1665,18 +1649,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
}
break;
}
- case IP_RECVERR:
- val = inet->recverr;
- break;
- case IP_RECVERR_RFC4884:
- val = inet->recverr_rfc4884;
- break;
case IP_MULTICAST_TTL:
val = inet->mc_ttl;
break;
- case IP_MULTICAST_LOOP:
- val = inet->mc_loop;
- break;
case IP_UNICAST_IF:
val = (__force int)htonl((__u32) inet->uc_index);
break;
@@ -1715,9 +1690,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
else
err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- case IP_MULTICAST_ALL:
- val = inet->mc_all;
- break;
case IP_PKTOPTIONS:
{
struct msghdr msg;
@@ -1737,7 +1709,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_controllen = len;
msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
- if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+ if (inet_test_bit(PKTINFO, sk)) {
struct in_pktinfo info;
info.ipi_addr.s_addr = inet->inet_rcv_saddr;
@@ -1745,26 +1717,17 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
info.ipi_ifindex = inet->mc_index;
put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
- if (inet->cmsg_flags & IP_CMSG_TTL) {
+ if (inet_test_bit(TTL, sk)) {
int hlim = inet->mc_ttl;
put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
}
- if (inet->cmsg_flags & IP_CMSG_TOS) {
+ if (inet_test_bit(TOS, sk)) {
int tos = inet->rcv_tos;
put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
}
len -= msg.msg_controllen;
return copy_to_sockptr(optlen, &len, sizeof(int));
}
- case IP_FREEBIND:
- val = inet->freebind;
- break;
- case IP_TRANSPARENT:
- val = inet->transparent;
- break;
- case IP_MINTTL:
- val = inet->min_ttl;
- break;
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
@@ -1776,7 +1739,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -ENOPROTOOPT;
}
sockopt_release_sock(sk);
-
+copyval:
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
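
/*
 * Userspace-visible behaviour is unchanged by the lockless fast paths
 * above; the kernel simply no longer takes lock_sock() for these options.
 * Illustrative userspace snippet:
 */
#include <netinet/in.h>
#include <sys/socket.h>

static int example_setup(int fd)
{
	int one = 1;

	if (setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT,
		       &one, sizeof(one)) < 0)
		return -1;
	/* IP_TRANSPARENT additionally needs CAP_NET_RAW or CAP_NET_ADMIN */
	return setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one));
}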
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 92c02c886fe7..586b1b3e35b8 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
.un.frag.__unused = 0,
.un.frag.mtu = htons(mtu),
};
- icmph->checksum = ip_compute_csum(icmph, len);
+ icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
skb_reset_transport_header(skb);
niph = skb_push(skb, sizeof(*niph));
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 53bfd8af6920..d1e7d0ceb7ed 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -287,12 +287,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
break;
default:
goto tx_err;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index e61ea428ea18..265b39bc435b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -7,6 +7,7 @@
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <net/netns/generic.h>
#include <net/route.h>
@@ -65,7 +66,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
struct sock *sk = skb->sk;
if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
- inet_sk(sk)->nodefrag)
+ inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -113,17 +114,31 @@ static void __net_exit defrag4_net_exit(struct net *net)
}
}
+static const struct nf_defrag_hook defrag_hook = {
+ .owner = THIS_MODULE,
+ .enable = nf_defrag_ipv4_enable,
+ .disable = nf_defrag_ipv4_disable,
+};
+
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
};
static int __init nf_defrag_init(void)
{
- return register_pernet_subsys(&defrag4_net_ops);
+ int err;
+
+ err = register_pernet_subsys(&defrag4_net_ops);
+ if (err)
+ return err;
+
+ rcu_assign_pointer(nf_defrag_v4_hook, &defrag_hook);
+ return err;
}
static void __exit nf_defrag_fini(void)
{
+ rcu_assign_pointer(nf_defrag_v4_hook, NULL);
unregister_pernet_subsys(&defrag4_net_ops);
}
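
/*
 * Registration above follows the usual RCU publish/unpublish module
 * pattern.  A reader is then expected to pin the module before calling
 * through the hook; hedged sketch (assumes the nf_defrag_v4_hook pointer
 * declared elsewhere in this series):
 */
static int example_enable_defrag(struct net *net)
{
	const struct nf_defrag_hook *hook;
	int err = -EOPNOTSUPP;

	rcu_read_lock();
	hook = rcu_dereference(nf_defrag_v4_hook);
	if (hook && try_module_get(hook->owner)) {
		rcu_read_unlock();
		err = hook->enable(net);
		if (err)
			module_put(hook->owner);
		return err;
	}
	rcu_read_unlock();
	return err;
}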
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f95142e56da0..bbff68b5b5d4 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+static bool nexthop_is_good_nh(const struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference(nh->nh_info);
+
+ switch (nhi->family) {
+ case AF_INET:
+ return ipv4_good_nh(&nhi->fib_nh);
+ case AF_INET6:
+ return ipv6_good_nh(&nhi->fib6_nh);
+ }
+
+ return false;
+}
+
+static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
int i;
- for (i = 0; i < nhg->num_nh; ++i) {
+ for (i = 0; i < nhg->num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- struct nh_info *nhi;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
- nhi = rcu_dereference(nhge->nh->nh_info);
- if (nhi->fdb_nh)
- return nhge->nh;
+ return nhge->nh;
+ }
+
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+{
+ struct nexthop *rc = NULL;
+ int i;
+
+ if (nhg->fdb_nh)
+ return nexthop_select_path_fdb(nhg, hash);
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
/* nexthops always check whether the neighbour is good and do
* not rely on a sysctl for this behavior
*/
- switch (nhi->family) {
- case AF_INET:
- if (ipv4_good_nh(&nhi->fib_nh))
- return nhge->nh;
- break;
- case AF_INET6:
- if (ipv6_good_nh(&nhi->fib6_nh))
- return nhge->nh;
- break;
- }
+ if (!nexthop_is_good_nh(nhge->nh))
+ continue;
if (!rc)
rc = nhge->nh;
+
+ if (hash > atomic_read(&nhge->hthr.upper_bound))
+ continue;
+
+ return nhge->nh;
}
- return rc;
+ return rc ? : nhg->nh_entries[0].nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -3186,7 +3209,6 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb,
return err;
}
- ctx->idx++;
return 0;
}
@@ -3221,13 +3243,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
&rtm_dump_nexthop_cb, &filter);
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3318,7 +3336,6 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
struct rtm_dump_res_bucket_ctx {
struct rtm_dump_nh_ctx nh;
u16 bucket_index;
- u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
};
static struct rtm_dump_res_bucket_ctx *
@@ -3347,9 +3364,6 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
u16 bucket_index;
int err;
- if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
- return 0;
-
nhg = rtnl_dereference(nh->nh_grp);
res_table = rtnl_dereference(nhg->res_table);
for (bucket_index = dd->ctx->bucket_index;
@@ -3367,25 +3381,18 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
dd->filter.res_bucket_nh_id != nhge->nh->id)
continue;
+ dd->ctx->bucket_index = bucket_index;
err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
RTM_NEWNEXTHOPBUCKET, portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
- goto out_err;
- }
+ if (err)
+ return err;
}
- dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
- bucket_index = 0;
+ dd->ctx->bucket_index = 0;
-out:
- err = skb->len;
-out_err:
- dd->ctx->bucket_index = bucket_index;
- return err;
+ return 0;
}
static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3441,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
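The refactored hash-threshold selection scans the group in order, skips nexthops that fail the neighbour-validity check, remembers the first good entry as a fallback, and returns the first good entry whose upper bound covers the hash. A standalone sketch of that selection logic (plain ints in place of the atomic upper bounds):

#include <stdio.h>

struct entry { int upper_bound; int good; };

/* First good entry whose threshold covers the hash; the first good
 * entry overall serves as a fallback (mirrors rc above). */
static int select_hthr(const struct entry *e, int n, int hash)
{
	int rc = -1, i;

	for (i = 0; i < n; i++) {
		if (!e[i].good)
			continue;
		if (rc < 0)
			rc = i;
		if (hash > e[i].upper_bound)
			continue;
		return i;
	}
	return rc >= 0 ? rc : 0;     /* ultimate fallback: entry 0 */
}

int main(void)
{
	struct entry grp[3] = {
		{ .upper_bound = 100, .good = 0 },   /* dead neighbour */
		{ .upper_bound = 200, .good = 1 },
		{ .upper_bound = 300, .good = 1 },
	};
	printf("hash 50  -> entry %d\n", select_hthr(grp, 3, 50));   /* 1 */
	printf("hash 250 -> entry %d\n", select_hthr(grp, 3, 250));  /* 2 */
	return 0;
}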
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 25dd78cee179..75e0aee35eb7 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -580,7 +580,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if ((family == AF_INET && !inet_sock->recverr) ||
+ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
(family == AF_INET6 && !inet6_sk(sk)->recverr)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
@@ -894,7 +894,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
*addr_len = sizeof(*sin);
}
- if (isk->cmsg_flags)
+ if (inet_cmsg_flags(isk))
ip_cmsg_recv(msg, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -921,7 +921,8 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
if (skb->protocol == htons(ETH_P_IPV6) &&
inet6_sk(sk)->rxopt.all)
pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb);
- else if (skb->protocol == htons(ETH_P_IP) && isk->cmsg_flags)
+ else if (skb->protocol == htons(ETH_P_IP) &&
+ inet_cmsg_flags(isk))
ip_cmsg_recv(msg, skb);
#endif
} else {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7782ff5e6539..4b5db5d1edc2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -203,8 +203,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
struct inet_sock *inet = inet_sk(sk);
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- int err = 0;
int harderr = 0;
+ bool recverr;
+ int err = 0;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
@@ -218,7 +219,8 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
2. Socket is connected (otherwise the error indication
is useless without ip_recverr and the error is hard).
*/
- if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
+ recverr = inet_test_bit(RECVERR, sk);
+ if (!recverr && sk->sk_state != TCP_ESTABLISHED)
return;
switch (type) {
@@ -245,16 +247,16 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
}
}
- if (inet->recverr) {
+ if (recverr) {
const struct iphdr *iph = (const struct iphdr *)skb->data;
u8 *payload = skb->data + (iph->ihl << 2);
- if (inet->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
payload = skb->data;
ip_icmp_error(sk, skb, err, 0, info, payload);
}
- if (inet->recverr || harderr) {
+ if (recverr || harderr) {
sk->sk_err = err;
sk_error_report(sk);
}
@@ -348,7 +350,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
@@ -413,7 +415,7 @@ error_free:
kfree_skb(skb);
error:
IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
- if (err == -ENOBUFS && !inet->recverr)
+ if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk))
err = 0;
return err;
}
@@ -489,12 +491,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (len > 0xFFFF)
goto out;
- /* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields.
- * Doing this indirectly yields the same result.
- */
- hdrincl = inet->hdrincl;
- hdrincl = READ_ONCE(hdrincl);
+ hdrincl = inet_test_bit(HDRINCL, sk);
+
/*
* Check the flags.
*/
@@ -645,7 +643,7 @@ back_from_confirm:
ip_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE)) {
err = ip_push_pending_frames(sk, &fl4);
- if (err == -ENOBUFS && !inet->recverr)
+ if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk))
err = 0;
}
release_sock(sk);
@@ -767,7 +765,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv(msg, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
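The recverr/hdrincl conversions above replace racy C bitfields with atomic per-bit accessors, which is what makes the lockless reads sound. A userspace model of the inet_test_bit()/inet_set_bit()/inet_clear_bit() idea using C11 atomics (the kernel uses its own bitops on inet->inet_flags, not this API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { RECVERR, HDRINCL, NODEFRAG };    /* illustrative flag names */

static atomic_ulong flags;

static void set_bit_(int b)   { atomic_fetch_or(&flags, 1UL << b); }
static void clear_bit_(int b) { atomic_fetch_and(&flags, ~(1UL << b)); }
static bool test_bit_(int b)
{
	/* One atomic load: safe against concurrent writers, unlike a
	 * plain bitfield read. */
	return atomic_load(&flags) & (1UL << b);
}

int main(void)
{
	set_bit_(HDRINCL);
	printf("HDRINCL=%d RECVERR=%d\n",
	       test_bit_(HDRINCL), test_bit_(RECVERR));
	clear_bit_(HDRINCL);
	printf("HDRINCL=%d\n", test_bit_(HDRINCL));
	return 0;
}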
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e7e9fba0357a..d8c99bdc6170 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -515,13 +515,12 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
__u8 scope = RT_SCOPE_UNIVERSE;
if (sk) {
- const struct inet_sock *inet = inet_sk(sk);
-
oif = sk->sk_bound_dev_if;
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
- prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+ prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW :
+ sk->sk_protocol;
}
flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
@@ -552,10 +551,11 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ inet_test_bit(HDRINCL, sk) ?
+ IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
rcu_read_unlock();
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03e08745308..b1559481898d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk)
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+ tcp_scaling_ratio_init(sk);
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
@@ -582,7 +583,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (urg_data & TCP_URG_VALID)
mask |= EPOLLPRI;
- } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT &&
+ inet_test_bit(DEFER_CONNECT, sk)) {
/* Active TCP fastopen socket with defer_connect
* Return EPOLLOUT so application can call write()
* in order for kernel to generate SYN+data
@@ -1006,7 +1008,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
tp->fastopen_req->size = size;
tp->fastopen_req->uarg = uarg;
- if (inet->defer_connect) {
+ if (inet_test_bit(DEFER_CONNECT, sk)) {
err = tcp_connect(sk);
/* Same failure procedure as in tcp_v4/6_connect */
if (err) {
@@ -1024,7 +1026,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
if (tp->fastopen_req) {
*copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
- inet->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
}
return err;
}
@@ -1065,7 +1067,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
zc = MSG_SPLICE_PAGES;
}
- if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+ if (unlikely(flags & MSG_FASTOPEN ||
+ inet_test_bit(DEFER_CONNECT, sk)) &&
!tp->repair) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
if (err == -EINPROGRESS && copied_syn > 0)
@@ -1700,7 +1703,7 @@ EXPORT_SYMBOL(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
{
- int cap;
+ int space, cap;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
@@ -1715,10 +1718,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
- val <<= 1;
- if (val > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, val);
- tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ space = tcp_space_from_win(sk, val);
+ if (space > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ tcp_sk(sk)->window_clamp = val;
}
return 0;
}
@@ -1739,7 +1742,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb,
}
#ifdef CONFIG_MMU
-const struct vm_operations_struct tcp_vm_ops = {
+static const struct vm_operations_struct tcp_vm_ops = {
};
int tcp_mmap(struct file *file, struct socket *sock,
@@ -2042,13 +2045,10 @@ static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
unsigned long address,
bool *mmap_locked)
{
- struct vm_area_struct *vma = NULL;
+ struct vm_area_struct *vma = lock_vma_under_rcu(mm, address);
-#ifdef CONFIG_PER_VMA_LOCK
- vma = lock_vma_under_rcu(mm, address);
-#endif
if (vma) {
- if (!vma_is_tcp(vma)) {
+ if (vma->vm_ops != &tcp_vm_ops) {
vma_end_read(vma);
return NULL;
}
@@ -2058,7 +2058,7 @@ static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
mmap_read_lock(mm);
vma = vma_lookup(mm, address);
- if (!vma || !vma_is_tcp(vma)) {
+ if (!vma || vma->vm_ops != &tcp_vm_ops) {
mmap_read_unlock(mm);
return NULL;
}
@@ -2864,7 +2864,7 @@ adjudge_to_death:
if (sk->sk_state == TCP_FIN_WAIT2) {
struct tcp_sock *tp = tcp_sk(sk);
- if (tp->linger2 < 0) {
+ if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk),
@@ -3087,7 +3087,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
- inet->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
@@ -3290,18 +3290,21 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_SYNCNT)
return -EINVAL;
- lock_sock(sk);
- inet_csk(sk)->icsk_syn_retries = val;
- release_sock(sk);
+ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_syncnt);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+int tcp_sock_set_user_timeout(struct sock *sk, int val)
{
- lock_sock(sk);
- inet_csk(sk)->icsk_user_timeout = val;
- release_sock(sk);
+ /* Cap the max time in ms TCP will retry or probe the window
+ * before giving up and aborting (ETIMEDOUT) a connection.
+ */
+ if (val < 0)
+ return -EINVAL;
+
+ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
+ return 0;
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3312,7 +3315,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPIDLE)
return -EINVAL;
- tp->keepalive_time = val * HZ;
+ /* Paired with WRITE_ONCE() in keepalive_time_when() */
+ WRITE_ONCE(tp->keepalive_time, val * HZ);
if (sock_flag(sk, SOCK_KEEPOPEN) &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
u32 elapsed = keepalive_time_elapsed(tp);
@@ -3343,9 +3347,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPINTVL)
return -EINVAL;
- lock_sock(sk);
- tcp_sk(sk)->keepalive_intvl = val * HZ;
- release_sock(sk);
+ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_keepintvl);
@@ -3355,9 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPCNT)
return -EINVAL;
- lock_sock(sk);
- tcp_sk(sk)->keepalive_probes = val;
- release_sock(sk);
+ /* Paired with READ_ONCE() in keepalive_probes() */
+ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_keepcnt);
@@ -3459,6 +3460,32 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case TCP_SYNCNT:
+ return tcp_sock_set_syncnt(sk, val);
+ case TCP_USER_TIMEOUT:
+ return tcp_sock_set_user_timeout(sk, val);
+ case TCP_KEEPINTVL:
+ return tcp_sock_set_keepintvl(sk, val);
+ case TCP_KEEPCNT:
+ return tcp_sock_set_keepcnt(sk, val);
+ case TCP_LINGER2:
+ if (val < 0)
+ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+ else
+ WRITE_ONCE(tp->linger2, val * HZ);
+ return 0;
+ case TCP_DEFER_ACCEPT:
+ /* Translate value in seconds to number of retransmits */
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ));
+ return 0;
+ }
+
sockopt_lock_sock(sk);
switch (optname) {
@@ -3554,25 +3581,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_KEEPIDLE:
err = tcp_sock_set_keepidle_locked(sk, val);
break;
- case TCP_KEEPINTVL:
- if (val < 1 || val > MAX_TCP_KEEPINTVL)
- err = -EINVAL;
- else
- tp->keepalive_intvl = val * HZ;
- break;
- case TCP_KEEPCNT:
- if (val < 1 || val > MAX_TCP_KEEPCNT)
- err = -EINVAL;
- else
- tp->keepalive_probes = val;
- break;
- case TCP_SYNCNT:
- if (val < 1 || val > MAX_TCP_SYNCNT)
- err = -EINVAL;
- else
- icsk->icsk_syn_retries = val;
- break;
-
case TCP_SAVE_SYN:
/* 0: disable, 1: enable, 2: start from ether_header */
if (val < 0 || val > 2)
@@ -3581,22 +3589,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
tp->save_syn = val;
break;
- case TCP_LINGER2:
- if (val < 0)
- tp->linger2 = -1;
- else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
- else
- tp->linger2 = val * HZ;
- break;
-
- case TCP_DEFER_ACCEPT:
- /* Translate value in seconds to number of retransmits */
- icsk->icsk_accept_queue.rskq_defer_accept =
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
- TCP_RTO_MAX / HZ);
- break;
-
case TCP_WINDOW_CLAMP:
err = tcp_set_window_clamp(sk, val);
break;
@@ -3611,16 +3603,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
break;
#endif
- case TCP_USER_TIMEOUT:
- /* Cap the max time in ms TCP will retry or probe the window
- * before giving up and aborting (ETIMEDOUT) a connection.
- */
- if (val < 0)
- err = -EINVAL;
- else
- icsk->icsk_user_timeout = val;
- break;
-
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
@@ -3656,13 +3638,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (!tp->repair)
err = -EPERM;
else
- tp->tsoffset = val - tcp_time_stamp_raw();
+ WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
break;
case TCP_NOTSENT_LOWAT:
- tp->notsent_lowat = val;
+ WRITE_ONCE(tp->notsent_lowat, val);
sk->sk_write_space(sk);
break;
case TCP_INQ:
@@ -3674,7 +3656,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_TX_DELAY:
if (val)
tcp_enable_tx_delay();
- tp->tcp_tx_delay = val;
+ WRITE_ONCE(tp->tcp_tx_delay, val);
break;
default:
err = -ENOPROTOOPT;
@@ -3991,17 +3973,18 @@ int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? :
+ val = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
- val = tp->linger2;
+ val = READ_ONCE(tp->linger2);
if (val >= 0)
val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
+ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
+ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
@@ -4138,11 +4121,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_USER_TIMEOUT:
- val = icsk->icsk_user_timeout;
+ val = READ_ONCE(icsk->icsk_user_timeout);
break;
case TCP_FASTOPEN:
- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
+ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
break;
case TCP_FASTOPEN_CONNECT:
@@ -4154,14 +4137,14 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TX_DELAY:
- val = tp->tcp_tx_delay;
+ val = READ_ONCE(tp->tcp_tx_delay);
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp_raw() + tp->tsoffset;
+ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
break;
case TCP_NOTSENT_LOWAT:
- val = tp->notsent_lowat;
+ val = READ_ONCE(tp->notsent_lowat);
break;
case TCP_INQ:
val = tp->recvmsg_inq;
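The setsockopt rework above moves single-word options onto a lockless fast path: writers use WRITE_ONCE() before the socket lock is ever taken, and every reader pairs with READ_ONCE(). A hedged userspace sketch of that dispatch split, with relaxed atomics standing in for the ONCE accessors and invented option names:

#include <stdatomic.h>
#include <stdio.h>

enum { OPT_SYNCNT = 1, OPT_LINGER2, OPT_CONGESTION /* needs the lock */ };

static _Atomic int syncnt, linger2;

static int setsockopt_sketch(int optname, int val)
{
	/* Lockless fast path first, as in do_tcp_setsockopt() above. */
	switch (optname) {
	case OPT_SYNCNT:
		if (val < 1)
			return -1;                /* -EINVAL */
		atomic_store_explicit(&syncnt, val, memory_order_relaxed);
		return 0;
	case OPT_LINGER2:
		atomic_store_explicit(&linger2, val, memory_order_relaxed);
		return 0;
	}
	/* lock_sock(sk); ... stateful options ...; release_sock(sk); */
	return 0;
}

int main(void)
{
	setsockopt_sketch(OPT_SYNCNT, 5);
	printf("syncnt=%d\n",
	       atomic_load_explicit(&syncnt, memory_order_relaxed));
	return 0;
}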
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 45cc7f1ca296..8ed54e7334a9 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
static bool tcp_fastopen_queue_check(struct sock *sk)
{
struct fastopen_queue *fastopenq;
+ int max_qlen;
/* Make sure the listener has enabled fastopen, and we don't
* exceed the max # of pending TFO requests allowed before trying
@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* temporarily vs a server not supporting Fast Open at all.
*/
fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- if (fastopenq->max_qlen == 0)
+ max_qlen = READ_ONCE(fastopenq->max_qlen);
+ if (max_qlen == 0)
return false;
- if (fastopenq->qlen >= fastopenq->max_qlen) {
+ if (fastopenq->qlen >= max_qlen) {
struct request_sock *req1;
spin_lock(&fastopenq->lock);
req1 = fastopenq->rskq_rst_head;
@@ -449,7 +451,7 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
if (tp->fastopen_connect && !tp->fastopen_req) {
if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
- inet_sk(sk)->defer_connect = 1;
+ inet_set_bit(DEFER_CONNECT, sk);
return true;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 57c8af1859c1..06fe1cf645d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,6 +237,16 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
len = skb_shinfo(skb)->gso_size ? : skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
+ /* Note: divides are still a bit expensive.
+ * For the moment, only adjust scaling_ratio
+ * when we update icsk_ack.rcv_mss.
+ */
+ if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
+ u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+
+ do_div(val, skb->truesize);
+ tcp_sk(sk)->scaling_ratio = val ? val : 1;
+ }
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
@@ -287,7 +297,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -295,7 +305,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -727,8 +736,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
@@ -740,12 +749,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(grow, tp->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
- rcvmem += 128;
-
- do_div(rcvwin, tp->advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -3521,7 +3525,7 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
{
return after(ack, tp->snd_una) ||
after(ack_seq, tp->snd_wl1) ||
- (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
+ (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin));
}
/* If we update tp->snd_una, also update tp->bytes_acked */
@@ -4122,9 +4126,8 @@ void tcp_parse_options(const struct net *net,
break;
#ifdef CONFIG_TCP_MD5SIG
case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
+ /* The MD5 Hash has already been
+ * checked (see tcp_v{4,6}_rcv()).
*/
break;
#endif
@@ -4308,10 +4311,16 @@ static inline bool tcp_paws_discard(const struct sock *sk,
* (borrowed from freebsd)
*/
-static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
+static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
+ u32 seq, u32 end_seq)
{
- return !before(end_seq, tp->rcv_wup) &&
- !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
+ if (before(end_seq, tp->rcv_wup))
+ return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
+
+ if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+
+ return SKB_NOT_DROPPED_YET;
}
/* When we get a reset we do this. */
@@ -5050,13 +5059,19 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
/* Ok. In sequence. In window. */
queue_and_out:
- if (skb_queue_len(&sk->sk_receive_queue) == 0)
- sk_forced_mem_schedule(sk, skb->truesize);
- else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
- reason = SKB_DROP_REASON_PROTO_MEM;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ /* TODO: maybe ratelimit these WIN 0 ACKs? */
+ inet_csk(sk)->icsk_ack.pending |=
+ (ICSK_ACK_NOMEM | ICSK_ACK_NOW);
+ inet_csk_schedule_ack(sk);
sk->sk_data_ready(sk);
- goto drop;
+
+ if (skb_queue_len(&sk->sk_receive_queue)) {
+ reason = SKB_DROP_REASON_PROTO_MEM;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ goto drop;
+ }
+ sk_forced_mem_schedule(sk, skb->truesize);
}
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5734,7 +5749,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
/* Step 1: check sequence number */
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+ reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ if (reason) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
@@ -5751,7 +5767,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
} else if (tcp_reset_check(sk, skb)) {
goto reset;
}
- SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
@@ -6315,7 +6330,7 @@ consume:
if (fastopen_fail)
return -1;
if (sk->sk_write_pending ||
- icsk->icsk_accept_queue.rskq_defer_accept ||
+ READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept) ||
inet_csk_in_pingpong_mode(sk)) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
@@ -6615,7 +6630,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
}
- if (tp->linger2 < 0) {
+ if (READ_ONCE(tp->linger2) < 0) {
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
@@ -6985,7 +7000,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
- inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
+ inet_rsk(req)->no_srccheck = inet_test_bit(TRANSPARENT, sk);
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
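The new scaling_ratio caches payload/truesize as a fixed-point fraction so tcp_space_from_win() and tcp_win_from_space() can convert between buffer space and advertised window without per-call divides. A quick arithmetic demo, assuming TCP_RMEM_TO_WIN_SCALE is 8 as in this kernel series:

#include <inttypes.h>
#include <stdio.h>

#define TCP_RMEM_TO_WIN_SCALE 8   /* assumed to match include/linux/tcp.h */

int main(void)
{
	uint64_t len = 1400, truesize = 2048;   /* sample skb geometry */
	uint64_t ratio = (len << TCP_RMEM_TO_WIN_SCALE) / truesize;
	uint64_t rcvbuf = 131072;

	if (!ratio)
		ratio = 1;                      /* as in the hunk above */
	/* tcp_win_from_space(): window a given buffer can advertise */
	printf("ratio=%" PRIu64 " win(%" PRIu64 ")=%" PRIu64 "\n",
	       ratio, rcvbuf, (rcvbuf * ratio) >> TCP_RMEM_TO_WIN_SCALE);
	return 0;
}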
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd365de4d5ff..27140e5cdc06 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -307,11 +308,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
usin->sin_port));
- tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
- inet->inet_daddr);
+ WRITE_ONCE(tp->tsoffset,
+ secure_tcp_ts_off(net, inet->inet_saddr,
+ inet->inet_daddr));
}
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
if (tcp_fastopen_defer_connect(sk, &err))
return err;
@@ -475,7 +477,6 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
struct tcp_sock *tp;
- struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -623,8 +624,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
* --ANK (980905)
*/
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
+ if (!sock_owned_by_user(sk) &&
+ inet_test_bit(RECVERR, sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else { /* Only an error on timeout */
@@ -930,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(ipv4_tcp_sk);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_priority : sk->sk_priority;
+ inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -988,11 +989,12 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
- req->ts_recent,
+ READ_ONCE(req->ts_recent),
0,
tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
- ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
+ ip_hdr(skb)->tos,
+ READ_ONCE(tcp_rsk(req)->txhash));
}
/*
@@ -1594,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
@@ -2446,6 +2448,8 @@ static void *established_get_first(struct seq_file *seq)
struct hlist_nulls_node *node;
spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
+ cond_resched();
+
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(hinfo, st))
continue;
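inet_id is switched to an atomic_t above so concurrent senders can advance the IP ID without a data race on the old plain field. A simplified userspace model of that allocation (the real helper lives in ip_select_ident_segs() and differs in detail):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint16_t inet_id;

/* Each datagram takes the current ID and bumps it by its segment
 * count; fetch_add makes this safe from concurrent senders. */
static uint16_t next_ip_id(uint16_t segs)
{
	return atomic_fetch_add(&inet_id, segs);
}

int main(void)
{
	atomic_store(&inet_id, 0x1234);   /* atomic_set(..., get_random_u16()) */
	printf("id=%#x id=%#x\n", next_ip_id(1), next_ip_id(1));
	return 0;
}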
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 82f4575f9cd9..c196759f1d3b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- possible_net_t tcpm_net;
+ struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head;
};
-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
- return read_pnet(&tm->tcpm_net);
+ /* Paired with the WRITE_ONCE() in tcpm_new() */
+ return READ_ONCE(tm->tcpm_net);
}
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_lock & (1 << idx);
+ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}
-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_vals[idx];
+ /* Paired with WRITE_ONCE() in tcp_metric_set() */
+ return READ_ONCE(tm->tcpm_vals[idx]);
}
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
- tm->tcpm_vals[idx] = val;
+ /* Paired with READ_ONCE() in tcp_metric_get() */
+ WRITE_ONCE(tm->tcpm_vals[idx], val);
}
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- return inetpeer_addr_cmp(a, b) == 0;
+ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}
struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval;
u32 val;
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
- tm->tcpm_lock = val;
+ /* Paired with READ_ONCE() in tcp_metric_locked() */
+ WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ dst_metric_raw(dst, RTAX_SSTHRESH));
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ dst_metric_raw(dst, RTAX_CWND));
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
+ write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
+ write_sequnlock(&fastopen_seqlock);
}
}
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst)
{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ unsigned long limit;
+
+ if (!tm)
+ return;
+ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+ if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}
@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) {
- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+ if (time_before(READ_ONCE(tm->tcpm_stamp),
+ READ_ONCE(oldest->tcpm_stamp)))
oldest = tm;
}
tm = oldest;
} else {
- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
goto out_unlock;
}
- write_pnet(&tm->tcpm_net, net);
+ /* Paired with the READ_ONCE() in tm_net() */
+ WRITE_ONCE(tm->tcpm_net, net);
+
tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr;
- tcpm_suck_dst(tm, dst, true);
+ tcpm_suck_dst(tm, dst, reclaim);
if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
tp->reordering);
}
}
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
rcu_read_unlock();
}
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret;
}
-static DEFINE_SEQLOCK(fastopen_seqlock);
-
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
}
if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
- jiffies - tm->tcpm_stamp,
+ jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest)
goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
- u32 val = tm->tcpm_vals[i];
+ u32 val = tcp_metric_get(tm, i);
if (!val)
continue;
@@ -972,7 +990,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = {
.resv_start_op = TCP_METRICS_CMD_DEL + 1,
};
-static unsigned int tcpmhash_entries;
+static unsigned int tcpmhash_entries __initdata;
static int __init set_tcpmhash_entries(char *str)
{
ssize_t ret;
@@ -988,15 +1006,11 @@ static int __init set_tcpmhash_entries(char *str)
}
__setup("tcpmhash_entries=", set_tcpmhash_entries);
-static int __net_init tcp_net_metrics_init(struct net *net)
+static void __init tcp_metrics_hash_alloc(void)
{
+ unsigned int slots = tcpmhash_entries;
size_t size;
- unsigned int slots;
- if (!net_eq(net, &init_net))
- return 0;
-
- slots = tcpmhash_entries;
if (!slots) {
if (totalram_pages() >= 128 * 1024)
slots = 16 * 1024;
@@ -1009,9 +1023,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
tcp_metrics_hash = kvzalloc(size, GFP_KERNEL);
if (!tcp_metrics_hash)
- return -ENOMEM;
-
- return 0;
+ panic("Could not allocate the tcp_metrics hash table\n");
}
static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
@@ -1020,7 +1032,6 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
- .init = tcp_net_metrics_init,
.exit_batch = tcp_net_metrics_exit_batch,
};
@@ -1028,9 +1039,11 @@ void __init tcp_metrics_init(void)
{
int ret;
+ tcp_metrics_hash_alloc();
+
ret = register_pernet_subsys(&tcp_net_metrics_ops);
if (ret < 0)
- panic("Could not allocate the tcp_metrics hash table\n");
+ panic("Could not register tcp_net_metrics_ops\n");
ret = genl_register_family(&tcp_metrics_nl_family);
if (ret < 0)
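Wiping the fastopen state under fastopen_seqlock lets lockless readers detect a concurrent writer and retry, rather than observe a half-cleared cookie. A compact single-threaded model of the seqlock protocol (the kernel's seqlock_t additionally handles preemption and lockdep):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;        /* even = stable, odd = write in progress */
static int mss, syn_loss;      /* the protected fastopen fields */

static void write_begin(void) { atomic_fetch_add(&seq, 1); }
static void write_end(void)   { atomic_fetch_add(&seq, 1); }

static void read_snapshot(int *m, int *s)
{
	unsigned int start;

	do {
		start = atomic_load(&seq);
		while (start & 1)               /* writer active: wait */
			start = atomic_load(&seq);
		*m = mss;
		*s = syn_loss;
	} while (atomic_load(&seq) != start);   /* retry if it moved */
}

int main(void)
{
	int m, s;

	write_begin();
	mss = 1460;
	syn_loss = 0;
	write_end();
	read_snapshot(&m, &s);
	printf("mss=%d syn_loss=%d\n", m, s);
	return 0;
}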
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 04fc328727e6..b98d476f1594 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -289,9 +289,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
- struct inet_sock *inet = inet_sk(sk);
- tw->tw_transparent = inet->transparent;
+ tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_mark = sk->sk_mark;
tw->tw_priority = sk->sk_priority;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
@@ -528,7 +527,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
newtp->lsndtime = tcp_jiffies32;
- newsk->sk_txhash = treq->txhash;
+ newsk->sk_txhash = READ_ONCE(treq->txhash);
newtp->total_retrans = req->num_retrans;
tcp_init_xmit_timers(newsk);
@@ -555,7 +554,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->max_window = newtp->snd_wnd;
if (newtp->rx_opt.tstamp_ok) {
- newtp->rx_opt.ts_recent = req->ts_recent;
+ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
@@ -570,8 +569,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
- newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
@@ -619,7 +616,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
- tmp_opt.ts_recent = req->ts_recent;
+ tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
if (tmp_opt.rcv_tsecr)
tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
/* We do not store true stamp, but it is not required,
@@ -758,8 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* In sequence, PAWS is OK. */
+ /* TODO: We probably should defer ts_recent change once
+ * we take ownership of @req.
+ */
if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
- req->ts_recent = tmp_opt.rcv_tsval;
+ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
/* Truncate SYN, it is out of window starting
@@ -791,7 +791,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return sk;
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
- if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2cb39b6dad02..e6b4fbd642f7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -257,11 +257,19 @@ EXPORT_SYMBOL(tcp_select_initial_window);
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 old_win = tp->rcv_wnd;
- u32 cur_win = tcp_receive_window(tp);
- u32 new_win = __tcp_select_window(sk);
struct net *net = sock_net(sk);
+ u32 old_win = tp->rcv_wnd;
+ u32 cur_win, new_win;
+
+ /* Make the window 0 if we failed to queue the data because we
+ * are out of memory. The window is temporary, so we don't store
+ * it on the socket.
+ */
+ if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM))
+ return 0;
+ cur_win = tcp_receive_window(tp);
+ new_win = __tcp_select_window(sk);
if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
@@ -878,7 +886,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
- opts->tsecr = req->ts_recent;
+ opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(ireq->sack_ok)) {
@@ -1293,14 +1301,21 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- /* if no packet is in qdisc/device queue, then allow XPS to select
- * another queue. We can be called from tcp_tsq_handler()
- * which holds one reference to sk.
- *
- * TODO: Ideally, in-flight pure ACK packets should not matter here.
- * One way to get this would be to set skb->truesize = 2 on them.
+ /* We set skb->ooo_okay to one if this packet can select
+ * a different TX queue than prior packets of this flow,
+ * to avoid self-inflicted reorders.
+ * The 'other' queue decision is based on current cpu number
+ * if XPS is enabled, or sk->sk_txhash otherwise.
+ * We can switch to another (and better) queue if:
+ * 1) No packet with payload is in qdisc/device queues.
+ * Delays in TX completion can defeat the test
+ * even if packets were already sent.
+ * 2) Or rtx queue is empty.
+ * This mitigates above case if ACK packets for
+ * all prior packets were already processed.
*/
- skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
+ skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) ||
+ tcp_rtx_queue_empty(sk);
/* If we had to use memory reserve to allocate this skb,
* this might cause drops if packet is looped back :
@@ -3660,7 +3675,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_lock();
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
- skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+ skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
/* bpf program will be interested in the tcp_flags */
TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
@@ -3741,11 +3756,6 @@ static void tcp_connect_init(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
@@ -4210,7 +4220,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
/* Paired with WRITE_ONCE() in sock_setsockopt() */
if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
- tcp_rsk(req)->txhash = net_tx_rndhash();
+ WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash());
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
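Earlier in this file's diff, tcp_select_window() now advertises a zero window whenever ICSK_ACK_NOMEM is pending, without persisting that zero into rcv_wnd, so the window springs back once memory pressure clears. A toy model of that decision (the bit value is invented for the demo, not the kernel's):

#include <stdio.h>

#define ICSK_ACK_NOMEM 0x08     /* illustrative bit value only */

struct sk_sketch { unsigned int ack_pending; unsigned int rcv_wnd; };

/* Mirrors the tcp_select_window() hunk: advertise 0 while under
 * memory pressure, but never store the 0 into rcv_wnd. */
static unsigned int select_window(struct sk_sketch *sk)
{
	if (sk->ack_pending & ICSK_ACK_NOMEM)
		return 0;
	return sk->rcv_wnd;
}

int main(void)
{
	struct sk_sketch sk = { .rcv_wnd = 65535 };

	sk.ack_pending |= ICSK_ACK_NOMEM;
	printf("win=%u (pressure)\n", select_window(&sk));
	sk.ack_pending &= ~ICSK_ACK_NOMEM;
	printf("win=%u (recovered)\n", select_window(&sk));
	return 0;
}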
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 470f581eedd4..984ab4a0421e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 elapsed, start_ts;
+ u32 elapsed, start_ts, user_timeout;
s32 remaining;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (!icsk->icsk_user_timeout)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- remaining = icsk->icsk_user_timeout - elapsed;
+ remaining = user_timeout - elapsed;
if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
@@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 remaining;
+ u32 remaining, user_timeout;
s32 elapsed;
- if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout || !icsk->icsk_probes_tstamp)
return when;
elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
if (unlikely(elapsed < 0))
elapsed = 0;
- remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+ remaining = msecs_to_jiffies(user_timeout) - elapsed;
remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
return min_t(u32, remaining, when);
@@ -239,7 +241,8 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? :
+ /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
+ retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
max_retransmits = retry_until;
@@ -269,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
}
if (!expired)
expired = retransmits_timed_out(sk, retry_until,
- icsk->icsk_user_timeout);
+ READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -383,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- if (!icsk->icsk_probes_tstamp)
+ if (!icsk->icsk_probes_tstamp) {
icsk->icsk_probes_tstamp = tcp_jiffies32;
- else if (icsk->icsk_user_timeout &&
- (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
- msecs_to_jiffies(icsk->icsk_user_timeout))
- goto abort;
+ } else {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (user_timeout &&
+ (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+ msecs_to_jiffies(user_timeout))
+ goto abort;
+ }
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -421,8 +427,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
req->rsk_ops->syn_ack_timeout(req);
- /* add one more retry for fastopen */
- max_retries = icsk->icsk_syn_retries ? :
+ /* Add one more retry for fastopen.
+ * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
+ */
+ max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
if (req->num_timeout >= max_retries) {
@@ -446,6 +454,22 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
req->timeout << req->num_timeout, TCP_RTO_MAX);
}
+static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const int timeout = TCP_RTO_MAX * 2;
+ u32 rcv_delta, rtx_delta;
+
+ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ if (rcv_delta <= timeout)
+ return false;
+
+ rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
+ (tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
+
+ return rtx_delta > timeout;
+}
/**
* tcp_retransmit_timer() - The TCP retransmit timeout handler
@@ -495,23 +519,26 @@ void tcp_retransmit_timer(struct sock *sk)
* we cannot allow such beasts to hang infinitely.
*/
struct inet_sock *inet = inet_sk(sk);
+ u32 rtx_delta;
+
+ rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
if (sk->sk_family == AF_INET) {
- net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &inet->inet_daddr,
- ntohs(inet->inet_dport),
- inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+ &inet->inet_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt,
+ jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+ rtx_delta);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &sk->sk_v6_daddr,
- ntohs(inet->inet_dport),
- inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt,
+ jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+ rtx_delta);
}
#endif
- if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
+ if (tcp_rtx_probe0_timed_out(sk, skb)) {
tcp_write_err(sk);
goto out;
}
@@ -591,7 +618,9 @@ out_reset_timer:
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
- icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+ tcp_rto_min(sk),
+ TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
icsk->icsk_backoff >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
@@ -706,7 +735,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
- if (tp->linger2 >= 0) {
+ if (READ_ONCE(tp->linger2) >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
if (tmo > 0) {
@@ -731,13 +760,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
elapsed = keepalive_time_elapsed(tp);
if (elapsed >= keepalive_time_when(tp)) {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
/* If the TCP_USER_TIMEOUT option is enabled, use that
* to determine when to timeout instead.
*/
- if ((icsk->icsk_user_timeout != 0 &&
- elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+ if ((user_timeout != 0 &&
+ elapsed >= msecs_to_jiffies(user_timeout) &&
icsk->icsk_probes_out > 0) ||
- (icsk->icsk_user_timeout == 0 &&
+ (user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
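tcp_rtx_probe0_timed_out() keeps a zero-window probe session alive only while both the last receive and the last (re)transmission fall within 2*TCP_RTO_MAX; the unsigned subtractions stay correct across jiffies wraparound. A simplified demo of that wrap-safe check, assuming HZ=1000 and TCP_RTO_MAX=120s (the kernel derives rtx_delta from TCP timestamps and icsk_timeout, which this sketch flattens into "now"):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TIMEOUT 240000u   /* models TCP_RTO_MAX * 2 in jiffies, HZ=1000 */

/* u32 subtraction handles counter wraparound, as in the kernel. */
static bool probe0_timed_out(uint32_t now, uint32_t rcv_tstamp,
			     uint32_t retrans_stamp)
{
	uint32_t rcv_delta = now - rcv_tstamp;
	uint32_t rtx_delta = now - retrans_stamp;

	return rcv_delta > TIMEOUT && rtx_delta > TIMEOUT;
}

int main(void)
{
	uint32_t now = 1000;   /* "now" has wrapped past 2^32 */

	printf("%d\n", probe0_timed_out(now, now - 500000u, now - 500000u));
	printf("%d\n", probe0_timed_out(now, now - 1000u, now - 1000u));
	return 0;
}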
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 42a96b3547c9..0794a2c46a56 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,6 +114,7 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
+#include <net/gro.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -406,9 +407,9 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport)
{
static u32 udp_ehash_secret __read_mostly;
@@ -418,22 +419,6 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum)
-{
- struct sock *reuse_sk = NULL;
- u32 hash;
-
- if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
- reuse_sk = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- }
- return reuse_sk;
-}
-
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -451,42 +436,36 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- result = lookup_reuseport(net, sk, skb,
- saddr, sport, daddr, hnum);
+ badness = score;
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ result = sk;
+ continue;
+ }
+
+ result = inet_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, udp_ehashfn);
+ if (!result) {
+ result = sk;
+ continue;
+ }
+
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk))
+ if (!reuseport_has_conns(sk))
return result;
- result = result ? : sk;
- badness = score;
+ /* Reuseport logic returned an error; keep the original score. */
+ if (IS_ERR(result))
+ continue;
+
+ badness = compute_score(result, net, saddr, sport,
+ daddr, hnum, dif, sdif);
+
}
}
return result;
}
-static struct sock *udp4_lookup_run_bpf(struct net *net,
- struct udp_table *udptable,
- struct sk_buff *skb,
- __be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum, const int dif)
-{
- struct sock *sk, *reuse_sk;
- bool no_reuseport;
-
- if (udptable != net->ipv4.udp_table)
- return NULL; /* only UDP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
- daddr, hnum, dif, &sk);
- if (no_reuseport || IS_ERR_OR_NULL(sk))
- return sk;
-
- reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk)
- sk = reuse_sk;
- return sk;
-}
-
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -511,9 +490,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto done;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- sk = udp4_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ udptable == net->ipv4.udp_table) {
+ sk = inet_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, dif,
+ udp_ehashfn);
if (sk) {
result = sk;
goto done;
@@ -555,10 +536,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
@@ -795,7 +779,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
(u8 *)(uh+1));
goto out;
}
- if (!inet->recverr) {
+ if (!inet_test_bit(RECVERR, sk)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else
@@ -978,7 +962,8 @@ csum_partial:
send:
err = ip_send_skb(sock_net(sk), skb);
if (err) {
- if (err == -ENOBUFS && !inet->recverr) {
+ if (err == -ENOBUFS &&
+ !inet_test_bit(RECVERR, sk)) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
@@ -1553,7 +1538,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock(&list->lock);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
busylock_release(busy);
return 0;
@@ -1886,7 +1871,7 @@ try_again:
if (udp_sk(sk)->gro_enabled)
udp_cmsg_recv(msg, sk, skb);
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
err = copied;
@@ -2408,7 +2393,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
- sk = skb_steal_sock(skb, &refcounted);
+ sk = inet_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+ &refcounted, udp_ehashfn);
+ if (IS_ERR(sk))
+ goto no_sk;
+
if (sk) {
struct dst_entry *dst = skb_dst(skb);
int ret;
@@ -2429,7 +2418,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk)
return udp_unicast_rcv_skb(sk, skb, uh);
-
+no_sk:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
nf_reset_ct(skb);
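The reworked udp4_lib_lookup2() keeps the best-scoring socket, lets a connected (TCP_ESTABLISHED) socket win outright, and only honours the reuseport group's pick when the group has no connected members. A stripped-down sketch of that scoring loop, with the reuseport selection itself elided:

#include <stdio.h>

struct sk { int established; int score; };

static struct sk *lookup(struct sk *socks, int n, int has_conns)
{
	struct sk *result = NULL;
	int badness = -1, i;

	for (i = 0; i < n; i++) {
		struct sk *s = &socks[i];

		if (s->score <= badness)
			continue;
		badness = s->score;
		if (s->established) {   /* connected socket wins outright */
			result = s;
			continue;
		}
		result = s;             /* reuseport selection elided here */
		if (!has_conns)         /* group has no connections: done */
			return result;
	}
	return result;
}

int main(void)
{
	struct sk socks[2] = {
		{ .established = 0, .score = 4 },
		{ .established = 1, .score = 9 },
	};
	printf("picked score=%d\n", lookup(socks, 2, 1)->score);  /* 9 */
	return 0;
}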
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 75aa4de5b731..0f46b3c2e4ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -274,13 +274,20 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
-
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
+ mss);
+ return NULL;
+ }
+
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
skb_pull(gso_skb, sizeof(*uh));
/* clear destructor to avoid skb_segment assigning it to tail */
@@ -388,8 +395,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features, false);
mss = skb_shinfo(skb)->gso_size;
@@ -603,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct iphdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
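
With the reordered checks above, dodgy (untrusted) SKB_GSO_UDP_L4 packets
are no longer rejected outright: when the device can handle the GSO type
anyway, only gso_segs is recomputed from the payload length. A worked
example, assuming mss holds gso_size: for a 1452-byte UDP payload and an
mss of 500, DIV_ROUND_UP(1452, 500) = 3 segments (500 + 500 + 452).
Illustrative snippet, not from the patch:

static void example_fixup_gso_segs(struct sk_buff *skb, unsigned int mss)
{
	unsigned int payload = skb->len - sizeof(struct udphdr);

	/* ceil(payload / mss): the last segment may be short */
	skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(payload, mss);
}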
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 5f8104cf082d..9b18f371af0d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct sock *sk = sock->sk;
/* Disable multicast loopback */
- inet_sk(sk)->mc_loop = 0;
+ inet_clear_bit(MC_LOOP, sk);
/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
inet_inc_convert_csum(sk);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 57ea394ffa8c..c33bca2c3841 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -124,22 +124,13 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
xfrm_dst_destroy(xdst);
}
-static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
-{
- if (!unregister)
- return;
-
- xfrm_dst_ifdown(dst, dev);
-}
-
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
- .ifdown = xfrm4_dst_ifdown,
+ .ifdown = xfrm_dst_ifdown,
.local_out = __ip_local_out,
.gc_thresh = 32768,
};
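
Dropping the "int unregister" argument lets the generic xfrm_dst_ifdown()
be wired into the ops table directly. A sketch of the new callback shape,
assuming the core now invokes .ifdown only on actual device unregistration
(which is what made the old early-return guard redundant):

static void example_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	/* release any per-device state this dst holds; there is no
	 * unregister flag left to test
	 */
}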
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 658bfed1df8b..08d4b7132d4c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -152,7 +152,7 @@ config INET6_TUNNEL
default n
config IPV6_VTI
-tristate "Virtual (secure) IPv6: tunneling"
+ tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
select NET_IP_TUNNEL
select XFRM
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 61291bd247b9..967913ad65e5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -318,9 +320,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
static void addrconf_mod_rs_timer(struct inet6_dev *idev,
unsigned long when)
{
- if (!timer_pending(&idev->rs_timer))
+ if (!mod_timer(&idev->rs_timer, jiffies + when))
in6_dev_hold(idev);
- mod_timer(&idev->rs_timer, jiffies + when);
}
static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
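
The rs_timer change above closes a refcount race: mod_timer() returns 0
when the timer was inactive, so the idev reference is taken exactly once
per armed timer, without the racy timer_pending() pre-check. The general
idiom, sketched with hypothetical names:

	/* first arming (timer was idle): take a reference */
	if (!mod_timer(&obj->timer, jiffies + delay))
		refcount_inc(&obj->refcnt);
	/* re-arming a pending timer: the existing reference is kept */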
@@ -1062,20 +1063,28 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
struct fib6_info *f6i = NULL;
int err = 0;
- if (addr_type == IPV6_ADDR_ANY ||
- (addr_type & IPV6_ADDR_MULTICAST &&
- !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
- (!(idev->dev->flags & IFF_LOOPBACK) &&
- !netif_is_l3_master(idev->dev) &&
- addr_type & IPV6_ADDR_LOOPBACK))
+ if (addr_type == IPV6_ADDR_ANY) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid address");
return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (addr_type & IPV6_ADDR_MULTICAST &&
+ !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign multicast address without \"IFA_F_MCAUTOJOIN\" flag");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (!(idev->dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(idev->dev) &&
+ addr_type & IPV6_ADDR_LOOPBACK) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign loopback address on this device");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ }
if (idev->dead) {
- err = -ENODEV; /*XXX*/
+ NL_SET_ERR_MSG_MOD(extack, "device is going away");
+ err = -ENODEV;
goto out;
}
if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
err = -EACCES;
goto out;
}
@@ -1102,7 +1111,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags);
+ f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags, extack);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
f6i = NULL;
@@ -2562,12 +2571,18 @@ static void manage_tempaddrs(struct inet6_dev *idev,
ipv6_ifa_notify(0, ift);
}
- if ((create || list_empty(&idev->tempaddr_list)) &&
- idev->cnf.use_tempaddr > 0) {
+ /* Also create a temporary address if it's enabled but no temporary
+ * address currently exists.
+ * However, we get called with valid_lft == 0, prefered_lft == 0, create == false
+ * as part of cleanup (i.e. deleting the mngtmpaddr).
+ * We don't want that to result in creating a new temporary IP address.
+ */
+ if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
+ create = true;
+
+ if (create && idev->cnf.use_tempaddr > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
- * Also create a temporary address if it's enabled but
- * no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
ipv6_create_tempaddr(ifp, false);
@@ -2726,6 +2741,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
return;
}
+ if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft)
+ goto put;
+
/*
* Two things going on here:
* 1) Add routes for on-link prefixes
@@ -2920,30 +2938,40 @@ static int inet6_addr_add(struct net *net, int ifindex,
ASSERT_RTNL();
- if (cfg->plen > 128)
+ if (cfg->plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
/* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
+ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
return -EINVAL;
+ }
- if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64)
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
+ NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
if (!dev)
return -ENODEV;
idev = addrconf_add_dev(dev);
- if (IS_ERR(idev))
+ if (IS_ERR(idev)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return PTR_ERR(idev);
+ }
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
true, cfg->pfx, ifindex);
- if (ret < 0)
+ if (ret < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
return ret;
+ }
}
cfg->scope = ipv6_addr_scope(cfg->pfx);
@@ -3000,22 +3028,29 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
- const struct in6_addr *pfx, unsigned int plen)
+ const struct in6_addr *pfx, unsigned int plen,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct net_device *dev;
- if (plen > 128)
+ if (plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
idev = __in6_dev_get(dev);
- if (!idev)
+ if (!idev) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
+ }
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
@@ -3038,6 +3073,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
}
}
read_unlock_bh(&idev->lock);
+
+ NL_SET_ERR_MSG_MOD(extack, "address not found");
return -EADDRNOTAVAIL;
}
@@ -3080,7 +3117,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
- ireq.ifr6_prefixlen);
+ ireq.ifr6_prefixlen, NULL);
rtnl_unlock();
return err;
}
@@ -3483,7 +3520,7 @@ static int fixup_permanent_addr(struct net *net,
struct fib6_info *f6i, *prev;
f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false,
- GFP_ATOMIC);
+ GFP_ATOMIC, NULL);
if (IS_ERR(f6i))
return PTR_ERR(f6i);
@@ -4693,7 +4730,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa_flags &= IFA_F_MANAGETEMPADDR;
return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen);
+ ifm->ifa_prefixlen, extack);
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
@@ -4898,8 +4935,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
dev = __dev_get_by_index(net, ifm->ifa_index);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
if (tb[IFA_FLAGS])
cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
@@ -4934,10 +4973,12 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
- else
+ } else {
err = inet6_addr_modify(net, ifa, &cfg);
+ }
in6_ifa_put(ifa);
@@ -5597,6 +5638,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
}
static inline size_t inet6_ifla6_size(void)
@@ -6791,6 +6833,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_lft",
+ .data = &ipv6_devconf.accept_ra_min_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
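
The new accept_ra_min_lft knob (added to both devconf templates, the
netlink dump, and the sysctl table above) sets a floor on advertised
lifetimes. A compact sketch of the policy as applied in the RA paths,
with a hypothetical helper name; a lifetime of 0 (a withdrawal) is always
honoured:

static bool example_ra_lft_ok(const struct inet6_dev *in6_dev, u32 lifetime)
{
	return lifetime == 0 ||
	       lifetime >= in6_dev->cnf.accept_ra_min_lft;
}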
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5d593ddc0347..368824fe9719 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
@@ -200,12 +200,12 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
sk->sk_destruct = inet6_sock_destruct;
@@ -229,7 +229,7 @@ lookup_protocol:
*/
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
inet->mc_index = 0;
RCU_INIT_POINTER(inet->mc_list, NULL);
@@ -399,7 +399,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -435,10 +435,8 @@ out_unlock:
goto out;
}
-/* bind for INET6 API */
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
const struct proto *prot;
int err = 0;
@@ -462,6 +460,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet6_bind(sk, uaddr, addr_len, flags);
}
+
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet6_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
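
Because inet6_sk_generic() now reads an explicit ipv6_pinfo_offset instead
of computing "obj_size - sizeof(struct ipv6_pinfo)", the ipv6_pinfo member
no longer has to be the last field of each protocol's socket structure.
A sketch with a hypothetical protocol:

struct example6_sock {
	struct inet_sock  inet;
	struct ipv6_pinfo inet6;	/* any position is fine now */
	u32               private_state;
};

static struct proto example6_prot = {
	.obj_size          = sizeof(struct example6_sock),
	.ipv6_pinfo_offset = offsetof(struct example6_sock, inet6),
	/* remaining ops elided */
};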
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index dacdea7fcb62..bb17f484ee2c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -305,7 +305,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
}
net = dev_net(idev->dev);
- f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+ f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
goto out;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9b6818453afe..41ebc4e57473 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
+ const struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
int oif = sk->sk_bound_dev_if;
memset(fl6, 0, sizeof(*fl6));
@@ -523,7 +524,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
} else {
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin->sin6_addr);
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 202fc3aaa83c..4952ae792450 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -612,8 +612,6 @@ looped_back:
kfree(buf);
- skb_dst_drop(skb);
-
ip6_route_input(skb);
if (skb_dst(skb)->error) {
@@ -650,7 +648,6 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(skb->dev);
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
- struct in6_addr daddr;
int n, i;
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
@@ -798,9 +795,7 @@ looped_back:
return -1;
}
- daddr = *addr;
- *addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = daddr;
+ swap(*addr, ipv6_hdr(skb)->daddr);
ip6_route_input(skb);
if (skb_dst(skb)->error) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4159662fa214..93a594a901d1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -424,7 +424,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb)
if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
const struct rt6_info *rt6 = skb_rt6_info(skb);
- if (rt6)
+ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
+ * and ip6_null_entry (which carries no rt6i_idev) could be attached to the skb if no route is found.
+ */
+ if (rt6 && rt6->rt6i_idev)
dev = rt6->rt6i_idev->dev;
}
@@ -1031,11 +1034,9 @@ drop_no_count:
return 0;
}
-void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
- u8 type,
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif)
+ const struct in6_addr *daddr, int oif)
{
memset(fl6, 0, sizeof(*fl6));
fl6->saddr = *saddr;
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 3faf62530d6a..69caed07315f 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index bee45dfeb187..67e8c9440977 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -5,7 +5,6 @@
#include <linux/rhashtable.h>
#include <linux/vmalloc.h>
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b64b49012655..b0e8d278e8a9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -39,6 +39,7 @@ u32 inet6_ehashfn(const struct net *net,
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet6_ehashfn);
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
@@ -111,22 +112,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned short hnum)
+/**
+ * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
@@ -143,8 +162,8 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet6_ehashfn);
if (result)
return result;
@@ -156,30 +175,30 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet6_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum, const int dif)
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -193,9 +212,11 @@ struct sock *inet6_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet6_ehashfn);
if (result)
goto done;
}
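
Exporting the reuseport helpers with an inet6_ehashfn_t parameter lets UDP
share them (passing udp6_ehashfn) while TCP keeps inet6_ehashfn, and
INDIRECT_CALL_INET keeps the indirect call cheap under retpolines. A
sketch of a caller, with a hypothetical wrapper name:

static struct sock *example_pick(struct net *net, struct sock *sk,
				 struct sk_buff *skb, int doff,
				 const struct in6_addr *saddr, __be16 sport,
				 const struct in6_addr *daddr, u16 hnum)
{
	struct sock *pick;

	pick = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport,
				      daddr, hnum, inet6_ehashfn);
	if (IS_ERR_OR_NULL(pick))	/* no SO_REUSEPORT, or program error */
		return sk;
	return pick;
}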
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bac768d36cc1..28b01a068412 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -1057,6 +1060,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires_locked(rt);
}
/*
@@ -1118,9 +1123,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES))
- fib6_clean_expires(iter);
+ fib6_clean_expires_locked(iter);
else
- fib6_set_expires(iter, rt->expires);
+ fib6_set_expires_locked(iter,
+ rt->expires);
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1479,6 +1485,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (fib6_has_expires(rt))
+ hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -2285,9 +2295,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2295,7 +2304,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
* Routes are expired even if they are in use.
*/
- if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
+ if (fib6_has_expires(rt) && rt->expires) {
if (time_after(now, rt->expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
@@ -2312,6 +2321,40 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+ fib6_gc_table(net, table, gc_args);
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2327,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
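
The tb6_gc_hlist change turns garbage collection from a walk of every
fib6 node (fib6_clean_all) into a walk of only the routes that can
actually expire. The underlying pattern, sketched with generic names;
list membership is maintained under the per-table lock:

	spin_lock_bh(&table->tb6_lock);
	if (has_expiry && hlist_unhashed(&rt->gc_link))
		hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist);
	spin_unlock_bh(&table->tb6_lock);
	/* the collector now touches O(expiring routes), not O(all routes) */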
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index da80974ad23a..070d87abf7c0 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -955,7 +955,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto tx_err;
truncate = true;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e8c90e97608..0665e8b09968 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -113,7 +113,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -1591,7 +1591,7 @@ emsgsize:
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet_sk(sk)->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1693,7 +1693,10 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
copy = datalen - transhdrlen - fraggap - pagedlen;
- if (copy < 0) {
+ /* [!] NOTE: copy may be negative if pagedlen>0
+ * because then the equation may reduce to -fraggap.
+ */
+ if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
err = -EINVAL;
goto error;
}
@@ -1744,6 +1747,8 @@ alloc_new_skb:
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1791,6 +1796,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
@@ -1986,7 +1995,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
- if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ if (sk->sk_socket->type == SOCK_RAW &&
+ !inet_test_bit(HDRINCL, sk))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 10b222865d46..73c85d4e0e9c 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -568,12 +568,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
vti6_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
break;
default:
goto tx_err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cc3d5ad17257..67a3b8f6e72b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
And all this only to mangle msg->im6_msgtype and
to set msg->im6_mbz to "mbz" :-)
*/
- skb_push(skb, -skb_network_offset(pkt));
+ __skb_pull(skb, skb_network_offset(pkt));
skb_push(skb, sizeof(*msg));
skb_reset_transport_header(skb);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ae818ff46224..0e2a0847b387 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -102,7 +102,7 @@ int ip6_ra_control(struct sock *sk, int sel)
struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
struct ipv6_txoptions *opt)
{
- if (inet_sk(sk)->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
if (opt &&
!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
@@ -474,8 +474,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(sk->sk_prot, &tcp_prot);
/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
- sk->sk_socket->ops = &inet_stream_ops;
- sk->sk_family = PF_INET;
+ WRITE_ONCE(sk->sk_socket->ops, &inet_stream_ops);
+ WRITE_ONCE(sk->sk_family, PF_INET);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct proto *prot = &udp_prot;
@@ -488,8 +488,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */
WRITE_ONCE(sk->sk_prot, prot);
- sk->sk_socket->ops = &inet_dgram_ops;
- sk->sk_family = PF_INET;
+ WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops);
+ WRITE_ONCE(sk->sk_family, PF_INET);
}
/* Disable all options not to allocate memory anymore,
@@ -633,7 +633,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
/* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
- inet_sk(sk)->transparent = valbool;
+ inet_assign_bit(TRANSPARENT, sk, valbool);
retv = 0;
break;
@@ -641,7 +641,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
/* we also don't have a separate freebind bit for IPV6 */
- inet_sk(sk)->freebind = valbool;
+ inet_assign_bit(FREEBIND, sk, valbool);
retv = 0;
break;
@@ -831,7 +831,7 @@ done:
goto e_inval;
retv = -EPROTO;
- if (inet_sk(sk)->is_icsk)
+ if (inet_test_bit(IS_ICSK, sk))
break;
retv = -EFAULT;
@@ -923,7 +923,7 @@ done:
goto e_inval;
np->recverr = valbool;
if (!val)
- skb_queue_purge(&sk->sk_error_queue);
+ skb_errqueue_purge(&sk->sk_error_queue);
retv = 0;
break;
case IPV6_FLOWINFO_SEND:
@@ -1330,11 +1330,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_TRANSPARENT:
- val = inet_sk(sk)->transparent;
+ val = inet_test_bit(TRANSPARENT, sk);
break;
case IPV6_FREEBIND:
- val = inet_sk(sk)->freebind;
+ val = inet_test_bit(FREEBIND, sk);
break;
case IPV6_RECVORIGDSTADDR:
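
The IPV6_ADDRFORM hunks above annotate writes to sk->sk_family and
sk->sk_socket->ops with WRITE_ONCE() because lockless readers can observe
them mid-conversion. The annotations only help when paired on the read
side; a sketch, with a hypothetical reader:

	/* writer, under lock_sock() as in do_ipv6_setsockopt() */
	WRITE_ONCE(sk->sk_family, PF_INET);

	/* lockless reader elsewhere: no tearing or re-reads allowed */
	if (READ_ONCE(sk->sk_family) == PF_INET)
		handle_as_ipv4(sk);	/* hypothetical handler */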
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 714cdc9e2b8e..5ce25bcb9974 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1699,11 +1699,9 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int proto, int len)
+static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
+ struct net_device *dev, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int proto, int len)
{
struct ipv6hdr *hdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 18634ebd20a4..553c8664e0a7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
static inline int ndisc_is_useropt(const struct net_device *dev,
struct nd_opt_hdr *opt)
{
- return opt->nd_opt_type == ND_OPT_RDNSS ||
+ return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+ opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
opt->nd_opt_type == ND_OPT_PREF64 ||
@@ -1266,10 +1267,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
}
#endif
- /*
- * set the RA_RECV flag in the interface
- */
-
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev) {
ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
@@ -1327,6 +1324,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+ if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ ND_PRINTK(2, info,
+ "RA: router lifetime (%ds) is too short: %s\n",
+ lifetime, skb->dev->name);
+ goto skip_defrtr;
+ }
+
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
@@ -1339,8 +1344,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
- lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
-
#ifdef CONFIG_IPV6_ROUTER_PREF
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
@@ -1516,6 +1519,9 @@ skip_linkparms:
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->lifetime != 0 &&
+ ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ continue;
if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cb4eb1d2c620..d59b296b4f51 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
+#include <linux/rcupdate.h>
#include <linux/sysctl.h>
#include <net/ipv6_frag.h>
@@ -96,6 +97,12 @@ static void __net_exit defrag6_net_exit(struct net *net)
}
}
+static const struct nf_defrag_hook defrag_hook = {
+ .owner = THIS_MODULE,
+ .enable = nf_defrag_ipv6_enable,
+ .disable = nf_defrag_ipv6_disable,
+};
+
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
};
@@ -114,6 +121,9 @@ static int __init nf_defrag_init(void)
pr_err("nf_defrag_ipv6: can't register pernet ops\n");
goto cleanup_frag6;
}
+
+ rcu_assign_pointer(nf_defrag_v6_hook, &defrag_hook);
+
return ret;
cleanup_frag6:
@@ -124,6 +134,7 @@ cleanup_frag6:
static void __exit nf_defrag_fini(void)
{
+ rcu_assign_pointer(nf_defrag_v6_hook, NULL);
unregister_pernet_subsys(&defrag6_net_ops);
nf_ct_frag6_cleanup();
}
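
Publishing the enable/disable operations through the RCU-protected
nf_defrag_v6_hook pointer lets consumers call into the module without a
hard symbol dependency. A sketch of the consumer side, assuming the hook
struct shown above; the module reference pins the provider while its code
runs:

	const struct nf_defrag_hook *hook;

	rcu_read_lock();
	hook = rcu_dereference(nf_defrag_v6_hook);
	if (hook && try_module_get(hook->owner)) {
		rcu_read_unlock();
		hook->enable(net);	/* assumed to match nf_defrag_ipv6_enable() */
		module_put(hook->owner);
	} else {
		rcu_read_unlock();
	}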
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index f804c11e2146..1b2772834972 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init_sk(&ipc6, np);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
@@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ac1cef094c5f..0eae7661a85c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,7 +291,6 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int err;
int harderr;
@@ -315,7 +314,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
}
if (np->recverr) {
u8 *payload = skb->data;
- if (!inet->hdrincl)
+ if (!inet_test_bit(HDRINCL, sk))
payload += offset;
ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
}
@@ -406,7 +405,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb->len,
inet->inet_num, 0));
- if (inet->hdrincl) {
+ if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
@@ -614,7 +613,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
@@ -762,24 +761,19 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- /* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields.
- * Doing this indirectly yields the same result.
- */
- hdrincl = inet->hdrincl;
- hdrincl = READ_ONCE(hdrincl);
+ hdrincl = inet_test_bit(HDRINCL, sk);
/*
* Get and verify the address.
*/
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -1000,7 +994,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_HDRINCL:
if (sk->sk_type != SOCK_RAW)
return -EINVAL;
- inet_sk(sk)->hdrincl = !!val;
+ inet_assign_bit(HDRINCL, sk, val);
return 0;
case IPV6_CHECKSUM:
if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
@@ -1068,7 +1062,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_HDRINCL:
- val = inet_sk(sk)->hdrincl;
+ val = inet_test_bit(HDRINCL, sk);
break;
case IPV6_CHECKSUM:
/*
@@ -1216,6 +1210,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 15cade57f7c6..d15a9e3aa24a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -90,7 +90,7 @@ unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
- struct net_device *dev, int how);
+ struct net_device *dev);
static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
@@ -371,8 +371,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
fib6_info_release(from);
}
-static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int how)
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
@@ -2951,7 +2950,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
if (!oif && skb->dev)
oif = l3mdev_master_ifindex(skb->dev);
- ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
+ sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3172,8 +3172,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
- sk->sk_uid);
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
@@ -3760,10 +3760,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->dst_nocount = true;
if (cfg->fc_flags & RTF_EXPIRES)
- fib6_set_expires(rt, jiffies +
- clock_t_to_jiffies(cfg->fc_expires));
+ fib6_set_expires_locked(rt, jiffies +
+ clock_t_to_jiffies(cfg->fc_expires));
else
- fib6_clean_expires(rt);
+ fib6_clean_expires_locked(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -4543,7 +4543,8 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
struct fib6_info *addrconf_f6i_alloc(struct net *net,
struct inet6_dev *idev,
const struct in6_addr *addr,
- bool anycast, gfp_t gfp_flags)
+ bool anycast, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
@@ -4565,7 +4566,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
cfg.fc_flags |= RTF_LOCAL;
}
- f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
+ f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
@@ -4580,21 +4581,19 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
- struct net_device *dev;
struct net *net;
struct in6_addr *addr;
};
static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
{
- struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
if (!rt->nh &&
- ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
- ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
+ ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
+ !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
@@ -4607,7 +4606,6 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
struct arg_dev_net_ip adni = {
- .dev = ifp->idev->dev,
.net = net,
.addr = &ifp->addr,
};
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index b1c028df686e..a013b92cbb86 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -272,8 +272,6 @@ static int rpl_input(struct sk_buff *skb)
dst = dst_cache_get(&rlwt->cache);
preempt_enable();
- skb_dst_drop(skb);
-
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
@@ -284,6 +282,7 @@ static int rpl_input(struct sk_buff *skb)
preempt_enable();
}
} else {
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index dd433cc265c8..24e2b4b494cb 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -109,15 +109,19 @@ struct bpf_lwt_prog {
#define next_csid_chk_lcnode_fn_bits(flen) \
next_csid_chk_lcblock_bits(flen)
+/* flag indicating that flavors are set up for a given End* behavior */
+#define SEG6_F_LOCAL_FLAVORS SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)
+
#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname)
+#define SEG6_F_LOCAL_FLV_NEXT_CSID SEG6_F_LOCAL_FLV_OP(NEXT_CSID)
#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP)
/* Supported RFC8986 Flavor operations are reported in this bitmask */
#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP
-/* Supported Flavor operations are reported in this bitmask */
-#define SEG6_LOCAL_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_OP(NEXT_CSID) | \
+#define SEG6_LOCAL_END_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_NEXT_CSID | \
SEG6_LOCAL_FLV8986_SUPP_OPS)
+#define SEG6_LOCAL_END_X_FLV_SUPP_OPS SEG6_F_LOCAL_FLV_NEXT_CSID
struct seg6_flavors_info {
/* Flavor operations */
@@ -411,9 +415,72 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
return input_action_end_finish(skb, slwt);
}
+static int input_action_end_x_finish(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+
+ return dst_input(skb);
+}
+
+static int input_action_end_x_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ return input_action_end_x_finish(skb, slwt);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int end_x_next_csid_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+
+ if (seg6_next_csid_is_arg_zero(daddr, finfo))
+ return input_action_end_x_core(skb, slwt);
+
+ /* update DA */
+ seg6_next_csid_advance_arg(daddr, finfo);
+
+ return input_action_end_x_finish(skb, slwt);
+}
+
static bool seg6_next_csid_enabled(__u32 fops)
{
- return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
+ return fops & SEG6_F_LOCAL_FLV_NEXT_CSID;
+}
+
+/* Processing of SRv6 End, End.X, and End.T behaviors can be extended through
+ * the flavors framework. These behaviors must report the subset of (flavor)
+ * operations they currently implement. In this way, if a user specifies a
+ * flavor combination that is not supported by a given End* behavior, the
+ * kernel refuses to instantiate the tunnel, reporting the error.
+ */
+static int seg6_flv_supp_ops_by_action(int action, __u32 *fops)
+{
+ switch (action) {
+ case SEG6_LOCAL_ACTION_END:
+ *fops = SEG6_LOCAL_END_FLV_SUPP_OPS;
+ break;
+ case SEG6_LOCAL_ACTION_END_X:
+ *fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
/* We describe the packet state in relation to the absence/presence of the SRH
@@ -746,21 +813,14 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
- struct ipv6_sr_hdr *srh;
-
- srh = get_and_validate_srh(skb);
- if (!srh)
- goto drop;
-
- advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
-
- seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ __u32 fops = finfo->flv_ops;
- return dst_input(skb);
+ /* check for the presence of NEXT-C-SID since it applies first */
+ if (seg6_next_csid_enabled(fops))
+ return end_x_next_csid_core(skb, slwt);
-drop:
- kfree_skb(skb);
- return -EINVAL;
+ return input_action_end_x_core(skb, slwt);
}
static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
@@ -1404,13 +1464,14 @@ static struct seg6_action_desc seg6_action_table[] = {
.action = SEG6_LOCAL_ACTION_END,
.attrs = 0,
.optattrs = SEG6_F_LOCAL_COUNTERS |
- SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
+ SEG6_F_LOCAL_FLAVORS,
.input = input_action_end,
},
{
.action = SEG6_LOCAL_ACTION_END_X,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
- .optattrs = SEG6_F_LOCAL_COUNTERS,
+ .optattrs = SEG6_F_LOCAL_COUNTERS |
+ SEG6_F_LOCAL_FLAVORS,
.input = input_action_end_x,
},
{
@@ -2070,7 +2131,8 @@ static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
{
struct seg6_flavors_info *finfo = &slwt->flv_info;
struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
- unsigned long fops;
+ int action = slwt->action;
+ __u32 fops, supp_fops;
int rc;
rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
@@ -2086,7 +2148,8 @@ static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
return -EINVAL;
fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
- if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
+ rc = seg6_flv_supp_ops_by_action(action, &supp_fops);
+ if (rc < 0 || (fops & ~supp_fops)) {
NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
return -EOPNOTSUPP;
}
@@ -2618,6 +2681,11 @@ int __init seg6_local_init(void)
*/
BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
+ /* Check whether the number of defined flavors exceeds the maximum
+ * allowed value.
+ */
+ BUILD_BUG_ON(SEG6_LOCAL_FLV_OP_MAX + 1 > BITS_PER_TYPE(__u32));
+
/* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
* bits) have been changed with invalid values, kernel build stops
* here.
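
With flavors now enabled for End.X as well, parse_nla_flavors() validates
the requested operations against the per-behavior mask instead of one
global bitmask. A sketch of the check, assuming requested_ops came from
the SEG6_LOCAL_FLV_OPERATION attribute:

	__u32 fops = requested_ops, supp_fops;

	if (seg6_flv_supp_ops_by_action(SEG6_LOCAL_ACTION_END_X, &supp_fops) < 0 ||
	    (fops & ~supp_fops))
		return -EOPNOTSUPP;	/* e.g. PSP is refused on End.X */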
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 40dd92a2f480..3a88545a265d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -564,8 +564,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
- tclass, sk->sk_priority);
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
+ opt, tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -939,7 +939,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
}
if (txhash) {
@@ -1126,10 +1126,11 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
- req->ts_recent, sk->sk_bound_dev_if,
+ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
- tcp_rsk(req)->txhash);
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ READ_ONCE(sk->sk_priority),
+ READ_ONCE(tcp_rsk(req)->txhash));
}
@@ -2175,6 +2176,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 317b01c9bc39..ebc6ae47cfea 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -45,11 +45,13 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
+#include <trace/events/udp.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>
+#include <net/gro.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -70,11 +72,12 @@ int udpv6_init_sock(struct sock *sk)
return 0;
}
-static u32 udp6_ehashfn(const struct net *net,
- const struct in6_addr *laddr,
- const u16 lport,
- const struct in6_addr *faddr,
- const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
{
static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;
@@ -90,7 +93,7 @@ static u32 udp6_ehashfn(const struct net *net,
fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
- udp_ipv6_hash_secret + net_hash_mix(net));
+ udp6_ehash_secret + net_hash_mix(net));
}
int udp_v6_get_port(struct sock *sk, unsigned short snum)
@@ -159,24 +162,6 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned int hnum)
-{
- struct sock *reuse_sk = NULL;
- u32 hash;
-
- if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
- reuse_sk = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- }
- return reuse_sk;
-}
-
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -193,44 +178,35 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- result = lookup_reuseport(net, sk, skb,
- saddr, sport, daddr, hnum);
+ badness = score;
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ result = sk;
+ continue;
+ }
+
+ result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, udp6_ehashfn);
+ if (!result) {
+ result = sk;
+ continue;
+ }
+
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk))
+ if (!reuseport_has_conns(sk))
return result;
- result = result ? : sk;
- badness = score;
+ /* Reuseport logic returned an error; keep the original score. */
+ if (IS_ERR(result))
+ continue;
+
+ badness = compute_score(sk, net, saddr, sport,
+ daddr, hnum, dif, sdif);
}
}
return result;
}
-static inline struct sock *udp6_lookup_run_bpf(struct net *net,
- struct udp_table *udptable,
- struct sk_buff *skb,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- u16 hnum, const int dif)
-{
- struct sock *sk, *reuse_sk;
- bool no_reuseport;
-
- if (udptable != net->ipv4.udp_table)
- return NULL; /* only UDP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
- daddr, hnum, dif, &sk);
- if (no_reuseport || IS_ERR_OR_NULL(sk))
- return sk;
-
- reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk)
- sk = reuse_sk;
- return sk;
-}
-
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -255,9 +231,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto done;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- sk = udp6_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ udptable == net->ipv4.udp_table) {
+ sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, dif,
+ udp6_ehashfn);
if (sk) {
result = sk;
goto done;
@@ -299,10 +277,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), net->ipv4.udp_table, NULL);
+ &iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
@@ -439,7 +420,7 @@ try_again:
ip6_datagram_recv_common_ctl(sk, msg, skb);
if (is_udp4) {
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv_offset(msg, sk, skb,
sizeof(struct udphdr), off);
} else {
@@ -623,7 +604,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT) {
if (tunnel) {
ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
- sk->sk_mark, sk->sk_uid);
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
} else {
ip6_sk_redirect(skb, sk);
}
@@ -680,6 +661,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb_reason(skb, drop_reason);
+ trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -986,7 +968,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
/* Check if the socket is already available, e.g. due to early demux */
- sk = skb_steal_sock(skb, &refcounted);
+ sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+ &refcounted, udp6_ehashfn);
+ if (IS_ERR(sk))
+ goto no_sk;
+
if (sk) {
struct dst_entry *dst = skb_dst(skb);
int ret;
@@ -1020,7 +1006,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto report_csum_error;
return udp6_unicast_rcv_skb(sk, skb, uh);
}
-
+no_sk:
reason = SKB_DROP_REASON_NO_SOCKET;
if (!uh->check)
@@ -1354,7 +1340,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
if (sin6) {
@@ -1796,6 +1782,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ad3b8726873e..6b95ba241ebe 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -43,8 +43,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features, true);
mss = skb_shinfo(skb)->gso_size;
@@ -119,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), net->ipv4.udp_table, NULL);
+ &iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 8e010d07917a..267d491e9707 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 8f931e46b460..41a680c76d2e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -124,14 +124,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
xfrm_dst_destroy(xdst);
}
-static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
+static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct xfrm_dst *xdst;
- if (!unregister)
- return;
-
xdst = (struct xfrm_dst *)dst;
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ede3c6a60353..d68d01804dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1281,7 +1281,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
natt->encap_dport = n_port->sadb_x_nat_t_port_port;
}
- memset(&natt->encap_oa, 0, sizeof(natt->encap_oa));
}
err = xfrm_init_state(x);
@@ -1848,9 +1847,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
- if ((xfilter->sadb_x_filter_splen >=
+ if ((xfilter->sadb_x_filter_splen >
(sizeof(xfrm_address_t) << 3)) ||
- (xfilter->sadb_x_filter_dplen >=
+ (xfilter->sadb_x_filter_dplen >
(sizeof(xfrm_address_t) << 3))) {
mutex_unlock(&pfk->dump_lock);
return -EINVAL;
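
The relaxed bound matters because a full-length prefix is legitimate: xfrm_address_t is 16 bytes, so sizeof(xfrm_address_t) << 3 is 128 bits, and a /128 filter must be accepted while anything longer stays invalid. A small userspace sketch of the boundary, with the constant written out:

    #include <stdio.h>

    int main(void)
    {
    	const unsigned int max_plen = 16 << 3;	/* sizeof(xfrm_address_t) * 8 = 128 */
    	unsigned int splen;

    	for (splen = 127; splen <= 129; splen++)
    		printf("splen=%u -> %s\n", splen,
    		       splen > max_plen ? "-EINVAL" : "accepted");
    	return 0;
    }
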
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index f9073bc7281f..9a2a9ed3ba47 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -552,7 +552,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv(msg, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b1623f9c4f92..ed8ebb6f5909 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
- /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- * inet6_sk_generic
- */
struct ipv6_pinfo inet6;
};
@@ -519,7 +516,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Get and verify the address */
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
@@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
+ .ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 57c35c960b2c..9b06c380866b 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -402,7 +402,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN);
laddr.lsap = addr->sllc_sap;
rc = -EADDRINUSE; /* mac + sap clash. */
- ask = llc_lookup_established(sap, &daddr, &laddr);
+ ask = llc_lookup_established(sap, &daddr, &laddr, &init_net);
if (ask) {
sock_put(ask);
goto out_put;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 912aa9bd5e29..0a3f5e0bec00 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -14,14 +14,15 @@
#include <linux/init.h>
#include <linux/slab.h>
-#include <net/llc_sap.h>
-#include <net/llc_conn.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/llc_c_ev.h>
+#include <net/llc.h>
#include <net/llc_c_ac.h>
+#include <net/llc_c_ev.h>
#include <net/llc_c_st.h>
+#include <net/llc_conn.h>
#include <net/llc_pdu.h>
+#include <net/llc_sap.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
#if 0
#define dprintk(args...) printk(KERN_DEBUG args)
@@ -453,11 +454,13 @@ static int llc_exec_conn_trans_actions(struct sock *sk,
static inline bool llc_estab_match(const struct llc_sap *sap,
const struct llc_addr *daddr,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
- return llc->laddr.lsap == laddr->lsap &&
+ return net_eq(sock_net(sk), net) &&
+ llc->laddr.lsap == laddr->lsap &&
llc->daddr.lsap == daddr->lsap &&
ether_addr_equal(llc->laddr.mac, laddr->mac) &&
ether_addr_equal(llc->daddr.mac, daddr->mac);
@@ -468,6 +471,7 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
* @sap: SAP
* @daddr: address of remote LLC (MAC + SAP)
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search connection list of the SAP and finds connection using the remote
* mac, remote sap, local mac, and local sap. Returns pointer for
@@ -476,7 +480,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
*/
static struct sock *__llc_lookup_established(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -486,12 +491,12 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
rcu_read_lock();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_estab_match(sap, daddr, laddr, rc)) {
+ if (llc_estab_match(sap, daddr, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_estab_match(sap, daddr, laddr, rc))) {
+ !llc_estab_match(sap, daddr, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -513,29 +518,33 @@ found:
struct sock *llc_lookup_established(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *sk;
local_bh_disable();
- sk = __llc_lookup_established(sap, daddr, laddr);
+ sk = __llc_lookup_established(sap, daddr, laddr, net);
local_bh_enable();
return sk;
}
static inline bool llc_listener_match(const struct llc_sap *sap,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
- return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
+ return net_eq(sock_net(sk), net) &&
+ sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
llc->laddr.lsap == laddr->lsap &&
ether_addr_equal(llc->laddr.mac, laddr->mac);
}
static struct sock *__llc_lookup_listener(struct llc_sap *sap,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -545,12 +554,12 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
rcu_read_lock();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_listener_match(sap, laddr, rc)) {
+ if (llc_listener_match(sap, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_listener_match(sap, laddr, rc))) {
+ !llc_listener_match(sap, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -574,6 +583,7 @@ found:
* llc_lookup_listener - Finds listener for local MAC + SAP
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search connection list of the SAP and finds connection listening on
* local mac, and local sap. Returns pointer for parent socket found,
@@ -581,24 +591,26 @@ found:
* Caller has to make sure local_bh is disabled.
*/
static struct sock *llc_lookup_listener(struct llc_sap *sap,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
+ struct sock *rc = __llc_lookup_listener(sap, laddr, net);
static struct llc_addr null_addr;
- struct sock *rc = __llc_lookup_listener(sap, laddr);
if (!rc)
- rc = __llc_lookup_listener(sap, &null_addr);
+ rc = __llc_lookup_listener(sap, &null_addr, net);
return rc;
}
static struct sock *__llc_lookup(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
- struct sock *sk = __llc_lookup_established(sap, daddr, laddr);
+ struct sock *sk = __llc_lookup_established(sap, daddr, laddr, net);
- return sk ? : llc_lookup_listener(sap, laddr);
+ return sk ? : llc_lookup_listener(sap, laddr, net);
}
/**
@@ -776,7 +788,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_pdu_decode_da(skb, daddr.mac);
llc_pdu_decode_dsap(skb, &daddr.lsap);
- sk = __llc_lookup(sap, &saddr, &daddr);
+ sk = __llc_lookup(sap, &saddr, &daddr, dev_net(skb->dev));
if (!sk)
goto drop;
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index dde9bf08a593..58a5f419adc6 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -92,7 +92,7 @@ int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap)
daddr.lsap = dsap;
memcpy(daddr.mac, dmac, sizeof(daddr.mac));
memcpy(laddr.mac, lmac, sizeof(laddr.mac));
- existing = llc_lookup_established(llc->sap, &daddr, &laddr);
+ existing = llc_lookup_established(llc->sap, &daddr, &laddr, sock_net(sk));
if (existing) {
if (existing->sk_state == TCP_ESTABLISHED) {
sk = existing;
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index c309b72a5877..7cac441862e2 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
void (*sta_handler)(struct sk_buff *skb);
void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
- if (!net_eq(dev_net(dev), &init_net))
- goto drop;
-
/*
* When the interface is in promisc. mode, drop all the crap that it
* receives, do not try to analyse it.
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 6805ce43a055..116c0e479183 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -294,25 +294,29 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
static inline bool llc_dgram_match(const struct llc_sap *sap,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
return sk->sk_type == SOCK_DGRAM &&
- llc->laddr.lsap == laddr->lsap &&
- ether_addr_equal(llc->laddr.mac, laddr->mac);
+ net_eq(sock_net(sk), net) &&
+ llc->laddr.lsap == laddr->lsap &&
+ ether_addr_equal(llc->laddr.mac, laddr->mac);
}
/**
* llc_lookup_dgram - Finds dgram socket for the local sap/mac
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search socket list of the SAP and finds connection using the local
* mac, and local sap. Returns pointer for socket found, %NULL otherwise.
*/
static struct sock *llc_lookup_dgram(struct llc_sap *sap,
- const struct llc_addr *laddr)
+ const struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -322,12 +326,12 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
rcu_read_lock_bh();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_dgram_match(sap, laddr, rc)) {
+ if (llc_dgram_match(sap, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_dgram_match(sap, laddr, rc))) {
+ !llc_dgram_match(sap, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -429,7 +433,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_sap_mcast(sap, &laddr, skb);
kfree_skb(skb);
} else {
- struct sock *sk = llc_lookup_dgram(sap, &laddr);
+ struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev));
if (sk) {
llc_sap_rcv(sap, skb, sk);
sock_put(sk);
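
All three LLC lookup paths (established, listener, dgram) now take a netns argument and repeat the same SLAB_TYPESAFE_BY_RCU idiom: match, take a reference with refcount_inc_not_zero(), then match again in case the slab object was recycled in between. A condensed sketch of the pattern, with hypothetical match()/key names:

    /* Hypothetical condensed form of the lookup loops above. With a
     * SLAB_TYPESAFE_BY_RCU cache an object may be freed and reused while
     * the list is walked, so the match must be re-validated after the
     * reference is taken.
     */
    sk_nulls_for_each_rcu(rc, node, head) {
    	if (!match(rc, key, net))	/* includes net_eq(sock_net(rc), net) */
    		continue;
    	if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
    		goto again;		/* slot died under us, restart */
    	if (unlikely(!match(rc, key, net))) {	/* recycled meanwhile? */
    		sock_put(rc);
    		continue;
    	}
    	return rc;
    }
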
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e7ac24603892..45e7a5d9c7d9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3648,12 +3648,6 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
- if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
- sdata->vif.bss_conf.eht_puncturing =
- sdata->vif.bss_conf.csa_punct_bitmap;
- changed |= BSS_CHANGED_EHT_PUNCTURING;
- }
-
/*
* using reservation isn't immediate as it may be deferred until later
* with multi-vif. once reservation is complete it will re-schedule the
@@ -3683,6 +3677,12 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
if (err)
return err;
+ if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
+ sdata->vif.bss_conf.eht_puncturing =
+ sdata->vif.bss_conf.csa_punct_bitmap;
+ changed |= BSS_CHANGED_EHT_PUNCTURING;
+ }
+
ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
if (sdata->deflink.csa_block_tx) {
@@ -4133,19 +4133,20 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
mutex_lock(&local->mtx);
rcu_read_lock();
+ sta = sta_info_get_bss(sdata, peer);
+ if (!sta) {
+ ret = -ENOLINK;
+ goto unlock;
+ }
+
+ qos = sta->sta.wme;
+
chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
ret = -EINVAL;
goto unlock;
}
band = chanctx_conf->def.chan->band;
- sta = sta_info_get_bss(sdata, peer);
- if (sta) {
- qos = sta->sta.wme;
- } else {
- ret = -ENOLINK;
- goto unlock;
- }
if (qos) {
fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
index e1d4cfd99128..912c46f74d24 100644
--- a/net/mac80211/fils_aead.c
+++ b/net/mac80211/fils_aead.c
@@ -5,9 +5,9 @@
*/
#include <crypto/aes.h>
-#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
+#include <crypto/utils.h>
#include "ieee80211_i.h"
#include "aes_cmac.h"
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 91633a0b723e..06bd406846d2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1872,7 +1872,6 @@ void ieee80211_send_pspoll(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
void ieee80211_recalc_ps(struct ieee80211_local *local);
void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
-int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
@@ -2564,7 +2563,6 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_link_data *rsvd_for);
bool ieee80211_is_radar_required(struct ieee80211_local *local);
-void ieee80211_dfs_cac_timer(unsigned long data);
void ieee80211_dfs_cac_timer_work(struct work_struct *work);
void ieee80211_dfs_cac_cancel(struct ieee80211_local *local);
void ieee80211_dfs_radar_detected_work(struct work_struct *work);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 21cf5a208910..13050dc9321f 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -9,6 +9,7 @@
* Copyright 2018-2020, 2022-2023 Intel Corporation
*/
+#include <crypto/utils.h>
#include <linux/if_ether.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
@@ -17,7 +18,6 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
-#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 6c94222a9df5..ad8469293d71 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -212,7 +212,6 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata,
const u8 *addr, struct ieee80211s_hdr *mesh_hdr);
bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *ie);
-void mesh_ids_set_default(struct ieee80211_if_mesh *mesh);
int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4f707d2a160f..e751cda5eef6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1083,7 +1083,8 @@ static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
struct sk_buff *tail = skb_peek_tail(frames);
struct ieee80211_rx_status *status;
- if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
+ if (tid_agg_rx->reorder_buf_filtered &&
+ tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
return true;
if (!tail)
@@ -1124,7 +1125,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
}
no_frame:
- tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
+ if (tid_agg_rx->reorder_buf_filtered)
+ tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
}
@@ -3732,6 +3734,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
goto queue;
case WLAN_CATEGORY_S1G:
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.s1g.action_code))
+ break;
+
switch (mgmt->u.action.u.s1g.action_code) {
case WLAN_S1G_TWT_SETUP:
case WLAN_S1G_TWT_TEARDOWN:
@@ -4264,6 +4270,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
u16 ssn, u64 filtered,
u16 received_mpdus)
{
+ struct ieee80211_local *local;
struct sta_info *sta;
struct tid_ampdu_rx *tid_agg_rx;
struct sk_buff_head frames;
@@ -4281,6 +4288,11 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
sta = container_of(pubsta, struct sta_info, sta);
+ local = sta->sdata->local;
+ WARN_ONCE(local->hw.max_rx_aggregation_subframes > 64,
+ "RX BA marker can't support max_rx_aggregation_subframes %u > 64\n",
+ local->hw.max_rx_aggregation_subframes);
+
if (!ieee80211_rx_data_set_sta(&rx, sta, -1))
return;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 4133496da378..2d8e38b3bcb5 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -15,7 +15,7 @@
#include <asm/unaligned.h>
#include <net/mac80211.h>
#include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/utils.h>
#include "ieee80211_i.h"
#include "michael.h"
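
The include swaps in fils_aead.c, key.c and wpa.c track the relocation of the constant-time helpers out of <crypto/algapi.h>: these files only use crypto_memneq(), which now lives in <crypto/utils.h>. For illustration, a typical call site looks like:

    #include <crypto/utils.h>

    /* crypto_memneq() compares in constant time, unlike memcmp(), so
     * comparisons of key material do not leak timing information.
     */
    static bool keys_differ(const u8 *a, const u8 *b, unsigned int len)
    {
    	return crypto_memneq(a, b, len) != 0;
    }
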
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index a3829ce548f9..84e531f86b82 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 5a0a84ad94af..8a16672b94e2 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -19,3 +19,18 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
return NULL;
}
+
+BTF_SET8_START(bpf_mptcp_fmodret_ids)
+BTF_ID_FLAGS(func, update_socket_protocol)
+BTF_SET8_END(bpf_mptcp_fmodret_ids)
+
+static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_mptcp_fmodret_ids,
+};
+
+static int __init bpf_mptcp_kfunc_init(void)
+{
+ return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
+}
+late_initcall(bpf_mptcp_kfunc_init);
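
Adding update_socket_protocol() to an fmod_ret BTF set lets a BPF program override the protocol chosen at socket() time. A sketch of such a program, built against vmlinux.h/libbpf; the "mptcpify" policy shown here (upgrading plain TCP sockets to MPTCP) is one possible use, not something this hunk itself ships:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    /* vmlinux.h carries no UAPI macros, so spell the constants out. */
    #define AF_INET		2
    #define AF_INET6	10
    #define SOCK_STREAM	1
    #define IPPROTO_TCP	6
    #define IPPROTO_MPTCP	262

    SEC("fmod_ret/update_socket_protocol")
    int BPF_PROG(mptcpify, int family, int type, int protocol)
    {
    	/* Transparently turn plain TCP stream sockets into MPTCP ones. */
    	if ((family == AF_INET || family == AF_INET6) &&
    	    type == SOCK_STREAM &&
    	    (!protocol || protocol == IPPROTO_TCP))
    		return IPPROTO_MPTCP;

    	return protocol;	/* leave everything else untouched */
    }

    char _license[] SEC("license") = "GPL";
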
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 43e540328a52..e72b518c5d02 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
u8 pm_type;
+ char scheduler[MPTCP_SCHED_NAME_MAX];
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
return mptcp_get_pernet(net)->pm_type;
}
+const char *mptcp_get_scheduler(const struct net *net)
+{
+ return mptcp_get_pernet(net)->scheduler;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+ strcpy(pernet->scheduler, "default");
}
#ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &mptcp_pm_type_max
},
+ {
+ .procname = "scheduler",
+ .maxlen = MPTCP_SCHED_NAME_MAX,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
{}
};
@@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
+ table[6].data = &pernet->scheduler;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
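
The new knob surfaces as net.mptcp.scheduler and holds a scheduler name of at most MPTCP_SCHED_NAME_MAX (16) bytes. A minimal userspace sketch that reads the per-netns default, assuming the usual procfs layout:

    #include <stdio.h>

    int main(void)
    {
    	char name[16];	/* MPTCP_SCHED_NAME_MAX */
    	FILE *f = fopen("/proc/sys/net/mptcp/scheduler", "r");

    	if (!f || !fgets(name, sizeof(name), f)) {
    		perror("net.mptcp.scheduler");
    		return 1;
    	}
    	printf("default scheduler: %s", name);	/* fgets keeps the newline */
    	fclose(f);
    	return 0;
    }
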
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7dbbad1e4f55..d8da5374d9e1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -299,15 +299,8 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
msk = mptcp_sk(sk);
- if (subflow->backup != bkup) {
+ if (subflow->backup != bkup)
subflow->backup = bkup;
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk))
- msk->last_snd = NULL;
- else
- __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
- mptcp_data_unlock(sk);
- }
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5692daf57a4d..9661f3812682 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -9,6 +9,7 @@
#include <linux/inet.h>
#include <linux/kernel.h>
#include <net/tcp.h>
+#include <net/inet_common.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
#include <net/genetlink.h>
@@ -471,9 +472,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
slow = lock_sock_fast(ssk);
if (prio) {
- if (subflow->backup != backup)
- msk->last_snd = NULL;
-
subflow->send_mp_prio = 1;
subflow->backup = backup;
subflow->request_bkup = backup;
@@ -1005,8 +1003,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
bool is_ipv6 = sk->sk_family == AF_INET6;
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *newsk, *ssk;
int backlog = 1024;
int err;
@@ -1032,28 +1029,32 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
&mptcp_keys[is_ipv6]);
lock_sock(newsk);
- ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
release_sock(newsk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (entry->addr.family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
+ if (ssk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (ssk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
+#endif
if (err)
return err;
inet_sk_state_store(newsk, TCP_LISTEN);
- err = kernel_listen(ssock, backlog);
- if (err)
- return err;
-
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
- return 0;
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ if (!err)
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
+ release_sock(ssk);
+ return err;
}
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3613489eb6e3..933b257eee02 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -67,11 +67,11 @@ static bool mptcp_is_tcpsk(struct sock *sk)
* Hand the socket over to tcp so all further socket ops
* bypass mptcp.
*/
- sock->ops = &inet_stream_ops;
+ WRITE_ONCE(sock->ops, &inet_stream_ops);
return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- sock->ops = &inet6_stream_ops;
+ WRITE_ONCE(sock->ops, &inet6_stream_ops);
return true;
#endif
}
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
+ msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
WRITE_ONCE(msk->first, ssock->sk);
- WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
@@ -101,6 +101,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
mptcp_sock_graft(msk->first, sk->sk_socket);
+ iput(SOCK_INODE(ssock));
return 0;
}
@@ -108,7 +109,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* If the MPC handshake is not started, returns the first subflow,
* eventually allocating it.
*/
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
int ret;
@@ -116,10 +117,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
return ERR_PTR(-EINVAL);
- if (!msk->subflow) {
- if (msk->first)
- return ERR_PTR(-EINVAL);
-
+ if (!msk->first) {
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
@@ -127,7 +125,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
mptcp_sockopt_sync(msk, msk->first);
}
- return msk->subflow;
+ return msk->first;
}
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
@@ -1368,7 +1366,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
* returns the subflow that will transmit the next DSS
* additionally updates the rtx timeout
*/
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[SSK_MODE_MAX];
struct mptcp_subflow_context *subflow;
@@ -1379,23 +1377,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- msk_owned_by_me(msk);
-
- if (__mptcp_check_fallback(msk)) {
- if (!msk->first)
- return NULL;
- return __tcp_can_send(msk->first) &&
- sk_stream_memory_free(msk->first) ? msk->first : NULL;
- }
-
- /* re-use last subflow, if the burst allow that */
- if (msk->last_snd && msk->snd_burst > 0 &&
- sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
- mptcp_set_timeout(sk);
- return msk->last_snd;
- }
-
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < SSK_MODE_MAX; ++i) {
send_info[i].ssk = NULL;
@@ -1448,16 +1429,13 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
wmem = READ_ONCE(ssk->sk_wmem_queued);
- if (!burst) {
- msk->last_snd = NULL;
+ if (!burst)
return ssk;
- }
subflow = mptcp_subflow_ctx(ssk);
subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
READ_ONCE(ssk->sk_pacing_rate) * burst,
burst + wmem);
- msk->last_snd = ssk;
msk->snd_burst = burst;
return ssk;
}
@@ -1501,64 +1479,106 @@ void mptcp_check_and_set_pending(struct sock *sk)
mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
-void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
+ struct mptcp_sendmsg_info *info)
{
- struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct mptcp_sendmsg_info info = {
- .flags = flags,
- };
- bool do_check_data_fin = false;
struct mptcp_data_frag *dfrag;
- int len;
+ int len, copied = 0, err = 0;
while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
+ info->sent = dfrag->already_sent;
+ info->limit = dfrag->data_len;
len = dfrag->data_len - dfrag->already_sent;
while (len > 0) {
int ret = 0;
- prev_ssk = ssk;
- ssk = mptcp_subflow_get_send(msk);
-
- /* First check. If the ssk has changed since
- * the last round, release prev_ssk
- */
- if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(prev_ssk, &info);
- if (!ssk)
- goto out;
-
- /* Need to lock the new subflow only if different
- * from the previous one, otherwise we are still
- * helding the relevant lock
- */
- if (ssk != prev_ssk)
- lock_sock(ssk);
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
if (ret <= 0) {
- if (ret == -EAGAIN)
- continue;
- mptcp_push_release(ssk, &info);
+ err = copied ? : ret;
goto out;
}
- do_check_data_fin = true;
- info.sent += ret;
+ info->sent += ret;
+ copied += ret;
len -= ret;
mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+ if (msk->snd_burst <= 0 ||
+ !sk_stream_memory_free(ssk) ||
+ !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) {
+ err = copied;
+ goto out;
+ }
+ mptcp_set_timeout(sk);
+ }
+ err = copied;
+
+out:
+ return err;
+}
+
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+{
+ struct sock *prev_ssk = NULL, *ssk = NULL;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_sendmsg_info info = {
+ .flags = flags,
+ };
+ bool do_check_data_fin = false;
+ int push_count = 1;
+
+ while (mptcp_send_head(sk) && (push_count > 0)) {
+ struct mptcp_subflow_context *subflow;
+ int ret = 0;
+
+ if (mptcp_sched_get_send(msk))
+ break;
+
+ push_count = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ mptcp_subflow_set_scheduled(subflow, false);
+
+ prev_ssk = ssk;
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ if (ssk != prev_ssk) {
+ /* First check. If the ssk has changed since
+ * the last round, release prev_ssk
+ */
+ if (prev_ssk)
+ mptcp_push_release(prev_ssk, &info);
+
+ /* Need to lock the new subflow only if different
+ * from the previous one, otherwise we are still
+					 * holding the relevant lock
+ */
+ lock_sock(ssk);
+ }
+
+ push_count++;
+
+ ret = __subflow_push_pending(sk, ssk, &info);
+ if (ret <= 0) {
+ if (ret != -EAGAIN ||
+ (1 << ssk->sk_state) &
+ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSE))
+ push_count--;
+ continue;
+ }
+ do_check_data_fin = true;
+ }
+ }
}
/* at this point we held the socket lock for the last subflow we used */
if (ssk)
mptcp_push_release(ssk, &info);
-out:
/* ensure the rtx timer is running */
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
@@ -1572,42 +1592,49 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
struct mptcp_sendmsg_info info = {
.data_lock_held = true,
};
- struct mptcp_data_frag *dfrag;
+ bool keep_pushing = true;
struct sock *xmit_ssk;
- int len, copied = 0;
+ int copied = 0;
info.flags = 0;
- while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
- len = dfrag->data_len - dfrag->already_sent;
- while (len > 0) {
- int ret = 0;
+ while (mptcp_send_head(sk) && keep_pushing) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ int ret = 0;
- /* check for a different subflow usage only after
- * spooling the first chunk of data
- */
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
- if (!xmit_ssk)
- goto out;
- if (xmit_ssk != ssk) {
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
- MPTCP_DELEGATE_SEND);
- goto out;
- }
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ /* check for a different subflow usage only after
+ * spooling the first chunk of data
+ */
+ if (first) {
+ mptcp_subflow_set_scheduled(subflow, false);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ first = false;
if (ret <= 0)
- goto out;
+ break;
+ copied += ret;
+ continue;
+ }
+
+ if (mptcp_sched_get_send(msk))
+ goto out;
- info.sent += ret;
+ if (READ_ONCE(subflow->scheduled)) {
+ mptcp_subflow_set_scheduled(subflow, false);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ if (ret <= 0)
+ keep_pushing = false;
copied += ret;
- len -= ret;
- first = false;
+ }
- mptcp_update_post_push(msk, dfrag, ret);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ xmit_ssk = mptcp_subflow_tcp_sock(subflow);
+ if (xmit_ssk != ssk) {
+ mptcp_subflow_delegate(subflow,
+ MPTCP_DELEGATE_SEND);
+ keep_pushing = false;
+ }
+ }
}
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
out:
@@ -1642,7 +1669,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
struct sock *ssk;
int ret;
@@ -1653,9 +1679,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* fastopen attempt, no need to check for additional subflow status.
*/
if (msg->msg_flags & MSG_FASTOPEN) {
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
}
if (!msk->first)
return -EINVAL;
@@ -1689,7 +1715,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (!mptcp_disconnect(sk, 0))
sk->sk_socket->state = SS_UNCONNECTED;
}
- inet_sk(sk)->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
return ret;
}
@@ -1707,7 +1733,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
+ if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
+ msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
@@ -1881,6 +1908,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
+ u8 scaling_ratio = U8_MAX;
u32 time, advmss = 1;
u64 rtt_us, mstamp;
@@ -1911,9 +1939,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
rtt_us = max(sf_rtt_us, rtt_us);
advmss = max(sf_advmss, advmss);
+ scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
}
msk->rcvq_space.rtt_us = rtt_us;
+ msk->scaling_ratio = scaling_ratio;
if (time < (rtt_us >> 3) || rtt_us == 0)
return;
@@ -1922,8 +1952,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
@@ -1932,18 +1962,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(grow, msk->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < advmss)
- rcvmem += 128;
-
- do_div(rcvwin, advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
- window_clamp = tcp_win_from_space(sk, rcvbuf);
+ window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make subflows follow along. If we do not do this, we
@@ -2202,17 +2227,12 @@ static void mptcp_timeout_timer(struct timer_list *t)
*
* A backup subflow is returned only if that is the only kind available.
*/
-static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
{
struct sock *backup = NULL, *pick = NULL;
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
- msk_owned_by_me(msk);
-
- if (__mptcp_check_fallback(msk))
- return NULL;
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -2243,14 +2263,6 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
return min_stale_count > 1 ? backup : NULL;
}
-static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
-{
- if (msk->subflow) {
- iput(SOCK_INODE(msk->subflow));
- WRITE_ONCE(msk->subflow, NULL);
- }
-}
-
bool __mptcp_retransmit_pending_data(struct sock *sk)
{
struct mptcp_data_frag *cur, *rtx_head;
@@ -2329,13 +2341,13 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out_release;
}
- dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ dispose_it = msk->free_first || ssk != msk->first;
if (dispose_it)
list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if (flags & MPTCP_CF_FASTCLOSE) {
+ if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
/* be sure to force the tcp_disconnect() path,
* to generate the egress reset
*/
@@ -2350,7 +2362,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* disconnect should never fail
*/
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
- msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2383,9 +2394,6 @@ out_release:
WRITE_ONCE(msk->first, NULL);
out:
- if (ssk == msk->last_snd)
- msk->last_snd = NULL;
-
if (need_push)
__mptcp_push_pending(sk, 0);
}
@@ -2502,16 +2510,17 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
- size_t copied = 0;
struct sock *ssk;
- int ret;
+ int ret, err;
+ u16 len = 0;
mptcp_clean_una_wakeup(sk);
/* first check ssk: need to kick "stale" logic */
- ssk = mptcp_subflow_get_retrans(msk);
+ err = mptcp_sched_get_retrans(msk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -2530,32 +2539,45 @@ static void __mptcp_retrans(struct sock *sk)
goto reset_timer;
}
- if (!ssk)
+ if (err)
goto reset_timer;
- lock_sock(ssk);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ u16 copied = 0;
- /* limit retransmission to the bytes already sent on some subflows */
- info.sent = 0;
- info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
- while (info.sent < info.limit) {
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
- if (ret <= 0)
- break;
+ mptcp_subflow_set_scheduled(subflow, false);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
- copied += ret;
- info.sent += ret;
- }
- if (copied) {
- dfrag->already_sent = max(dfrag->already_sent, info.sent);
- msk->bytes_retrans += copied;
- tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
- info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* limit retransmission to the bytes already sent on some subflows */
+ info.sent = 0;
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
+ dfrag->already_sent;
+ while (info.sent < info.limit) {
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ if (ret <= 0)
+ break;
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
+ copied += ret;
+ info.sent += ret;
+ }
+ if (copied) {
+ len = max(copied, len);
+ tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+ info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ }
+
+ release_sock(ssk);
+ }
}
- release_sock(ssk);
+ msk->bytes_retrans += len;
+ dfrag->already_sent = max(dfrag->already_sent, len);
reset_timer:
mptcp_check_and_set_pending(sk);
@@ -2663,7 +2685,7 @@ unlock:
sock_put(sk);
}
-static int __mptcp_init_sock(struct sock *sk)
+static void __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2690,8 +2712,6 @@ static int __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
-
- return 0;
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2711,9 +2731,7 @@ static int mptcp_init_sock(struct sock *sk)
struct net *net = sock_net(sk);
int ret;
- ret = __mptcp_init_sock(sk);
- if (ret)
- return ret;
+ __mptcp_init_sock(sk);
if (!mptcp_is_enabled(net))
return -ENOPROTOOPT;
@@ -2721,6 +2739,11 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
+ ret = mptcp_init_sched(mptcp_sk(sk),
+ mptcp_sched_find(mptcp_get_scheduler(net)));
+ if (ret)
+ return ret;
+
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2866,6 +2889,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ mptcp_release_sched(msk);
sk->sk_prot->destroy(sk);
@@ -3055,7 +3079,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
- msk->last_snd = NULL;
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
msk->push_pending = 0;
@@ -3111,7 +3134,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
- WRITE_ONCE(msk->subflow, NULL);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3122,6 +3144,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+ mptcp_init_sched(msk, mptcp_sk(sk)->sched);
/* passive msk is created after the first/MPC subflow */
msk->subflow_id = 2;
@@ -3175,25 +3198,17 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
-static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
bool kern)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *listener;
struct sock *newsk;
- listener = READ_ONCE(msk->subflow);
- if (WARN_ON_ONCE(!listener)) {
- *err = -EINVAL;
- return NULL;
- }
-
- pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
- newsk = inet_csk_accept(listener->sk, flags, err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, err, kern);
if (!newsk)
return NULL;
- pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -3210,9 +3225,9 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
}
newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
- MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
}
@@ -3253,10 +3268,8 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- /* clears msk->subflow, allowing the following to close
- * even the initial subflow
- */
- mptcp_dispose_initial_subflow(msk);
+ /* allow the following to close even the initial subflow */
+ msk->free_first = 1;
mptcp_destroy_common(msk, 0);
sk_sockets_allocated_dec(sk);
}
@@ -3328,7 +3341,7 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (unlikely(&msk->cb_flags)) {
+ if (unlikely(msk->cb_flags)) {
		/* be sure to set the current sk state before taking actions
* depending on sk_state, that is processing MPTCP_ERROR_REPORT
*/
@@ -3336,8 +3349,6 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
- if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
- msk->last_snd = NULL;
}
__mptcp_update_rmem(sk);
@@ -3406,14 +3417,12 @@ static void mptcp_unhash(struct sock *sk)
static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
- ssock = msk->subflow;
- pr_debug("msk=%p, subflow=%p", msk, ssock);
- if (WARN_ON_ONCE(!ssock))
+ pr_debug("msk=%p, ssk=%p", msk, msk->first);
+ if (WARN_ON_ONCE(!msk->first))
return -EINVAL;
- return inet_csk_get_port(ssock->sk, snum);
+ return inet_csk_get_port(msk->first, snum);
}
void mptcp_finish_connect(struct sock *ssk)
@@ -3588,25 +3597,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int err = -EINVAL;
+ struct sock *ssk;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
+ subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
if (likely(!__mptcp_check_fallback(msk)))
@@ -3615,25 +3623,42 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
- else
- err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
- inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+ if (!msk->fastopening)
+ lock_sock(ssk);
+
+	/* the following closely mirrors a small chunk of code from
+ * __inet_stream_connect()
+ */
+ if (ssk->sk_state != TCP_CLOSE)
+ goto out;
+
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
+ err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
+ err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
+ if (err < 0)
+ goto out;
+
+ inet_assign_bit(DEFER_CONNECT, sk, inet_test_bit(DEFER_CONNECT, ssk));
+
+out:
+ if (!msk->fastopening)
+ release_sock(ssk);
/* on successful connect, the msk state will be moved to established by
* subflow_finish_connect()
*/
- if (unlikely(err && err != -EINPROGRESS)) {
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (unlikely(err)) {
+ /* avoid leaving a dangling token in an unconnected socket */
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
return err;
}
- mptcp_copy_inaddrs(sk, ssock->sk);
-
- /* silence EINPROGRESS and let the caller inet_stream_connect
- * handle the connection in progress
- */
+ mptcp_copy_inaddrs(sk, ssk);
return 0;
}
@@ -3674,22 +3699,27 @@ static struct proto mptcp_prot = {
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- int err;
+ struct sock *ssk, *sk = sock->sk;
+ int err = -EINVAL;
- lock_sock(sock->sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ lock_sock(sk);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- err = ssock->ops->bind(ssock, uaddr, addr_len);
+ if (sk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, uaddr, addr_len);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (sk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, uaddr, addr_len);
+#endif
if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_copy_inaddrs(sk, ssk);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
@@ -3697,7 +3727,7 @@ static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *sk = sock->sk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
pr_debug("msk=%p", msk);
@@ -3708,25 +3738,26 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto unlock;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
- err = ssock->ops->listen(ssock, backlog);
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ release_sock(ssk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
+
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- mptcp_copy_inaddrs(sk, ssock->sk);
+ mptcp_copy_inaddrs(sk, ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
}
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
unlock:
release_sock(sk);
return err;
@@ -3736,8 +3767,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *ssk, *newsk;
int err;
pr_debug("msk=%p", msk);
@@ -3745,11 +3775,11 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
	/* Buggy applications can call accept on socket states other than LISTEN
* but no need to allocate the first subflow just to error out.
*/
- ssock = READ_ONCE(msk->subflow);
- if (!ssock)
+ ssk = READ_ONCE(msk->first);
+ if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(sock->sk, flags, &err, kern);
+ newsk = mptcp_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
@@ -3776,11 +3806,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
- if (msk->first &&
- unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
- if (unlikely(list_empty(&msk->conn_list)))
+ if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
}
@@ -3819,12 +3848,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN) {
- struct socket *ssock = READ_ONCE(msk->subflow);
+ struct sock *ssk = READ_ONCE(msk->first);
- if (WARN_ON_ONCE(!ssock || !ssock->sk))
+ if (WARN_ON_ONCE(!ssk))
return 0;
- return inet_csk_listen_poll(ssock->sk);
+ return inet_csk_listen_poll(ssk);
}
shutdown = READ_ONCE(sk->sk_shutdown);
@@ -3839,7 +3868,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= EPOLLOUT | EPOLLWRNORM;
else
mask |= mptcp_check_writeable(msk);
- } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT &&
+ inet_test_bit(DEFER_CONNECT, sk)) {
/* cf tcp_poll() note about TFO */
mask |= EPOLLOUT | EPOLLWRNORM;
}
@@ -3935,6 +3965,7 @@ void __init mptcp_proto_init(void)
mptcp_subflow_init();
mptcp_pm_init();
+ mptcp_sched_init();
mptcp_token_init();
if (proto_register(&mptcp_prot, 1) != 0)
@@ -3988,6 +4019,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+ mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
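
The mptcp_rcv_space_adjust() rewrite above drops the per-skb rcvmem estimation loop in favour of TCP's scaling-ratio model: each subflow reports tcp_sk(ssk)->scaling_ratio, the msk keeps the minimum across subflows, and window/buffer conversions become simple shifts. Roughly, and assuming the TCP helpers keep their current shape:

    /* Approximate shape of the helpers used above; TCP_RMEM_TO_WIN_SCALE
     * is 8, so scaling_ratio expresses the window as a fraction of the
     * buffer in 1/256 units.
     */
    static inline int __tcp_win_from_space(u8 scaling_ratio, int space)
    {
    	s64 scaled_space = (s64)space * scaling_ratio;

    	return scaled_space >> TCP_RMEM_TO_WIN_SCALE;
    }

    static inline int __tcp_space_from_win(u8 scaling_ratio, int win)
    {
    	u64 val = (u64)win << TCP_RMEM_TO_WIN_SCALE;

    	do_div(val, scaling_ratio);
    	return val;
    }
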
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 37fbe22e2433..7254b3562575 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -123,7 +123,6 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
-#define MPTCP_RESET_SCHEDULER 7
struct mptcp_skb_cb {
u64 map_seq;
@@ -269,7 +268,6 @@ struct mptcp_sock {
u64 rcv_data_fin_seq;
u64 bytes_retrans;
int rmem_fwd_alloc;
- struct sock *last_snd;
int snd_burst;
int old_wspace;
u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
@@ -299,7 +297,8 @@ struct mptcp_sock {
cork:1,
nodelay:1,
fastopening:1,
- in_accept_queue:1;
+ in_accept_queue:1,
+ free_first:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -308,24 +307,23 @@ struct mptcp_sock {
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
struct list_head join_list;
- struct socket *subflow; /* outgoing connect/listener/!mp_capable
- * The mptcp ops can safely dereference, using suitable
- * ONCE annotation, the subflow outside the socket
- * lock as such sock is freed after close().
- */
- struct sock *first;
+	struct sock	*first; /* The mptcp ops can safely dereference, using suitable
+				 * ONCE annotation, the first subflow outside the socket
+				 * lock, since such a sock is freed only after close().
+				 */
struct mptcp_pm_data pm;
+ struct mptcp_sched_ops *sched;
struct {
u32 space; /* bytes copied in last measurement window */
u32 copied; /* bytes copied in this measurement window */
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
+ u8 scaling_ratio;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
- struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -351,9 +349,14 @@ static inline int __mptcp_rmem(const struct sock *sk)
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
+static inline int mptcp_win_from_space(const struct sock *sk, int space)
+{
+ return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+ return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -488,6 +491,7 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
__unused : 9;
enum mptcp_data_avail data_avail;
+ bool scheduled;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -621,6 +625,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
+const char *mptcp_get_scheduler(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -635,7 +640,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
@@ -653,6 +658,19 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
+struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_sched_init(void);
+int mptcp_init_sched(struct mptcp_sock *msk,
+ struct mptcp_sched_ops *sched);
+void mptcp_release_sched(struct mptcp_sock *msk);
+void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
+ bool scheduled);
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
+struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
+int mptcp_sched_get_send(struct mptcp_sock *msk);
+int mptcp_sched_get_retrans(struct mptcp_sock *msk);
static inline bool __tcp_can_send(const struct sock *ssk)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
new file mode 100644
index 000000000000..4ab0693c069c
--- /dev/null
+++ b/net/mptcp/sched.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, SUSE.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include "protocol.h"
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk,
+ struct mptcp_sched_data *data)
+{
+ struct sock *ssk;
+
+ ssk = data->reinject ? mptcp_subflow_get_retrans(msk) :
+ mptcp_subflow_get_send(msk);
+ if (!ssk)
+ return -EINVAL;
+
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true);
+ return 0;
+}
+
+static struct mptcp_sched_ops mptcp_sched_default = {
+ .get_subflow = mptcp_sched_default_get_subflow,
+ .name = "default",
+ .owner = THIS_MODULE,
+};
+
+/* Must be called with rcu read lock held */
+struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+ struct mptcp_sched_ops *sched, *ret = NULL;
+
+ list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
+ if (!strcmp(sched->name, name)) {
+ ret = sched;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+ if (!sched->get_subflow)
+ return -EINVAL;
+
+ spin_lock(&mptcp_sched_list_lock);
+ if (mptcp_sched_find(sched->name)) {
+ spin_unlock(&mptcp_sched_list_lock);
+ return -EEXIST;
+ }
+ list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+ spin_unlock(&mptcp_sched_list_lock);
+
+ pr_debug("%s registered", sched->name);
+ return 0;
+}
+
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
+{
+ if (sched == &mptcp_sched_default)
+ return;
+
+ spin_lock(&mptcp_sched_list_lock);
+ list_del_rcu(&sched->list);
+ spin_unlock(&mptcp_sched_list_lock);
+}
+
+void mptcp_sched_init(void)
+{
+ mptcp_register_scheduler(&mptcp_sched_default);
+}
+
+int mptcp_init_sched(struct mptcp_sock *msk,
+ struct mptcp_sched_ops *sched)
+{
+ if (!sched)
+ sched = &mptcp_sched_default;
+
+ if (!bpf_try_module_get(sched, sched->owner))
+ return -EBUSY;
+
+ msk->sched = sched;
+ if (msk->sched->init)
+ msk->sched->init(msk);
+
+ pr_debug("sched=%s", msk->sched->name);
+
+ return 0;
+}
+
+void mptcp_release_sched(struct mptcp_sock *msk)
+{
+ struct mptcp_sched_ops *sched = msk->sched;
+
+ if (!sched)
+ return;
+
+ msk->sched = NULL;
+ if (sched->release)
+ sched->release(msk);
+
+ bpf_module_put(sched, sched->owner);
+}
+
+void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
+ bool scheduled)
+{
+ WRITE_ONCE(subflow->scheduled, scheduled);
+}
+
+int mptcp_sched_get_send(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sched_data data;
+
+ msk_owned_by_me(msk);
+
+ /* the following check is moved out of mptcp_subflow_get_send */
+ if (__mptcp_check_fallback(msk)) {
+ if (msk->first &&
+ __tcp_can_send(msk->first) &&
+ sk_stream_memory_free(msk->first)) {
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
+ return 0;
+ }
+ return -EINVAL;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled))
+ return 0;
+ }
+
+ data.reinject = false;
+ if (msk->sched == &mptcp_sched_default || !msk->sched)
+ return mptcp_sched_default_get_subflow(msk, &data);
+ return msk->sched->get_subflow(msk, &data);
+}
+
+int mptcp_sched_get_retrans(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sched_data data;
+
+ msk_owned_by_me(msk);
+
+ /* the following check is moved out of mptcp_subflow_get_retrans */
+ if (__mptcp_check_fallback(msk))
+ return -EINVAL;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled))
+ return 0;
+ }
+
+ data.reinject = true;
+ if (msk->sched == &mptcp_sched_default || !msk->sched)
+ return mptcp_sched_default_get_subflow(msk, &data);
+ return msk->sched->get_subflow(msk, &data);
+}
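Taken together, sched.c exposes everything an out-of-tree scheduler needs: register an ops struct whose ->get_subflow() flags one or more subflows via mptcp_subflow_set_scheduled(), and the core (mptcp_sched_get_send()/_retrans() above) only calls into it when no subflow is flagged yet, leaving the actual transmission to the MPTCP core. A hedged sketch of a minimal modular scheduler built on the API added above (eligibility checks such as sk_stream_memory_free() are omitted for brevity, and mptcp_subflow_tcp_sock() is assumed available to reach the subflow's TCP socket):

        /* pick the first subflow that can still transmit */
        static int mptcp_sched_first_get_subflow(struct mptcp_sock *msk,
                                                 struct mptcp_sched_data *data)
        {
                struct mptcp_subflow_context *subflow;

                mptcp_for_each_subflow(msk, subflow) {
                        if (!__tcp_can_send(mptcp_subflow_tcp_sock(subflow)))
                                continue;

                        /* hand this subflow back to the core via the flag */
                        mptcp_subflow_set_scheduled(subflow, true);
                        return 0;
                }
                return -EINVAL;
        }

        static struct mptcp_sched_ops mptcp_sched_first = {
                .get_subflow    = mptcp_sched_first_get_subflow,
                .name           = "first",
                .owner          = THIS_MODULE,
        };

        static int __init mptcp_sched_first_init(void)
        {
                return mptcp_register_scheduler(&mptcp_sched_first);
        }
        module_init(mptcp_sched_first_init);

        static void __exit mptcp_sched_first_exit(void)
        {
                mptcp_unregister_scheduler(&mptcp_sched_first);
        }
        module_exit(mptcp_sched_first_exit);
        MODULE_LICENSE("GPL");

The mptcp_get_scheduler() accessor declared in protocol.h then presumably selects the per-netns default by name, in the same way mptcp_get_pm_type() works for the path manager.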
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 63f7a09335c5..8260202c0066 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -103,7 +103,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
break;
case SO_MARK:
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
- ssk->sk_mark = sk->sk_mark;
+ WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
sk_dst_reset(ssk);
}
break;
@@ -292,7 +292,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *ssock;
+ struct sock *ssk;
int ret;
switch (optname) {
@@ -301,22 +301,22 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTODEVICE:
case SO_BINDTOIFINDEX:
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
+ ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
if (ret == 0) {
if (optname == SO_REUSEPORT)
- sk->sk_reuseport = ssock->sk->sk_reuseport;
+ sk->sk_reuseport = ssk->sk_reuseport;
else if (optname == SO_REUSEADDR)
- sk->sk_reuse = ssock->sk->sk_reuse;
+ sk->sk_reuse = ssk->sk_reuse;
else if (optname == SO_BINDTODEVICE)
- sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+ sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
else if (optname == SO_BINDTOIFINDEX)
- sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+ sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
}
release_sock(sk);
return ret;
@@ -390,20 +390,20 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
{
struct sock *sk = (struct sock *)msk;
int ret = -EOPNOTSUPP;
- struct socket *ssock;
+ struct sock *ssk;
switch (optname) {
case IPV6_V6ONLY:
case IPV6_TRANSPARENT:
case IPV6_FREEBIND:
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
+ ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
if (ret != 0) {
release_sock(sk);
return ret;
@@ -413,13 +413,15 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
switch (optname) {
case IPV6_V6ONLY:
- sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+ sk->sk_ipv6only = ssk->sk_ipv6only;
break;
case IPV6_TRANSPARENT:
- inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
+ inet_assign_bit(TRANSPARENT, sk,
+ inet_test_bit(TRANSPARENT, ssk));
break;
case IPV6_FREEBIND:
- inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
+ inet_assign_bit(FREEBIND, sk,
+ inet_test_bit(FREEBIND, ssk));
break;
}
@@ -684,8 +686,7 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct inet_sock *issk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
@@ -694,20 +695,19 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- issk = inet_sk(ssock->sk);
-
switch (optname) {
case IP_FREEBIND:
- issk->freebind = inet_sk(sk)->freebind;
+ inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
break;
case IP_TRANSPARENT:
- issk->transparent = inet_sk(sk)->transparent;
+ inet_assign_bit(TRANSPARENT, ssk,
+ inet_test_bit(TRANSPARENT, sk));
break;
default:
release_sock(sk);
@@ -763,18 +763,18 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *sock;
+ struct sock *ssk;
int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
- sock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(sock)) {
- ret = PTR_ERR(sock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ ret = PTR_ERR(ssk);
goto unlock;
}
- ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+ ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
unlock:
release_sock(sk);
@@ -864,9 +864,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
char __user *optval, int __user *optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *ssock;
- int ret;
struct sock *ssk;
+ int ret;
lock_sock(sk);
ssk = msk->first;
@@ -875,13 +874,13 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
goto out;
}
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- ret = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ ret = PTR_ERR(ssk);
goto out;
}
- ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
+ ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
out:
release_sock(sk);
@@ -1441,8 +1440,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
__tcp_sock_set_cork(ssk, !!msk->cork);
__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
- inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
- inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
+ inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
+ inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
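The hunks above also complete the conversion away from open-coded inet_sock bitfields: inet_test_bit()/inet_assign_bit() operate on the socket's atomic flags word, so mirroring a flag between the MPTCP socket and a subflow no longer races with lockless readers. The pattern is uniform at every call site:

        /* propagate a flag from the msk to a subflow, atomically */
        inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
        inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));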
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ee3b7abbaf6..9bf3c7bc1762 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1359,7 +1359,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
const struct sock *sk = subflow->conn;
*space = __mptcp_space(sk);
- *full_space = tcp_full_space(sk);
+ *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
void __mptcp_error_report(struct sock *sk)
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
- struct mptcp_sock *msk, *next, *head = NULL;
- struct request_sock *req;
- struct sock *sk;
+ struct request_sock *req, *head, *tail;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk, *ssk;
- /* build a list of all unaccepted mptcp sockets */
+ /* Due to lock dependencies, no relevant lock can be acquired under rskq_lock.
+ * Splice the req list, so that accept() cannot reach the pending ssk after
+ * the listener socket is released below.
+ */
spin_lock_bh(&queue->rskq_lock);
- for (req = queue->rskq_accept_head; req; req = req->dl_next) {
- struct mptcp_subflow_context *subflow;
- struct sock *ssk = req->sk;
+ head = queue->rskq_accept_head;
+ tail = queue->rskq_accept_tail;
+ queue->rskq_accept_head = NULL;
+ queue->rskq_accept_tail = NULL;
+ spin_unlock_bh(&queue->rskq_lock);
+
+ if (!head)
+ return;
+ /* can't acquire the msk socket lock under the subflow one,
+ * or it will cause an ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (req = head; req; req = req->dl_next) {
+ ssk = req->sk;
if (!sk_is_mptcp(ssk))
continue;
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
if (!subflow || !subflow->conn)
continue;
- /* skip if already in list */
sk = subflow->conn;
- msk = mptcp_sk(sk);
- if (msk->dl_next || msk == head)
- continue;
-
sock_hold(sk);
- msk->dl_next = head;
- head = msk;
- }
- spin_unlock_bh(&queue->rskq_lock);
- if (!head)
- return;
-
- /* can't acquire the msk socket lock under the subflow one,
- * or will cause ABBA deadlock
- */
- release_sock(listener_ssk);
-
- for (msk = head; msk; msk = next) {
- sk = (struct sock *)msk;
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- next = msk->dl_next;
- msk->dl_next = NULL;
-
__mptcp_unaccepted_force_close(sk);
release_sock(sk);
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
/* we are still under the listener msk socket lock */
lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+ /* restore the listener queue, to let the TCP code clean it up */
+ spin_lock_bh(&queue->rskq_lock);
+ WARN_ON_ONCE(queue->rskq_accept_head);
+ queue->rskq_accept_head = head;
+ queue->rskq_accept_tail = tail;
+ spin_unlock_bh(&queue->rskq_lock);
}
static int subflow_ulp_init(struct sock *sk)
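The queue_clean rework replaces the old build-a-list-of-msks approach with a detach/work/re-attach scheme: the whole accept queue is spliced out under rskq_lock, the listener lock is dropped (taking an msk lock while holding the subflow lock is the ABBA pattern the comment warns about), each unaccepted msk is force-closed, and the queue is finally restored so inet_csk_listen_stop() can release the requests. The same splice-under-lock idiom, shown in its generic list_head form (struct item and handle() are illustrative):

        struct item { struct list_head list; };

        static void handle(struct item *it);    /* may sleep */

        static void process_all(struct list_head *shared, spinlock_t *lock)
        {
                struct item *it, *next;
                LIST_HEAD(todo);

                spin_lock_bh(lock);
                list_splice_init(shared, &todo);        /* detach everything */
                spin_unlock_bh(lock);

                /* walk with no spinlock held; sleeping locks are fine here */
                list_for_each_entry_safe(it, next, &todo, list)
                        handle(it);
        }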
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index d27f4eccce6d..a3a6753a1db7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -563,7 +563,7 @@ int ncsi_send_netlink_timeout(struct ncsi_request *nr,
int ncsi_send_netlink_err(struct net_device *dev,
u32 snd_seq,
u32 snd_portid,
- struct nlmsghdr *nlhdr,
+ const struct nlmsghdr *nlhdr,
int err)
{
struct nlmsghdr *nlh;
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 39a1a9d7bf77..747767ea0aae 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -19,7 +19,7 @@ int ncsi_send_netlink_timeout(struct ncsi_request *nr,
int ncsi_send_netlink_err(struct net_device *dev,
u32 snd_seq,
u32 snd_portid,
- struct nlmsghdr *nlhdr,
+ const struct nlmsghdr *nlhdr,
int err);
#endif /* __NCSI_NETLINK_H__ */
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5f76ae86a656..ef4e76e5aef9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -680,6 +680,12 @@ EXPORT_SYMBOL_GPL(nfnl_ct_hook);
const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
+const struct nf_defrag_hook __rcu *nf_defrag_v4_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v4_hook);
+
+const struct nf_defrag_hook __rcu *nf_defrag_v6_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v6_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
u8 nf_ctnetlink_has_listener;
EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 0b68e2e2824e..e564b5174261 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -872,7 +872,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
BUG_ON(!set);
read_lock_bh(&ip_set_ref_lock);
- strncpy(name, set->name, IPSET_MAXNAMELEN);
+ strscpy_pad(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -1326,7 +1326,7 @@ static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
goto out;
}
}
- strncpy(set->name, name2, IPSET_MAXNAMELEN);
+ strscpy_pad(set->name, name2, IPSET_MAXNAMELEN);
out:
write_unlock_bh(&ip_set_ref_lock);
@@ -1380,9 +1380,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
return -EBUSY;
}
- strncpy(from_name, from->name, IPSET_MAXNAMELEN);
- strncpy(from->name, to->name, IPSET_MAXNAMELEN);
- strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+ strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN);
+ strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN);
+ strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
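strscpy_pad() both guarantees NUL-termination (which strncpy() does not when the source fills the buffer) and zero-fills the remaining bytes, so set names later copied to userspace cannot leak stale stack or slab contents. The call shape is unchanged:

        char name[IPSET_MAXNAMELEN];

        /* always NUL-terminated, trailing bytes zeroed */
        strscpy_pad(name, set->name, IPSET_MAXNAMELEN);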
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index cb83ca506c5c..3230506ae3ff 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1346,7 +1346,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
@@ -1946,7 +1946,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8d69e4c2d822..143a341bbc0a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
.mode = table->mode,
};
+ mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
else
memcpy(valp, val, sizeof(val));
}
+ mutex_unlock(&ipvs->sync_mutex);
return rc;
}
@@ -4324,6 +4327,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
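The handler previously did an unlocked read-modify-write of the two-element sync_threshold array, racing with the sync daemon; the fix threads the per-netns context in via ctl_table->extra2 so the handler can serialize on ipvs->sync_mutex. The general shape, as used above:

        /* at registration time: stash the private pointer */
        tbl[idx].extra2 = ipvs;

        /* in the handler: recover it and serialize the update */
        struct netns_ipvs *ipvs = table->extra2;

        mutex_lock(&ipvs->sync_mutex);
        /* run proc_dointvec() on a local copy, then commit or roll back */
        mutex_unlock(&ipvs->sync_mutex);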
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 264f2f87a437..da5af28ff57b 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1297,11 +1297,9 @@ static void set_sock_size(struct sock *sk, int mode, int val)
*/
static void set_mcast_loop(struct sock *sk, u_char loop)
{
- struct inet_sock *inet = inet_sk(sk);
-
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk);
- inet->mc_loop = loop ? 1 : 0;
+ inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index c36da56d756f..e502ec00b2fe 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +25,90 @@ struct bpf_nf_link {
struct nf_hook_ops hook_ops;
struct net *net;
u32 dead;
+ const struct nf_defrag_hook *defrag_hook;
};
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+static const struct nf_defrag_hook *
+get_proto_defrag_hook(struct bpf_nf_link *link,
+ const struct nf_defrag_hook __rcu *global_hook,
+ const char *mod)
+{
+ const struct nf_defrag_hook *hook;
+ int err;
+
+ /* RCU protects us from races against module unloading */
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ if (!hook) {
+ rcu_read_unlock();
+ err = request_module(mod);
+ if (err)
+ return ERR_PTR(err < 0 ? err : -EINVAL);
+
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ }
+
+ if (hook && try_module_get(hook->owner)) {
+ /* Once we have a refcnt on the module, we no longer need RCU */
+ hook = rcu_pointer_handoff(hook);
+ } else {
+ WARN_ONCE(!hook, "%s has bad registration", mod);
+ hook = ERR_PTR(-ENOENT);
+ }
+ rcu_read_unlock();
+
+ if (!IS_ERR(hook)) {
+ err = hook->enable(link->net);
+ if (err) {
+ module_put(hook->owner);
+ hook = ERR_PTR(err);
+ }
+ }
+
+ return hook;
+}
+#endif
+
+static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook __maybe_unused *hook;
+
+ switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ case NFPROTO_IPV4:
+ hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ case NFPROTO_IPV6:
+ hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook *hook = link->defrag_hook;
+
+ if (!hook)
+ return;
+ hook->disable(link->net);
+ module_put(hook->owner);
+}
+
static void bpf_nf_link_release(struct bpf_link *link)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -32,11 +116,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
if (nf_link->dead)
return;
- /* prevent hook-not-found warning splat from netfilter core when
- * .detach was already called
- */
- if (!cmpxchg(&nf_link->dead, 0, 1))
+ /* do not double release in case .detach was already called */
+ if (!cmpxchg(&nf_link->dead, 0, 1)) {
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+ bpf_nf_disable_defrag(nf_link);
+ }
}
static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +176,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
+ int prio;
+
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
@@ -102,19 +188,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
return -EAFNOSUPPORT;
}
- if (attr->link_create.netfilter.flags)
+ if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;
- /* make sure conntrack confirm is always last.
- *
- * In the future, if userspace can e.g. request defrag, then
- * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
- * should fail.
- */
- switch (attr->link_create.netfilter.priority) {
- case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
- case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
- }
+ /* make sure conntrack confirm is always last */
+ prio = attr->link_create.netfilter.priority;
+ if (prio == NF_IP_PRI_FIRST)
+ return -ERANGE; /* sabotage_in and other warts */
+ else if (prio == NF_IP_PRI_LAST)
+ return -ERANGE; /* e.g. conntrack confirm */
+ else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
+ prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
+ return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
return 0;
}
@@ -149,6 +234,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
link->net = net;
link->dead = false;
+ link->defrag_hook = NULL;
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
@@ -156,8 +242,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
+ err = bpf_nf_enable_defrag(link);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ }
+
err = nf_register_net_hook(net, &link->hook_ops);
if (err) {
+ bpf_nf_disable_defrag(link);
bpf_link_cleanup(&link_primer);
return err;
}
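From userspace, the new flag is requested at link creation time; per the checks above it is rejected unless the program's priority places it after the defrag hook. A hedged sketch with libbpf (the netfilter member of bpf_link_create_opts is assumed to match the uapi link_create.netfilter fields):

        LIBBPF_OPTS(bpf_link_create_opts, opts,
                .netfilter.pf = NFPROTO_IPV4,
                .netfilter.hooknum = NF_INET_PRE_ROUTING,
                /* must be > NF_IP_PRI_CONNTRACK_DEFRAG, see the -ERANGE check */
                .netfilter.priority = NF_IP_PRI_CONNTRACK_DEFRAG + 1,
                .netfilter.flags = BPF_F_NETFILTER_IP_DEFRAG,
        );
        int link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &opts);

On attach, bpf_nf_enable_defrag() request_module()s nf_defrag_ipv4/nf_defrag_ipv6 as needed and pins the module for the lifetime of the link; bpf_nf_disable_defrag() drops both the per-netns enable and the module reference on release.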
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 0d36d7285e3f..c7a6114091ae 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
+#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d119f1d4c2fc..9f6f2e643575 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
const struct net *net)
{
- u64 a, b, c, d;
+ siphash_key_t key;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- /* The direction must be ignored, handle usable tuplehash members manually */
- a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
- b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
+ key = nf_conntrack_hash_rnd;
- c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
- c |= tuple->dst.protonum;
+ key.key[0] ^= zoneid;
+ key.key[1] ^= net_hash_mix(net);
- d = (u64)zoneid << 32 | net_hash_mix(net);
-
- /* IPv4: u3.all[1,2,3] == 0 */
- c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
- d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
-
- return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
+ return siphash((void *)tuple,
+ offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
+ &key);
}
static u32 scale_hash(u32 hash)
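The rewrite hashes the tuple as one flat key and folds the zone id and netns mix into the siphash key itself, instead of hand-packing fields into four u64s. This is only sound if every byte of the hashed prefix is initialized (no padding holes), which is what the __nfct_hash_offsetend marker in struct nf_conntrack_tuple is assumed to delimit:

        /* offsetofend(T, m) == offsetof(T, m) + sizeof(((T *)0)->m) */
        return siphash(tuple,
                       offsetofend(struct nf_conntrack_tuple,
                                   dst.__nfct_hash_offsetend),
                       &key);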
@@ -1762,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 96948e98ec53..81ca348915c9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple, bool unlink)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
- if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0c4db2f2ac43..f22691f83853 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -360,6 +360,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+ if (!nf_ct_helper_hash)
+ return -ENOENT;
+
if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
return -EINVAL;
@@ -515,4 +518,5 @@ int nf_conntrack_helper_init(void)
void nf_conntrack_helper_fini(void)
{
kvfree(nf_ct_helper_hash);
+ nf_ct_helper_hash = NULL;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 69c8c8c7e9b8..334db22199c1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1321,15 +1321,11 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nlattr *tb[CTA_IP_MAX+1];
int ret = 0;
- ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL);
+ ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr,
+ cta_ip_nla_policy, NULL);
if (ret < 0)
return ret;
- ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX,
- cta_ip_nla_policy, NULL);
- if (ret)
- return ret;
-
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index d4fd626d2b8c..e2db1f4ec2df 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -69,6 +69,7 @@
#define DCCP_MSL (2 * 60 * HZ)
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static const char * const dccp_state_names[] = {
[CT_DCCP_NONE] = "NONE",
[CT_DCCP_REQUEST] = "REQUEST",
@@ -81,6 +82,7 @@ static const char * const dccp_state_names[] = {
[CT_DCCP_IGNORE] = "IGNORE",
[CT_DCCP_INVALID] = "INVALID",
};
+#endif
#define sNO CT_DCCP_NONE
#define sRQ CT_DCCP_REQUEST
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index ad6f0ca40cd2..af369e686fc5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -205,6 +205,8 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
+ unsigned long status;
+
if (!nf_ct_is_confirmed(ct)) {
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
@@ -217,11 +219,17 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
}
+ status = READ_ONCE(ct->status);
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
+ if (status & IPS_SEEN_REPLY) {
nf_ct_refresh_acct(ct, ctinfo, skb,
ct->proto.gre.stream_timeout);
+
+ /* never set ASSURED for IPS_NAT_CLASH, they time out soon */
+ if (unlikely((status & IPS_NAT_CLASH)))
+ return NF_ACCEPT;
+
/* Also, more likely to be important, and not a probe. */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 91eacc9b0b98..b6bcc8f2f46b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
- [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
- [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
};
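The timeout fix is pure arithmetic: with this file's "#define SECS  * HZ" convention, the old initializer expanded as

        300 SECS / 1000   ->   (300 * HZ) / 1000   /* = 0.3 s */
          3 SECS          ->     3 * HZ            /* = 3 s, the intended value */

so SHUTDOWN_SENT/SHUTDOWN_RECD connections were being expired after 300 ms instead of 3 seconds.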
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 1c26f03fc661..a010b25076ca 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -34,7 +34,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
- unsigned int enc_keys;
+ unsigned long long enc_keys;
if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
return;
@@ -43,8 +43,8 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
mask->enc_key_id.keyid = 0xffffffff;
- enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+ enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL);
if (ip_tunnel_info_af(tun_info) == AF_INET) {
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -55,7 +55,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
mask->enc_ipv4.src = 0xffffffff;
if (key->enc_ipv4.dst)
mask->enc_ipv4.dst = 0xffffffff;
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
} else {
memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
@@ -70,7 +70,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
sizeof(struct in6_addr)))
memset(&mask->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
@@ -163,14 +163,14 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
return -EOPNOTSUPP;
}
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(key->control.addr_type);
+ match->dissector.used_keys |= BIT_ULL(key->control.addr_type);
mask->basic.n_proto = 0xffff;
switch (tuple->l4proto) {
case IPPROTO_TCP:
key->tcp.flags = 0;
mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP);
break;
case IPPROTO_UDP:
case IPPROTO_GRE:
@@ -182,9 +182,9 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->basic.ip_proto = tuple->l4proto;
mask->basic.ip_proto = 0xff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
- BIT(FLOW_DISSECTOR_KEY_CONTROL) |
- BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
switch (tuple->l4proto) {
case IPPROTO_TCP:
@@ -194,7 +194,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->tp.dst = tuple->dst_port;
mask->tp.dst = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
break;
}
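used_keys has been widened to 64 bits tree-wide because enum flow_dissector_key_id has outgrown 32 entries; BIT(key) on a high key id would shift past the width of an int, which is undefined behaviour. Hence the mechanical conversion:

        /* 64-bit safe for FLOW_DISSECTOR_KEY_* >= 32 */
        match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
                                      BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
                                      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);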
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index fadbd4ed3dc0..c4e0516a8dfa 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -327,7 +327,7 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range.
*/
-static int in_range(const struct nf_conntrack_tuple *tuple,
+static int nf_in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range)
{
/* If we are supposed to map IPs, then we must be in the
@@ -376,7 +376,7 @@ find_appropriate_src(struct net *net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(result, range))
+ if (nf_in_range(result, range))
return 1;
}
}
@@ -607,7 +607,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
- if (in_range(orig_tuple, range)) {
+ if (nf_in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
return;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9573a8fcad79..41b826dff6f5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
enum {
NFT_VALIDATE_SKIP = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -253,8 +258,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
if (chain->bound)
return -EBUSY;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
+
chain->bound = true;
- chain->use++;
nft_chain_trans_bind(ctx, chain);
return 0;
@@ -437,7 +444,7 @@ static int nft_delchain(struct nft_ctx *ctx)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nft_deactivate_next(ctx->net, ctx->chain);
return 0;
@@ -476,7 +483,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
/* You cannot delete the same rule twice */
if (nft_is_active_next(ctx->net, rule)) {
nft_deactivate_next(ctx->net, rule);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
return 0;
}
return -ENOENT;
@@ -580,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem);
-
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -644,7 +647,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
nft_map_deactivate(ctx, set);
nft_deactivate_next(ctx->net, set);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -676,7 +679,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
nft_deactivate_next(ctx->net, obj);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -711,7 +714,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
return err;
nft_deactivate_next(ctx->net, flowtable);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
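All the open-coded use++/use-- pairs become nft_use_inc()/nft_use_dec(), centralizing the overflow check that was previously scattered as "use == UINT_MAX" tests into a single -EMFILE failure path. A hedged sketch of the semantics these helpers are expected to have (the real definitions live in include/net/netfilter/nf_tables.h):

        /* refuse a new reference once the counter would wrap */
        static inline bool nft_use_inc(u32 *use)
        {
                return ++*use != 0;
        }

        static inline void nft_use_dec(u32 *use)
        {
                WARN_ON_ONCE((*use)-- == 0);
        }

The *_restore variants used in the abort paths are assumed to be the same operations with a WARN on the impossible wrap, since a restore undoes a previously successful inc/dec.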
@@ -1370,7 +1373,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
if (table == NULL)
goto err_kzalloc;
- table->validate_state = NFT_VALIDATE_SKIP;
+ table->validate_state = nft_net->validate_state;
table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (table->name == NULL)
goto err_strdup;
@@ -2396,9 +2399,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_chain *chain;
int err;
- if (table->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
@@ -2494,6 +2494,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err_destroy_chain;
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err_use;
+ }
+
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -2510,10 +2515,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
- table->use++;
-
return 0;
+
err_unregister_hook:
+ nft_use_dec_restore(&table->use);
+err_use:
nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -2694,7 +2700,7 @@ err_hooks:
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
@@ -2705,7 +2711,8 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
chain->table == table &&
- id == nft_trans_chain_id(trans))
+ id == nft_trans_chain_id(trans) &&
+ nft_active_genmask(chain, genmask))
return chain;
}
return ERR_PTR(-ENOENT);
@@ -3668,6 +3675,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
return -EMLINK;
list_for_each_entry(rule, &chain->rules, list) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
if (!nft_is_active_next(ctx->net, rule))
continue;
@@ -3679,8 +3689,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
-
- cond_resched();
}
return 0;
@@ -3704,6 +3712,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
+
+ cond_resched();
}
return 0;
@@ -3805,11 +3815,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
- return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
+ genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
@@ -3818,6 +3827,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
}
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
+
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
rule = __nft_rule_lookup(chain, handle);
@@ -3840,9 +3852,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
handle = nf_tables_alloc_handle(table);
- if (chain->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
@@ -3936,6 +3945,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
+ if (!nft_use_inc(&chain->use)) {
+ err = -EMFILE;
+ goto err_release_rule;
+ }
+
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
err = nft_delrule(&ctx, old_rule);
if (err < 0)
@@ -3967,7 +3981,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
kvfree(expr_info);
- chain->use++;
if (flow)
nft_trans_flow_rule(trans) = flow;
@@ -3978,6 +3991,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return 0;
err_destroy_flow_rule:
+ nft_use_dec_restore(&chain->use);
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
@@ -4078,6 +4092,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
@@ -5014,9 +5030,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
- if (!set)
- return -ENOMEM;
+ if (!set) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
if (!name) {
@@ -5037,6 +5059,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&set->bindings);
INIT_LIST_HEAD(&set->catchall_list);
+ refcount_set(&set->refs, 1);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
@@ -5074,7 +5097,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets);
- table->use++;
+
return 0;
err_set_expr_alloc:
@@ -5086,6 +5109,9 @@ err_set_init:
kfree(set->name);
err_set_name:
kvfree(set);
+err_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -5101,6 +5127,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
}
}
+static void nft_set_put(struct nft_set *set)
+{
+ if (refcount_dec_and_test(&set->refs)) {
+ kfree(set->name);
+ kvfree(set);
+ }
+}
+
static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
int i;
@@ -5113,8 +5147,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
- kfree(set->name);
- kvfree(set);
+ nft_set_put(set);
}
static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5224,9 +5257,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (set->use == UINT_MAX)
- return -EOVERFLOW;
-
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -5254,10 +5284,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
return iter.err;
}
bind:
+ if (!nft_use_inc(&set->use))
+ return -EMFILE;
+
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
- set->use++;
return 0;
}
@@ -5331,7 +5363,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
nft_clear(ctx->net, set);
}
- set->use++;
+ nft_use_inc_restore(&set->use);
}
EXPORT_SYMBOL_GPL(nf_tables_activate_set);
@@ -5347,7 +5379,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
else
list_del_rcu(&binding->list);
- set->use--;
+ nft_use_dec(&set->use);
break;
case NFT_TRANS_PREPARE:
if (nft_set_is_anonymous(set)) {
@@ -5356,7 +5388,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
nft_deactivate_next(ctx->net, set);
}
- set->use--;
+ nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
@@ -5364,7 +5396,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_deactivate(ctx, set);
- set->use--;
+ nft_use_dec(&set->use);
fallthrough;
default:
nf_tables_unbind_set(ctx, set, binding,
@@ -5582,8 +5614,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_set_dump_args *args;
+ if (nft_set_elem_expired(ext))
+ return 0;
+
args = container_of(iter, struct nft_set_dump_args, iter);
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
@@ -6155,7 +6191,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -6254,7 +6290,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask) &&
- !nft_set_elem_expired(ext))
+ !nft_set_elem_expired(ext) &&
+ !nft_set_elem_is_dead(ext))
return ext;
}
@@ -6262,29 +6299,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
}
EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
- struct nft_set_elem_catchall *catchall, *next;
- struct nft_set_ext *ext;
- void *elem = NULL;
-
- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
-
- if (!nft_set_elem_expired(ext) ||
- nft_set_elem_mark_busy(ext))
- continue;
-
- elem = catchall->elem;
- list_del_rcu(&catchall->list);
- kfree_rcu(catchall, rcu);
- break;
- }
-
- return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
@@ -6346,7 +6360,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
if (nft_setelem_is_catchall(set, elem)) {
nft_set_elem_change_active(net, set, ext);
- nft_set_elem_clear_busy(ext);
} else {
set->ops->activate(net, set, elem);
}
@@ -6361,8 +6374,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_is_active(net, ext))
continue;
kfree(elem->priv);
@@ -6657,8 +6669,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ obj = NULL;
+ goto err_parse_key_end;
+ }
+
+ if (!nft_use_inc(&obj->use)) {
+ err = -EMFILE;
+ obj = NULL;
goto err_parse_key_end;
}
+
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
if (err < 0)
goto err_parse_key_end;
@@ -6727,10 +6747,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
- if (obj) {
+ if (obj)
*nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
+
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
@@ -6750,7 +6769,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_elem_free;
}
- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+ ext->genmask = nft_genmask_cur(ctx->net);
err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
if (err) {
@@ -6798,12 +6817,13 @@ err_element_clash:
kfree(trans);
err_elem_free:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
- if (obj)
- obj->use--;
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
err_parse_key_end:
+ if (obj)
+ nft_use_dec_restore(&obj->use);
+
nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
@@ -6883,7 +6903,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use++;
+ nft_use_inc_restore(&chain->use);
break;
}
}
@@ -6898,19 +6918,19 @@ static void nft_setelem_data_activate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use++;
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
@@ -7067,14 +7087,14 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_set_elem_active(ext, genmask))
continue;
elem.priv = catchall->elem;
ret = __nft_set_catchall_flush(ctx, set, &elem);
if (ret < 0)
break;
+ nft_set_elem_change_active(ctx->net, set, ext);
}
return ret;
@@ -7142,29 +7162,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return err;
}
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
- struct nft_set_gc_batch *gcb;
- unsigned int i;
-
- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
- for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
- kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp)
-{
- struct nft_set_gc_batch *gcb;
-
- gcb = kzalloc(sizeof(*gcb), gfp);
- if (gcb == NULL)
- return gcb;
- gcb->head.set = set;
- return gcb;
-}
-
/*
* Stateful objects
*/
@@ -7453,9 +7450,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
type = nft_obj_type_get(net, objtype);
- if (IS_ERR(type))
- return PTR_ERR(type);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err_type;
+ }
obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
@@ -7489,7 +7491,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
goto err_obj_ht;
list_add_tail_rcu(&obj->list, &table->objects);
- table->use++;
+
return 0;
err_obj_ht:
/* queued in transaction log */
@@ -7505,6 +7507,9 @@ err_strdup:
kfree(obj);
err_init:
module_put(type->owner);
+err_type:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -7906,7 +7911,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
- flowtable->use--;
+ nft_use_dec(&flowtable->use);
fallthrough;
default:
return;
@@ -8260,9 +8265,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
- if (!flowtable)
- return -ENOMEM;
+ if (!flowtable) {
+ err = -ENOMEM;
+ goto flowtable_alloc;
+ }
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
@@ -8317,7 +8327,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
- table->use++;
return 0;
err5:
@@ -8334,6 +8343,9 @@ err2:
kfree(flowtable->name);
err1:
kfree(flowtable);
+flowtable_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -9042,9 +9054,8 @@ static int nf_tables_validate(struct net *net)
return -EAGAIN;
nft_validate_state_update(table, NFT_VALIDATE_SKIP);
+ break;
}
-
- break;
}
return 0;
@@ -9371,6 +9382,212 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+ struct nft_trans_gc *trans)
+{
+ void **priv = trans->priv;
+ unsigned int i;
+
+ for (i = 0; i < trans->count; i++) {
+ struct nft_set_elem elem = {
+ .priv = priv[i],
+ };
+
+ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+ nft_setelem_remove(ctx->net, trans->set, &elem);
+ }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+ nft_set_put(trans->set);
+ put_net(trans->net);
+ kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+ struct nft_set_elem elem = {};
+ struct nft_trans_gc *trans;
+ struct nft_ctx ctx = {};
+ unsigned int i;
+
+ trans = container_of(rcu, struct nft_trans_gc, rcu);
+ ctx.net = read_pnet(&trans->set->net);
+
+ for (i = 0; i < trans->count; i++) {
+ elem.priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, &elem))
+ atomic_dec(&trans->set->nelems);
+
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ }
+
+ nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_ctx ctx = {};
+
+ nft_net = nft_pernet(trans->net);
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ /* Check for a race with the transaction machinery, otherwise this batch
+ * refers to stale objects that might not be there anymore. Also skip if
+ * the set has been destroyed by a control plane transaction, in case the
+ * gc worker loses the race.
+ */
+ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return false;
+ }
+
+ ctx.net = trans->net;
+ ctx.table = trans->set->table;
+
+ nft_trans_gc_setelem_remove(&ctx, trans);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+ spin_lock(&nf_tables_gc_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+ if (!nft_trans_gc_work_done(trans)) {
+ nft_trans_gc_destroy(trans);
+ continue;
+ }
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct net *net = read_pnet(&set->net);
+ struct nft_trans_gc *trans;
+
+ trans = kzalloc(sizeof(*trans), gfp);
+ if (!trans)
+ return NULL;
+
+ trans->net = maybe_get_net(net);
+ if (!trans->net) {
+ kfree(trans);
+ return NULL;
+ }
+
+ refcount_inc(&set->refs);
+ trans->set = set;
+ trans->seq = gc_seq;
+
+ return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+ trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+ spin_lock(&nf_tables_gc_list_lock);
+ list_add_tail(&trans->list, &nf_tables_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ nft_trans_gc_queue_work(gc);
+
+ return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+ return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ struct nft_set_elem_catchall *catchall;
+ const struct nft_set *set = gc->set;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+ if (nft_set_elem_is_dead(ext))
+ goto dead_elem;
+
+ nft_set_elem_dead(ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ return NULL;
+
+ nft_trans_gc_elem_add(gc, catchall->elem);
+ }
+
+ return gc;
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9529,15 +9746,31 @@ static void nft_set_commit_update(struct list_head *set_update_list)
}
}
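+/* The gc sequence counter is even while idle and odd while a commit or
+ * abort is in progress; async gc batches carrying a stale sequence
+ * number are discarded by the gc worker.
+ */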
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+ unsigned int gc_seq;
+
+ /* Bump the gc counter; it becomes odd, which is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
+ return gc_seq;
+}
+
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
- unsigned int base_seq;
LIST_HEAD(adl);
int err;
@@ -9568,8 +9801,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
/* 0. Validate ruleset, otherwise roll back for error reporting. */
- if (nf_tables_validate(net) < 0)
+ if (nf_tables_validate(net) < 0) {
+ nft_net->validate_state = NFT_VALIDATE_DO;
return -EAGAIN;
+ }
err = nft_flow_rule_offload_commit(net);
if (err < 0)
@@ -9614,6 +9849,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
+ gc_seq = nft_gc_seq_begin(nft_net);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -9713,7 +9950,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ nft_use_dec(&trans->ctx.table->use);
}
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
@@ -9721,6 +9958,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
@@ -9823,6 +10061,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
nf_tables_commit_release(net);
return 0;
@@ -9943,7 +10184,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
@@ -9956,7 +10197,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, trans->ctx.chain);
}
nft_trans_destroy(trans);
@@ -9966,7 +10207,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.chain->use--;
+ nft_use_dec_restore(&trans->ctx.chain->use);
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
@@ -9976,7 +10217,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- trans->ctx.chain->use++;
+ nft_use_inc_restore(&trans->ctx.chain->use);
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
@@ -9989,7 +10230,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -9998,7 +10239,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_activate(&trans->ctx, nft_trans_set(trans));
@@ -10042,13 +10283,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_obj_del(nft_trans_obj(trans));
}
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
@@ -10057,7 +10298,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
@@ -10069,7 +10310,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
}
nft_trans_destroy(trans);
@@ -10099,8 +10340,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
- int ret = __nf_tables_abort(net, action);
+ unsigned int gc_seq;
+ int ret;
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10243,6 +10488,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (ctx->chain == chain)
return -ELOOP;
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
list_for_each_entry(rule, &chain->rules, list) {
nft_rule_for_each_expr(expr, last, rule) {
struct nft_immediate_expr *priv;
@@ -10477,6 +10725,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
+
+ /* Zero the padding hole so memcmp() sees fully initialized data. */
+ memset(data, 0, sizeof(*data));
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
@@ -10502,7 +10753,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
genmask);
} else if (tb[NFTA_VERDICT_CHAIN_ID]) {
chain = nft_chain_lookup_byid(ctx->net, ctx->table,
- tb[NFTA_VERDICT_CHAIN_ID]);
+ tb[NFTA_VERDICT_CHAIN_ID],
+ genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
} else {
@@ -10518,8 +10770,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
- chain->use++;
data->verdict.chain = chain;
break;
}
@@ -10537,7 +10790,7 @@ static void nft_verdict_uninit(const struct nft_data *data)
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use--;
+ nft_use_dec(&chain->use);
break;
}
}
@@ -10706,11 +10959,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
nf_tables_rule_release(ctx, rule);
}
nft_chain_del(ctx->chain);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nf_tables_chain_destroy(ctx);
return 0;
@@ -10757,21 +11010,24 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
+ if (nft_chain_is_bound(chain))
+ continue;
+
ctx.chain = chain;
list_for_each_entry_safe(rule, nr, &chain->rules, list) {
list_del(&rule->list);
- chain->use--;
+ nft_use_dec(&chain->use);
nf_tables_rule_release(&ctx, rule);
}
}
list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
list_del(&flowtable->list);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_flowtable_destroy(flowtable);
}
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
- table->use--;
+ nft_use_dec(&table->use);
if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_deactivate(&ctx, set);
@@ -10779,13 +11035,13 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
nft_obj_del(obj);
- table->use--;
+ nft_use_dec(&table->use);
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain;
nft_chain_del(chain);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_chain_destroy(&ctx);
}
nf_tables_table_destroy(&ctx);
@@ -10815,6 +11071,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
struct net *net = n->net;
unsigned int deleted;
bool restart = false;
+ unsigned int gc_seq;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
@@ -10822,8 +11079,11 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nf_tables_destroy_list))
- rcu_barrier();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -10844,6 +11104,8 @@ again:
if (restart)
goto again;
}
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
@@ -10864,6 +11126,8 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
+ nft_net->gc_seq = 0;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
return 0;
}
@@ -10880,22 +11144,36 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ unsigned int gc_seq;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
+
__nft_release_tables(net);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
}
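+/* Ensure the async gc worker has finished before the per-net state of
+ * exiting namespaces is released.
+ */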
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+ flush_work(&trans_gc_work);
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.pre_exit = nf_tables_pre_exit_net,
.exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
.id = &nf_tables_net_id,
.size = sizeof(struct nftables_pernet),
};
@@ -10967,6 +11245,7 @@ static void __exit nf_tables_module_exit(void)
nft_chain_filter_fini();
nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 910ef881c3b8..12ab78fa5d84 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -35,12 +35,12 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
struct nft_flow_key *mask = &match->mask;
struct nft_flow_key *key = &match->key;
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL))
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))
return;
key->control.addr_type = addr_type;
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL);
match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
offsetof(struct nft_flow_key, control);
}
@@ -59,7 +59,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
.mask = match->mask.basic.n_proto,
};
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) &&
(match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
@@ -70,8 +70,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
offsetof(struct nft_flow_key, cvlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
- } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN);
+ } else if (match->dissector.used_keys &
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) &&
(match->key.basic.n_proto == htons(ETH_P_8021Q) ||
match->key.basic.n_proto == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.vlan.vlan_tpid;
@@ -80,7 +81,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct nft_flow_key, vlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e57eb168ee13..53c9e76473ba 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -470,7 +470,6 @@ __build_packet_message(struct nfnl_log_net *log,
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
- ktime_t tstamp;
nlh = nfnl_msg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
@@ -599,10 +598,9 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- tstamp = skb_tstamp_cond(skb, false);
- if (hooknum <= NF_INET_FORWARD && tstamp) {
+ if (hooknum <= NF_INET_FORWARD) {
+ struct timespec64 kts = ktime_to_timespec64(skb_tstamp_cond(skb, true));
struct nfulnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 9a85e797ed58..e596d1a842f7 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_byteorder *priv = nft_expr_priv(expr);
u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- union { u32 u32; u16 u16; } *s, *d;
+ u16 *s16, *d16;
unsigned int i;
- s = (void *)src;
- d = (void *)dst;
+ s16 = (void *)src;
+ d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -62,11 +62,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = ntohl((__force __be32)s[i].u32);
+ dst[i] = ntohl((__force __be32)src[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = (__force __u32)htonl(s[i].u32);
+ dst[i] = (__force __u32)htonl(src[i]);
break;
}
break;
@@ -74,11 +74,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = ntohs((__force __be16)s[i].u16);
+ d16[i] = ntohs((__force __be16)s16[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = (__force __u16)htons(s[i].u16);
+ d16[i] = (__force __u16)htons(s16[i]);
break;
}
break;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 6eb21a4f5698..cd4652259095 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -162,7 +162,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
memcpy(key + reg->offset, data, reg->len);
memcpy(mask + reg->offset, datamask, reg->len);
- flow->match.dissector.used_keys |= BIT(reg->key);
+ flow->match.dissector.used_keys |= BIT_ULL(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
if (reg->key == FLOW_DISSECTOR_KEY_META &&
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 38958e067aa8..86bb9d7797d9 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -108,7 +108,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
+ strscpy_pad((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP;
return;
}
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
}
nf_ct_set(skb, ct, IP_CT_NEW);
@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
+ __set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 4fb34d76dbea..5c5cc01c73c5 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_OBJECT)
+ return -EOPNOTSUPP;
+
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 6e049fd48760..04b51f285332 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -14,17 +14,18 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
+
const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
[NFTA_FIB_DREG] = { .type = NLA_U32 },
[NFTA_FIB_RESULT] = { .type = NLA_U32 },
- [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL),
};
EXPORT_SYMBOL(nft_fib_policy);
-#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
- NFTA_FIB_F_PRESENT)
-
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
{
@@ -77,7 +78,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
- if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ if (priv->flags == 0)
return -EINVAL;
if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
@@ -150,7 +151,7 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
if (priv->flags & NFTA_FIB_F_PRESENT)
*dreg = !!dev;
else
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 5ef9146e74ad..ab3362c483b4 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -408,8 +408,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
+ if (!nft_use_inc(&flowtable->use))
+ return -EMFILE;
+
priv->flowtable = flowtable;
- flowtable->use++;
return nf_ct_netns_get(ctx->net, ctx->family);
}
@@ -428,7 +430,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
- priv->flowtable->use++;
+ nft_use_inc_restore(&priv->flowtable->use);
}
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 3d76ebfe8939..fccb3cf7749c 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
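+/* Deactivate all rules of a chain bound through this immediate
+ * expression.
+ */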
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+ struct nft_chain *chain,
+ enum nft_trans_phase phase)
+{
+ struct nft_ctx chain_ctx;
+ struct nft_rule *rule;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
const struct nft_data *data = &priv->data;
- struct nft_ctx chain_ctx;
struct nft_chain *chain;
- struct nft_rule *rule;
if (priv->dreg == NFT_REG_VERDICT) {
switch (data->verdict.code) {
@@ -143,23 +155,20 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
if (!nft_chain_binding(chain))
break;
- chain_ctx = *ctx;
- chain_ctx.chain = chain;
-
- list_for_each_entry(rule, &chain->rules, list)
- nft_rule_expr_deactivate(&chain_ctx, rule, phase);
-
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
nf_tables_unbind_chain(ctx, chain);
- fallthrough;
+ nft_deactivate_next(ctx->net, chain);
+ break;
case NFT_TRANS_PREPARE:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_deactivate_next(ctx->net, chain);
break;
default:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_chain_del(chain);
chain->bound = false;
- chain->table->use--;
+ nft_use_dec(&chain->table->use);
break;
}
break;
@@ -198,7 +207,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
* let the transaction records release this chain and its rules.
*/
if (chain->bound) {
- chain->use--;
+ nft_use_dec(&chain->use);
break;
}
@@ -206,9 +215,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
chain_ctx = *ctx;
chain_ctx.chain = chain;
- chain->use--;
+ nft_use_dec(&chain->use);
list_for_each_entry_safe(rule, n, &chain->rules, list) {
- chain->use--;
+ nft_use_dec(&chain->use);
list_del(&rule->list);
nf_tables_rule_destroy(&chain_ctx, rule);
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 29ac48cdd6db..870e5b113d13 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -90,7 +90,8 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
- [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV),
};
static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -120,9 +121,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (tb[NFTA_LOOKUP_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
- if (flags & ~NFT_LOOKUP_F_INV)
- return -EINVAL;
-
if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index b115d77fbbc7..8a14aaca93bb 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -20,7 +20,8 @@ struct nft_masq {
};
static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
- [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
[NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
@@ -47,11 +48,8 @@ static int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- if (tb[NFTA_MASQ_FLAGS]) {
+ if (tb[NFTA_MASQ_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8fdc7318c03c..f7da7c43333b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -185,12 +185,12 @@ static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
case NFT_META_IIFKIND:
if (!in || !in->rtnl_link_ops)
return false;
- strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
break;
case NFT_META_OIFKIND:
if (!out || !out->rtnl_link_ops)
return false;
- strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
break;
default:
return false;
@@ -206,7 +206,7 @@ static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
{
- strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad((char *)dest, dev ? dev->name : "", IFNAMSIZ);
}
static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 5c29915ab028..583885ce7232 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -132,7 +132,8 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_nat_validate(const struct nft_ctx *ctx,
@@ -246,11 +247,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_NAT_FLAGS]) {
+ if (tb[NFTA_NAT_FLAGS])
priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EOPNOTSUPP;
- }
return nf_ct_netns_get(ctx->net, family);
}
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index a48dd5b5d45b..509011b1ef59 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
if (IS_ERR(obj))
return -ENOENT;
+ if (!nft_use_inc(&obj->use))
+ return -EMFILE;
+
nft_objref_priv(expr) = obj;
- obj->use++;
return 0;
}
@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
if (phase == NFT_TRANS_COMMIT)
return;
- obj->use--;
+ nft_use_dec(&obj->use);
}
static void nft_objref_activate(const struct nft_ctx *ctx,
@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->use++;
+ nft_use_inc_restore(&obj->use);
}
static const struct nft_expr_ops nft_objref_ops = {
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 70820c66b591..7f61506e5b44 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -23,7 +23,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct nft_osf *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
struct sk_buff *skb = pkt->skb;
- char os_match[NFT_OSF_MAXGENRELEN + 1];
+ char os_match[NFT_OSF_MAXGENRELEN];
const struct tcphdr *tcp;
struct nf_osf_data data;
struct tcphdr _tcph;
@@ -45,7 +45,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
- strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
} else {
if (priv->flags & NFT_OSF_F_VERSION)
snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
@@ -53,7 +53,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
- strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
}
}
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a70196ffcb1e..a58bd8d291ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -22,7 +22,8 @@ struct nft_redir {
static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
[NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_REDIR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_REDIR_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_redir_validate(const struct nft_ctx *ctx,
@@ -68,11 +69,8 @@ static int nft_redir_init(const struct nft_ctx *ctx,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_REDIR_FLAGS]) {
+ if (tb[NFTA_REDIR_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
return nf_ct_netns_get(ctx->net, ctx->family);
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0b73cb0e752f..524763659f25 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
}
static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
{
struct nft_rhash_elem *he = priv;
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext)) {
- nft_set_elem_change_active(net, set, &he->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
}
static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
- if (he != NULL &&
- !nft_rhash_flush(net, set, he))
- he = NULL;
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
rcu_read_unlock();
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
if (he == NULL)
return false;
- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+ nft_set_elem_dead(&he->ext);
+
+ return true;
}
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
@@ -314,25 +311,51 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
static void nft_rhash_gc(struct work_struct *work)
{
+ struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
- struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- break;
+ if (PTR_ERR(he) != -EAGAIN) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
continue;
}
+ /* Ruleset has been updated; try again later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
nft_rhash_expr_needs_gc_run(set, &he->ext))
goto needs_gc_run;
@@ -340,26 +363,26 @@ static void nft_rhash_gc(struct work_struct *work)
if (!nft_set_elem_expired(&he->ext))
continue;
needs_gc_run:
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- break;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
+ nft_trans_gc_elem_add(gc, he);
}
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+ /* Catchall list iteration requires the rcu read-side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
- he = nft_set_catchall_gc(set);
- if (he) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, he);
- }
- nft_set_gc_batch_complete(gcb);
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
@@ -394,7 +417,7 @@ static int nft_rhash_init(const struct nft_set *set,
return err;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
nft_rhash_gc_init(set);
return 0;
@@ -422,7 +445,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
};
cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index db526cb7a485..6af9c9ed4b5c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -566,8 +566,9 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext) ||
- (genmask &&
+ if (nft_set_elem_expired(&f->mt[b].e->ext))
+ goto next_match;
+ if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
@@ -602,7 +603,7 @@ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net));
}
/**
@@ -901,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
- int rule = f->rules++, group, ret, bit_offset = 0;
+ int rule = f->rules, group, ret, bit_offset = 0;
- ret = pipapo_resize(f, f->rules - 1, f->rules);
+ ret = pipapo_resize(f, f->rules, f->rules + 1);
if (ret)
return ret;
+ f->rules++;
+
for (group = 0; group < f->groups; group++) {
int i, v;
u8 mask;
@@ -1051,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
step++;
if (step >= len) {
if (!masks) {
- pipapo_insert(f, base, 0);
+ err = pipapo_insert(f, base, 0);
+ if (err < 0)
+ return err;
masks = 1;
}
goto out;
@@ -1234,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
ret = pipapo_expand(f, start, end, f->groups * f->bb);
+ if (ret < 0)
+ return ret;
+
if (f->bsize > bsize_max)
bsize_max = f->bsize;
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
}
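+/* Deactivate the element's data (e.g. map data or object references)
+ * before it is dropped from the matching data.
+ */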
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+ struct nft_pipapo_elem *e)
+
+{
+ struct nft_set_elem elem = {
+ .priv = e,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+}
+
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @set: nftables API set representation
+ * @_set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
{
+ struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
f--;
i--;
e = f->mt[rulemap[i].to].e;
- if (nft_set_elem_expired(&e->ext) &&
- !nft_set_elem_mark_busy(&e->ext)) {
+
+ /* Synchronous gc never fails, so there is no need to set the
+ * NFT_SET_ELEM_DEAD_BIT.
+ */
+ if (nft_set_elem_expired(&e->ext)) {
priv->dirty = true;
- pipapo_drop(m, rulemap);
- rcu_barrier();
- nft_set_elem_destroy(set, e, true);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+ break;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+ nft_trans_gc_elem_add(gc, e);
/* And check again current first rule, which is now the
* first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
}
}
- e = nft_set_catchall_gc(set);
- if (e)
- nft_set_elem_destroy(set, e, true);
-
- priv->last_gc = jiffies;
+ gc = nft_trans_gc_catchall(gc, 0);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
/**
@@ -1664,6 +1697,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
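+/* lockdep helper: priv->match updates only happen under the transaction
+ * (commit) mutex; trivially true without CONFIG_PROVE_LOCKING.
+ */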
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
@@ -1672,7 +1716,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
if (!priv->dirty)
return;
- m = rcu_dereference(priv->match);
+ m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
new_clone = pipapo_clone(m);
if (IS_ERR(new_clone))
@@ -1699,14 +1743,9 @@ static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_pipapo_elem *e;
-
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
- if (IS_ERR(e))
- return;
+ struct nft_pipapo_elem *e = elem->priv;
nft_set_elem_change_active(net, set, &e->ext);
- nft_set_elem_clear_busy(&e->ext);
}
/**
@@ -1918,10 +1957,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
data = (const u8 *)nft_set_ext_key(&e->ext);
- e = pipapo_get(net, set, data, 0);
- if (IS_ERR(e))
- return;
-
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *match_start, *match_end;
@@ -1929,7 +1964,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
int i, start, rules_fx;
match_start = data;
- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+
+ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
+ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+ else
+ match_end = data;
start = first_rule;
rules_fx = rules_f0;
@@ -2001,8 +2040,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
goto cont;
e = f->mt[r].e;
- if (nft_set_elem_expired(&e->ext))
- goto cont;
elem.priv = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5c05c9b990fb..c6435e709231 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
set->klen);
}
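+/* Elements already marked dead by async gc are treated like expired
+ * ones, so lookups skip them until they are released.
+ */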
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
continue;
}
- if (nft_set_elem_expired(&rbe->ext))
+ if (nft_rbtree_elem_expired(rbe))
return false;
if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
+ !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -215,38 +221,70 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
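+/* Synchronous gc removal: deactivate the element's data and erase it
+ * from the rbtree.
+ */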
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+ struct nft_rbtree_elem *rbe,
+ u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb;
+ struct net *net = read_pnet(&set->net);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_trans_gc *gc;
- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
- if (!gcb)
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
return -ENOMEM;
- /* search for expired end interval coming before this element. */
+ /* Search for the end interval coming before this element.
+ * End intervals don't carry a timeout extension; they
+ * are coupled with the interval start element.
+ */
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- if (nft_rbtree_interval_end(rbe_prev))
+ if (nft_rbtree_interval_end(rbe_prev) &&
+ nft_set_elem_active(&rbe_prev->ext, genmask))
break;
prev = rb_prev(prev);
}
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- atomic_dec(&set->nelems);
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+ /* There is always room in this trans gc for this element:
+ * memory allocation never actually happens here, hence the
+ * warning splat if it ever did. No need to set
+ * NFT_SET_ELEM_DEAD_BIT; this is synchronous gc, which never
+ * fails.
+ */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
}
- rb_erase(&rbe->node, &priv->root);
- atomic_dec(&set->nelems);
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
- nft_set_gc_batch_add(gcb, rbe);
- nft_set_gc_batch_complete(gcb);
+ nft_trans_gc_elem_add(gc, rbe);
+
+ nft_trans_gc_queue_sync_done(gc);
return 0;
}
@@ -321,7 +359,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports. */
if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe);
+ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
@@ -474,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(net, set, &rbe->ext);
- nft_set_elem_clear_busy(&rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
@@ -482,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
{
struct nft_rbtree_elem *rbe = priv;
- if (!nft_set_elem_mark_busy(&rbe->ext) ||
- !nft_is_active(net, &rbe->ext)) {
- nft_set_elem_change_active(net, set, &rbe->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
@@ -544,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&rbe->ext))
- goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
@@ -564,26 +596,43 @@ cont:
static void nft_rbtree_gc(struct work_struct *work)
{
- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
+ unsigned int gc_seq;
struct net *net;
- u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- genmask = nft_genmask_cur(net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated; try again later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (!nft_set_elem_active(&rbe->ext, genmask))
- continue;
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
@@ -596,46 +645,36 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext)) {
- rbe_end = NULL;
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
continue;
- }
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- rbe_prev = NULL;
- }
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (!gcb)
- break;
+ nft_set_elem_dead(&rbe_end->ext);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe);
- rbe_prev = rbe;
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- if (rbe_end) {
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_end);
- rb_erase(&rbe_end->node, &priv->root);
- rbe_end = NULL;
- }
- node = rb_next(node);
- if (!node)
- break;
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
}
- if (rbe_prev)
- rb_erase(&rbe_prev->node, &priv->root);
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
- rbe = nft_set_catchall_gc(set);
- if (rbe) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, rbe);
- }
- nft_set_gc_batch_complete(gcb);
-
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 84def74698b7..9ed85be79452 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
- *dest = sk->sk_mark;
+ *dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
return;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 470282cf3fae..21624d68314f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -768,7 +768,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
m->u.user.match_size = msize;
strscpy(name, match->name, sizeof(name));
module_put(match->me);
- strncpy(m->u.user.name, name, sizeof(m->u.user.name));
+ strscpy_pad(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
@@ -1148,7 +1148,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
t->u.user.target_size = tsize;
strscpy(name, target->name, sizeof(name));
module_put(target->me);
- strncpy(t->u.user.name, name, sizeof(t->u.user.name));
+ strscpy_pad(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
@@ -2014,4 +2014,3 @@ static void __exit xt_fini(void)
module_init(xt_init);
module_exit(xt_fini);
-
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 68ccbe50bb1e..5d1fb7018dba 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -29,7 +29,7 @@
if (tbl == NULL) \
return NULL; \
term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
- strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+ strscpy_pad(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
*term = (struct type##_error)typ2##_ERROR_INIT; \
tbl->repl.valid_hooks = hook_mask; \
tbl->repl.num_entries = nhooks + 1; \
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 7013f55f05d1..76e01f292aaf 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 85d7ecb05728..9518ab56ec98 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -149,7 +149,4 @@ enum {
/* NetLabel protocol functions */
int netlbl_cipsov4_genl_init(void);
-/* Free the memory associated with a CIPSOv4 DOI definition */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry);
-
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 383631873748..642b9d382fb4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -84,7 +84,7 @@ struct listeners {
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
+ return nlk_test_bit(KERNEL_SOCKET, sk);
}
struct netlink_table *nl_table __read_mostly;
@@ -349,9 +349,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
static void netlink_overrun(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
@@ -677,6 +675,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sock, unsigned long *groups);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -704,6 +703,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
+ release = nl_table[protocol].release;
netlink_unlock_table();
if (err < 0)
@@ -719,6 +719,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
nlk->module = module;
nlk->netlink_bind = bind;
nlk->netlink_unbind = unbind;
+ nlk->netlink_release = release;
out:
return err;
@@ -763,6 +764,8 @@ static int netlink_release(struct socket *sock)
* OK. Socket is unlinked, any packets that arrive now
* will be purged.
*/
+ if (nlk->netlink_release)
+ nlk->netlink_release(sk, nlk->groups);
/* must not acquire netlink_table_lock in any way again before unbind
* and notifying genetlink is done as otherwise it might deadlock
@@ -1402,9 +1405,7 @@ EXPORT_SYMBOL_GPL(netlink_has_listeners);
bool netlink_strict_get_check(struct sk_buff *skb)
{
- const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);
-
- return nlk->flags & NETLINK_F_STRICT_CHK;
+ return nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
}
EXPORT_SYMBOL_GPL(netlink_strict_get_check);
@@ -1432,6 +1433,8 @@ struct netlink_broadcast_data {
int delivered;
gfp_t allocation;
struct sk_buff *skb, *skb2;
+ int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+ void *tx_data;
};
static void do_one_broadcast(struct sock *sk,
@@ -1448,7 +1451,7 @@ static void do_one_broadcast(struct sock *sk,
return;
if (!net_eq(sock_net(sk), p->net)) {
- if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ if (!nlk_test_bit(LISTEN_ALL_NSID, sk))
return;
if (!peernet_has_id(sock_net(sk), p->net))
@@ -1481,10 +1484,17 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
p->delivery_failure = 1;
goto out;
}
+
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ kfree_skb(p->skb2);
+ p->skb2 = NULL;
+ goto out;
+ }
+
if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
@@ -1496,7 +1506,7 @@ static void do_one_broadcast(struct sock *sk,
val = netlink_broadcast_deliver(sk, p->skb2);
if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
p->delivery_failure = 1;
} else {
p->congested |= val;
@@ -1507,8 +1517,12 @@ out:
sock_put(sk);
}
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
- u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+ u32 portid,
+ u32 group, gfp_t allocation,
+ int (*filter)(struct sock *dsk,
+ struct sk_buff *skb, void *data),
+ void *filter_data)
{
struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
@@ -1527,6 +1541,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
info.allocation = allocation;
info.skb = skb;
info.skb2 = NULL;
+ info.tx_filter = filter;
+ info.tx_data = filter_data;
/* While we sleep in clone, do not allow to change socket list */
@@ -1552,6 +1568,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
}
return -ESRCH;
}
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
+ u32 group, gfp_t allocation)
+{
+ return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
+ NULL, NULL);
+}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
@@ -1576,7 +1600,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
ret = 1;
goto out;
}
@@ -1629,10 +1653,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
old = test_bit(group - 1, nlk->groups);
subscriptions = nlk->subscriptions - old + new;
- if (new)
- __set_bit(group - 1, nlk->groups);
- else
- __clear_bit(group - 1, nlk->groups);
+ __assign_bit(group - 1, nlk->groups, new);
netlink_update_subscriptions(&nlk->sk, subscriptions);
netlink_update_listeners(&nlk->sk);
}
@@ -1643,7 +1664,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
unsigned int val = 0;
- int err;
+ int nr = -1;
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
@@ -1654,14 +1675,12 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
- if (val)
- nlk->flags |= NETLINK_F_RECV_PKTINFO;
- else
- nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
- err = 0;
+ nr = NETLINK_F_RECV_PKTINFO;
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
+ int err;
+
if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
@@ -1681,61 +1700,38 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
nlk->netlink_unbind(sock_net(sk), val);
- err = 0;
break;
}
case NETLINK_BROADCAST_ERROR:
- if (val)
- nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
- else
- nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
- err = 0;
+ nr = NETLINK_F_BROADCAST_SEND_ERROR;
break;
case NETLINK_NO_ENOBUFS:
+ assign_bit(NETLINK_F_RECV_NO_ENOBUFS, &nlk->flags, val);
if (val) {
- nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
- } else {
- nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
- err = 0;
break;
case NETLINK_LISTEN_ALL_NSID:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
return -EPERM;
-
- if (val)
- nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
- else
- nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
- err = 0;
+ nr = NETLINK_F_LISTEN_ALL_NSID;
break;
case NETLINK_CAP_ACK:
- if (val)
- nlk->flags |= NETLINK_F_CAP_ACK;
- else
- nlk->flags &= ~NETLINK_F_CAP_ACK;
- err = 0;
+ nr = NETLINK_F_CAP_ACK;
break;
case NETLINK_EXT_ACK:
- if (val)
- nlk->flags |= NETLINK_F_EXT_ACK;
- else
- nlk->flags &= ~NETLINK_F_EXT_ACK;
- err = 0;
+ nr = NETLINK_F_EXT_ACK;
break;
case NETLINK_GET_STRICT_CHK:
- if (val)
- nlk->flags |= NETLINK_F_STRICT_CHK;
- else
- nlk->flags &= ~NETLINK_F_STRICT_CHK;
- err = 0;
+ nr = NETLINK_F_STRICT_CHK;
break;
default:
- err = -ENOPROTOOPT;
+ return -ENOPROTOOPT;
}
- return err;
+ if (nr >= 0)
+ assign_bit(nr, &nlk->flags, val);
+ return 0;
}
static int netlink_getsockopt(struct socket *sock, int level, int optname,
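
Every purely boolean option in the setsockopt rewrite above now just records its bit number in 'nr' and falls through to a single assign_bit() call; only group membership, NETLINK_NO_ENOBUFS (which must also clear the congested state and wake readers) and the capability check for NETLINK_LISTEN_ALL_NSID keep extra logic. Nothing changes for userspace; a small runnable sketch of the options involved:

        #include <stdio.h>
        #include <sys/socket.h>
        #include <linux/netlink.h>

        #ifndef SOL_NETLINK
        #define SOL_NETLINK 270         /* fallback for older headers */
        #endif

        int main(void)
        {
                int one = 1;
                int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

                if (fd < 0)
                        return 1;
                /* Each of these now lands in assign_bit(nr, &nlk->flags, val). */
                setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one));
                setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &one, sizeof(one));
                setsockopt(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &one, sizeof(one));
                return 0;
        }
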
@@ -1802,7 +1798,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & flag ? 1 : 0;
+ val = test_bit(flag, &nlk->flags);
if (put_user(len, optlen) ||
copy_to_user(optval, &val, len))
@@ -1979,9 +1975,9 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ if (nlk_test_bit(RECV_PKTINFO, sk))
netlink_cmsg_recv_pktinfo(msg, skb);
- if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ if (nlk_test_bit(LISTEN_ALL_NSID, sk))
netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
@@ -2058,7 +2054,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_F_KERNEL_SOCKET;
+ set_bit(NETLINK_F_KERNEL_SOCKET, &nlk->flags);
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2069,6 +2065,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (cfg) {
nl_table[unit].bind = cfg->bind;
nl_table[unit].unbind = cfg->unbind;
+ nl_table[unit].release = cfg->release;
nl_table[unit].flags = cfg->flags;
}
nl_table[unit].registered = 1;
@@ -2192,7 +2189,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) {
+ if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
nlmsg_end(skb, nlh);
@@ -2321,8 +2318,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *control)
{
- struct netlink_sock *nlk, *nlk2;
struct netlink_callback *cb;
+ struct netlink_sock *nlk;
struct sock *sk;
int ret;
@@ -2357,8 +2354,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
- nlk2 = nlk_sk(NETLINK_CB(skb).sk);
- cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
+ cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
if (control->start) {
cb->extack = control->extack;
@@ -2402,7 +2398,7 @@ netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
{
size_t tlvlen;
- if (!extack || !(nlk->flags & NETLINK_F_EXT_ACK))
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
return 0;
tlvlen = 0;
@@ -2474,7 +2470,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
* requests to cap the error message, and get extra error data if
* requested.
*/
- if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
+ if (err && !test_bit(NETLINK_F_CAP_ACK, &nlk->flags))
payload += nlmsg_len(nlh);
else
flags |= NLM_F_CAPPED;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 90a3198a9b7f..2145979b9986 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -8,14 +8,16 @@
#include <net/sock.h>
/* flags */
-#define NETLINK_F_KERNEL_SOCKET 0x1
-#define NETLINK_F_RECV_PKTINFO 0x2
-#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_F_RECV_NO_ENOBUFS 0x8
-#define NETLINK_F_LISTEN_ALL_NSID 0x10
-#define NETLINK_F_CAP_ACK 0x20
-#define NETLINK_F_EXT_ACK 0x40
-#define NETLINK_F_STRICT_CHK 0x80
+enum {
+ NETLINK_F_KERNEL_SOCKET,
+ NETLINK_F_RECV_PKTINFO,
+ NETLINK_F_BROADCAST_SEND_ERROR,
+ NETLINK_F_RECV_NO_ENOBUFS,
+ NETLINK_F_LISTEN_ALL_NSID,
+ NETLINK_F_CAP_ACK,
+ NETLINK_F_EXT_ACK,
+ NETLINK_F_STRICT_CHK,
+};
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
@@ -23,10 +25,10 @@
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct sock sk;
+ unsigned long flags;
u32 portid;
u32 dst_portid;
u32 dst_group;
- u32 flags;
u32 subscriptions;
u32 ngroups;
unsigned long *groups;
@@ -42,6 +44,8 @@ struct netlink_sock {
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
+ void (*netlink_release)(struct sock *sk,
+ unsigned long *groups);
struct module *module;
struct rhash_head node;
@@ -54,6 +58,8 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
+#define nlk_test_bit(nr, sk) test_bit(NETLINK_F_##nr, &nlk_sk(sk)->flags)
+
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
@@ -64,6 +70,8 @@ struct netlink_table {
struct module *module;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sk,
+ unsigned long *groups);
int registered;
};
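
With the flags stored as bit numbers instead of masks, nlk_test_bit() pastes its short argument onto the NETLINK_F_ prefix, so call sites read as nlk_test_bit(EXT_ACK, sk) while the expansion stays a plain test_bit() on nlk->flags. A standalone sketch of the same token-pasting pattern, with illustrative names and a non-atomic test for brevity (the kernel version uses test_bit() so reads are safe against concurrent assign_bit() updates):

        #include <stdio.h>

        enum { F_KERNEL_SOCKET, F_RECV_PKTINFO, F_EXT_ACK };

        struct obj { unsigned long flags; };

        #define obj_test_bit(nr, o) (!!((o)->flags & (1UL << F_##nr)))

        int main(void)
        {
                struct obj o = { .flags = 1UL << F_EXT_ACK };

                /* Prints "1 0": EXT_ACK is set, RECV_PKTINFO is not. */
                printf("%d %d\n", obj_test_bit(EXT_ACK, &o),
                       obj_test_bit(RECV_PKTINFO, &o));
                return 0;
        }
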
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index e4f21b1067bc..9c4f231be275 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -27,15 +27,15 @@ static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
if (nlk->cb_running)
flags |= NDIAG_FLAG_CB_RUNNING;
- if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ if (nlk_test_bit(RECV_PKTINFO, sk))
flags |= NDIAG_FLAG_PKTINFO;
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
flags |= NDIAG_FLAG_BROADCAST_ERROR;
- if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)
+ if (nlk_test_bit(RECV_NO_ENOBUFS, sk))
flags |= NDIAG_FLAG_NO_ENOBUFS;
- if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ if (nlk_test_bit(LISTEN_ALL_NSID, sk))
flags |= NDIAG_FLAG_LISTEN_ALL_NSID;
- if (nlk->flags & NETLINK_F_CAP_ACK)
+ if (nlk_test_bit(CAP_ACK, sk))
flags |= NDIAG_FLAG_CAP_ACK;
return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a157247a1e45..8315d31b53db 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -52,6 +52,18 @@ static void genl_unlock_all(void)
up_write(&cb_lock);
}
+static void genl_op_lock(const struct genl_family *family)
+{
+ if (!family->parallel_ops)
+ genl_lock();
+}
+
+static void genl_op_unlock(const struct genl_family *family)
+{
+ if (!family->parallel_ops)
+ genl_unlock();
+}
+
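
genl_op_lock()/genl_op_unlock() centralize the "serialize unless the family opted into parallel_ops" rule. Note the inverted use in genl_family_rcv_msg_dumpit() further down: the op lock is dropped around __netlink_dump_start() because the dump path re-enters genl_start(), which takes the same non-recursive lock around ops->start(). A sketch of the two shapes, using names from this diff:

        /* doit path: hold the per-family lock across the handler. */
        genl_op_lock(family);           /* no-op when family->parallel_ops */
        err = genl_family_rcv_msg(family, skb, nlh, extack);
        genl_op_unlock(family);

        /* dumpit path: drop it so genl_start() can take it again. */
        genl_op_unlock(family);
        err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
        genl_op_lock(family);
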
static DEFINE_IDR(genl_fam_idr);
/*
@@ -593,8 +605,12 @@ static int genl_validate_ops(const struct genl_family *family)
return -EINVAL;
/* Check sort order */
- if (a->cmd < b->cmd)
+ if (a->cmd < b->cmd) {
continue;
+ } else if (a->cmd > b->cmd) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
if (a->internal_flags != b->internal_flags ||
((a->flags ^ b->flags) & ~(GENL_CMD_CAP_DO |
@@ -828,64 +844,63 @@ static int genl_start(struct netlink_callback *cb)
genl_family_rcv_msg_attrs_free(attrs);
return -ENOMEM;
}
- info->family = ctx->family;
info->op = *ops;
- info->attrs = attrs;
+ info->info.family = ctx->family;
+ info->info.snd_seq = cb->nlh->nlmsg_seq;
+ info->info.snd_portid = NETLINK_CB(cb->skb).portid;
+ info->info.nlhdr = cb->nlh;
+ info->info.genlhdr = nlmsg_data(cb->nlh);
+ info->info.attrs = attrs;
+ genl_info_net_set(&info->info, sock_net(cb->skb->sk));
+ info->info.extack = cb->extack;
+ memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr));
cb->data = info;
if (ops->start) {
- if (!ctx->family->parallel_ops)
- genl_lock();
+ genl_op_lock(ctx->family);
rc = ops->start(cb);
- if (!ctx->family->parallel_ops)
- genl_unlock();
+ genl_op_unlock(ctx->family);
}
if (rc) {
- genl_family_rcv_msg_attrs_free(info->attrs);
+ genl_family_rcv_msg_attrs_free(info->info.attrs);
genl_dumpit_info_free(info);
cb->data = NULL;
}
return rc;
}
-static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+static int genl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct genl_split_ops *ops = &genl_dumpit_info(cb)->op;
+ struct genl_dumpit_info *dump_info = cb->data;
+ const struct genl_split_ops *ops = &dump_info->op;
+ struct genl_info *info = &dump_info->info;
int rc;
- genl_lock();
+ info->extack = cb->extack;
+
+ genl_op_lock(info->family);
rc = ops->dumpit(skb, cb);
- genl_unlock();
+ genl_op_unlock(info->family);
return rc;
}
-static int genl_lock_done(struct netlink_callback *cb)
+static int genl_done(struct netlink_callback *cb)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- const struct genl_split_ops *ops = &info->op;
+ struct genl_dumpit_info *dump_info = cb->data;
+ const struct genl_split_ops *ops = &dump_info->op;
+ struct genl_info *info = &dump_info->info;
int rc = 0;
+ info->extack = cb->extack;
+
if (ops->done) {
- genl_lock();
+ genl_op_lock(info->family);
rc = ops->done(cb);
- genl_unlock();
+ genl_op_unlock(info->family);
}
genl_family_rcv_msg_attrs_free(info->attrs);
- genl_dumpit_info_free(info);
- return rc;
-}
-
-static int genl_parallel_done(struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- const struct genl_split_ops *ops = &info->op;
- int rc = 0;
-
- if (ops->done)
- rc = ops->done(cb);
- genl_family_rcv_msg_attrs_free(info->attrs);
- genl_dumpit_info_free(info);
+ genl_dumpit_info_free(dump_info);
return rc;
}
@@ -897,6 +912,14 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
int hdrlen, struct net *net)
{
struct genl_start_context ctx;
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = &ctx,
+ .start = genl_start,
+ .dump = genl_dumpit,
+ .done = genl_done,
+ .extack = extack,
+ };
int err;
ctx.family = family;
@@ -905,31 +928,9 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
ctx.ops = ops;
ctx.hdrlen = hdrlen;
- if (!family->parallel_ops) {
- struct netlink_dump_control c = {
- .module = family->module,
- .data = &ctx,
- .start = genl_start,
- .dump = genl_lock_dumpit,
- .done = genl_lock_done,
- .extack = extack,
- };
-
- genl_unlock();
- err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- genl_lock();
- } else {
- struct netlink_dump_control c = {
- .module = family->module,
- .data = &ctx,
- .start = genl_start,
- .dump = ops->dumpit,
- .done = genl_parallel_done,
- .extack = extack,
- };
-
- err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- }
+ genl_op_unlock(family);
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ genl_op_lock(family);
return err;
}
@@ -953,9 +954,9 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
info.snd_seq = nlh->nlmsg_seq;
info.snd_portid = NETLINK_CB(skb).portid;
+ info.family = family;
info.nlhdr = nlh;
info.genlhdr = nlmsg_data(nlh);
- info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
info.extack = extack;
genl_info_net_set(&info, net);
@@ -1061,13 +1062,9 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (family == NULL)
return -ENOENT;
- if (!family->parallel_ops)
- genl_lock();
-
+ genl_op_lock(family);
err = genl_family_rcv_msg(family, skb, nlh, extack);
-
- if (!family->parallel_ops)
- genl_unlock();
+ genl_op_unlock(family);
return err;
}
@@ -1392,7 +1389,7 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx;
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
const struct genl_family *rt;
struct genl_op_iter i;
int err;
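
struct genl_dumpit_info now embeds a fully populated struct genl_info, which is why users such as nfc (below) switch from info->attrs to info->info.attrs. A minimal kernel-style sketch of a dump handler reading the embedded info; MY_ATTR_INDEX is a hypothetical attribute:

        static int my_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
        {
                const struct genl_dumpit_info *di = genl_dumpit_info(cb);
                struct nlattr **tb = di->info.attrs;    /* was di->attrs */

                if (!tb[MY_ATTR_INDEX])                 /* hypothetical */
                        return -EINVAL;
                return 0;
        }
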
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index eb8ccbd58df7..96e91ab71573 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -660,6 +660,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
goto out_release;
}
+ if (sock->state == SS_CONNECTING) {
+ err = -EALREADY;
+ goto out_release;
+ }
+
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index e9ac6a6f934e..aa1dbf654c3e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -110,10 +110,10 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
struct nfc_dev *dev;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->info.attrs[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(info->info.attrs[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index cab1e02b63e0..fd66014d8a76 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -27,6 +27,7 @@
#include <net/sctp/checksum.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
@@ -781,7 +782,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk,
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
return -ENOMEM;
}
@@ -852,6 +853,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
struct sk_buff *skb, u16 mru,
struct sw_flow_key *key)
{
+ enum ovs_drop_reason reason;
u16 orig_network_offset = 0;
if (eth_p_mpls(skb->protocol)) {
@@ -861,6 +863,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
+ reason = OVS_DROP_FRAG_L2_TOO_LONG;
goto err;
}
@@ -901,12 +904,13 @@ static void ovs_fragment(struct net *net, struct vport *vport,
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(key->eth.type), mru,
vport->dev->mtu);
+ reason = OVS_DROP_FRAG_INVALID_PROTO;
goto err;
}
return;
err:
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, reason);
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
@@ -933,10 +937,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ovs_fragment(net, vport, skb, mru, key);
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
}
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
}
}
@@ -1010,7 +1014,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
return clone_execute(dp, skb, key, 0, nla_data(actions),
nla_len(actions), true, false);
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
return 0;
}
@@ -1036,7 +1040,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
if (last)
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1297,6 +1301,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (trace_ovs_do_execute_action_enabled())
trace_ovs_do_execute_action(dp, skb, key, a, rem);
+ /* Actions that rightfully have to consume the skb should do it
+ * and return directly.
+ */
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT: {
int port = nla_get_u32(a);
@@ -1332,6 +1339,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
output_userspace(dp, skb, key, a, attr,
len, OVS_CB(skb)->cutlen);
OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
break;
case OVS_ACTION_ATTR_HASH:
@@ -1446,7 +1457,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_METER:
if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_METER);
return 0;
}
break;
@@ -1477,15 +1488,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return dec_ttl_exception_handler(dp, skb,
key, a);
break;
+
+ case OVS_ACTION_ATTR_DROP: {
+ enum ovs_drop_reason reason = nla_get_u32(a)
+ ? OVS_DROP_EXPLICIT_WITH_ERROR
+ : OVS_DROP_EXPLICIT;
+
+ ovs_kfree_skb_reason(skb, reason);
+ return 0;
+ }
}
if (unlikely(err)) {
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
return err;
}
}
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1547,7 +1567,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
/* Out of per CPU action FIFO space. Drop the 'skb' and
* log an error.
*/
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);
if (net_ratelimit()) {
if (actions) { /* Sample action */
@@ -1599,7 +1619,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
err = -ENETDOWN;
goto out;
}
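
Each ovs_kfree_skb_reason() call above hands the kfree_skb tracepoint a u32 whose upper bits name the subsystem; drop.h (added below) builds the values, and datapath.c registers the matching string table with drop_reasons_register_subsys() so tracepoint consumers can decode them. A sketch of the layout, assuming the upstream SKB_DROP_REASON_SUBSYS_SHIFT at the time of this change:

        /* Fragment: how enum ovs_drop_reason values are laid out.  The first
         * listed reason is base + 1 because __OVS_DROP_REASON holds the base.
         */
        u32 base = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
                   SKB_DROP_REASON_SUBSYS_SHIFT;
        /* OVS_DROP_LAST_ACTION == base + 1, OVS_DROP_ACTION_ERROR == base + 2, ... */
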
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 331730fd3580..0b9a785dea45 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_act_ct.h>
#include "datapath.h"
+#include "drop.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
@@ -455,45 +456,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -852,36 +814,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -1094,7 +1036,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
skb_push_rcsum(skb, nh_ofs);
if (err)
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
return err;
}
@@ -1460,7 +1402,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
@@ -1662,7 +1605,7 @@ static struct sk_buff *
ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *skb;
skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a6d2a0b1aa21..11c69415c605 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -41,6 +41,7 @@
#include <net/pkt_cls.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
@@ -589,7 +590,7 @@ out:
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
@@ -966,7 +967,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow *flow = NULL, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
@@ -1213,7 +1214,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow_key key;
struct sw_flow *flow;
struct sk_buff *reply = NULL;
@@ -1314,7 +1315,7 @@ error:
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1373,7 +1374,7 @@ unlock:
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1641,7 +1642,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
{
struct datapath *dp;
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
info->attrs);
if (IS_ERR(dp))
return;
@@ -1829,7 +1830,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1934,7 +1935,8 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -1967,7 +1969,8 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -2002,7 +2005,8 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
goto err_unlock_free;
@@ -2049,7 +2053,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
- [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
+ [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};
static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2245,7 +2249,7 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct vport_parms parms;
struct sk_buff *reply;
struct vport *vport;
@@ -2302,7 +2306,7 @@ restart:
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -2347,7 +2351,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2403,7 +2407,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2446,7 +2450,7 @@ exit_unlock_free:
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *reply;
struct vport *vport;
int err;
@@ -2539,7 +2543,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
- [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
@@ -2702,6 +2706,17 @@ static struct pernet_operations ovs_net_ops = {
.size = sizeof(struct ovs_net),
};
+static const char * const ovs_drop_reasons[] = {
+#define S(x) (#x),
+ OVS_DROP_REASONS(S)
+#undef S
+};
+
+static struct drop_reason_list drop_reason_list_ovs = {
+ .reasons = ovs_drop_reasons,
+ .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
+};
+
static int __init dp_init(void)
{
int err;
@@ -2743,6 +2758,9 @@ static int __init dp_init(void)
if (err < 0)
goto error_unreg_netdev;
+ drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+ &drop_reason_list_ovs);
+
return 0;
error_unreg_netdev:
@@ -2769,6 +2787,7 @@ static void dp_cleanup(void)
ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
+ drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h
new file mode 100644
index 000000000000..cedf9b7b5796
--- /dev/null
+++ b/net/openvswitch/drop.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Open vSwitch drop reason list.
+ */
+
+#ifndef OPENVSWITCH_DROP_H
+#define OPENVSWITCH_DROP_H
+#include <linux/skbuff.h>
+#include <net/dropreason.h>
+
+#define OVS_DROP_REASONS(R) \
+ R(OVS_DROP_LAST_ACTION) \
+ R(OVS_DROP_ACTION_ERROR) \
+ R(OVS_DROP_EXPLICIT) \
+ R(OVS_DROP_EXPLICIT_WITH_ERROR) \
+ R(OVS_DROP_METER) \
+ R(OVS_DROP_RECURSION_LIMIT) \
+ R(OVS_DROP_DEFERRED_LIMIT) \
+ R(OVS_DROP_FRAG_L2_TOO_LONG) \
+ R(OVS_DROP_FRAG_INVALID_PROTO) \
+ R(OVS_DROP_CONNTRACK) \
+ R(OVS_DROP_IP_TTL) \
+ /* deliberate comment for trailing \ */
+
+enum ovs_drop_reason {
+ __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+#define ENUM(x) x,
+ OVS_DROP_REASONS(ENUM)
+#undef ENUM
+
+ OVS_DROP_MAX,
+};
+
+static inline void
+ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason)
+{
+ kfree_skb_reason(skb, (u32)reason);
+}
+
+#endif /* OPENVSWITCH_DROP_H */
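
The OVS_DROP_REASONS(R) X-macro expands the same list twice: once into the enum here and once into the ovs_drop_reasons[] string table in datapath.c, so names and values cannot drift apart (the "deliberate comment" line exists only so every R(...) entry can end in a backslash). A standalone demonstration of the pattern, with demo names and without the subsystem base offset drop.h applies:

        #include <stdio.h>

        #define DEMO_REASONS(R) \
                R(DEMO_DROP_FIRST) \
                R(DEMO_DROP_SECOND)

        enum demo_reason {
        #define ENUM(x) x,
                DEMO_REASONS(ENUM)
        #undef ENUM
                DEMO_DROP_MAX,
        };

        static const char * const demo_names[] = {
        #define S(x) #x,
                DEMO_REASONS(S)
        #undef S
        };

        int main(void)
        {
                for (int i = 0; i < DEMO_DROP_MAX; i++)
                        printf("%d = %s\n", i, demo_names[i]);
                return 0;
        }
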
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 41116361433d..88965e2068ac 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -38,6 +38,7 @@
#include <net/tun_proto.h>
#include <net/erspan.h>
+#include "drop.h"
#include "flow_netlink.h"
struct ovs_len_tbl {
@@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_DROP:
break;
case OVS_ACTION_ATTR_CT:
@@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
if (!actions)
return;
@@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
+ [OVS_ACTION_ATTR_DROP] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3453,6 +3456,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_DROP:
+ if (!nla_is_last(a, rem))
+ return -EINVAL;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index c4ebf810e4b1..cc08e0403909 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -211,7 +211,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
struct sk_buff *skb;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
@@ -272,7 +272,7 @@ error:
static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr *nla, *band_nla;
struct sk_buff *reply;
@@ -409,7 +409,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct dp_meter *meter, *old_meter;
struct sk_buff *reply;
struct ovs_header *ovs_reply_header;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct dp_meter_table *meter_tbl;
struct datapath *dp;
int err;
@@ -482,7 +482,7 @@ exit_free_meter:
static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *meter;
@@ -535,7 +535,7 @@ exit_unlock:
static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *old_meter;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 85ff90a03b0c..8f97648d652f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union tpacket_uhdr h;
+ /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- h.h1->tp_status = status;
+ WRITE_ONCE(h.h1->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
- h.h2->tp_status = status;
+ WRITE_ONCE(h.h2->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
- h.h3->tp_status = status;
+ WRITE_ONCE(h.h3->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
smp_rmb();
+ /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
- return h.h1->tp_status;
+ return READ_ONCE(h.h1->tp_status);
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
- return h.h2->tp_status;
+ return READ_ONCE(h.h2->tp_status);
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
- return h.h3->tp_status;
+ return READ_ONCE(h.h3->tp_status);
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
@@ -2050,8 +2054,8 @@ retry:
skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
@@ -2585,8 +2589,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->protocol = proto;
skb->dev = dev;
- skb->priority = po->sk.sk_priority;
- skb->mark = po->sk.sk_mark;
+ skb->priority = READ_ONCE(po->sk.sk_priority);
+ skb->mark = READ_ONCE(po->sk.sk_mark);
skb->tstamp = sockc->transmit_time;
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -2927,8 +2931,10 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
if (prepad + len < PAGE_SIZE || !linear)
linear = len;
+ if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
- err, 0);
+ err, PAGE_ALLOC_COSTLY_ORDER);
if (!skb)
return NULL;
@@ -2988,7 +2994,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_unlock;
sockcm_init(&sockc, sk);
- sockc.mark = sk->sk_mark;
+ sockc.mark = READ_ONCE(sk->sk_mark);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err))
@@ -3061,7 +3067,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
@@ -3601,7 +3607,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
- memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
+ memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len);
} else {
sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
sll->sll_halen = 0;
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 78beb74146e7..41ece61eb57a 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -23,6 +23,8 @@
#define QRTR_EPH_PORT_RANGE \
XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
+#define QRTR_PORT_CTRL_LEGACY 0xffff
+
/**
* struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
@@ -495,6 +497,9 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
+ if (cb->dst_port == QRTR_PORT_CTRL_LEGACY)
+ cb->dst_port = QRTR_PORT_CTRL;
+
if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 0f7a729f1a1f..b1db0b519179 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -16,7 +16,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/qrtr.h>
-static RADIX_TREE(nodes, GFP_KERNEL);
+static DEFINE_XARRAY(nodes);
static struct {
struct socket *sock;
@@ -66,14 +66,14 @@ struct qrtr_server {
struct qrtr_node {
unsigned int id;
- struct radix_tree_root servers;
+ struct xarray servers;
};
static struct qrtr_node *node_get(unsigned int node_id)
{
struct qrtr_node *node;
- node = radix_tree_lookup(&nodes, node_id);
+ node = xa_load(&nodes, node_id);
if (node)
return node;
@@ -83,8 +83,9 @@ static struct qrtr_node *node_get(unsigned int node_id)
return NULL;
node->id = node_id;
+ xa_init(&node->servers);
- if (radix_tree_insert(&nodes, node_id, node)) {
+ if (xa_store(&nodes, node_id, node, GFP_KERNEL)) {
kfree(node);
return NULL;
}
@@ -193,40 +194,23 @@ static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv,
static int announce_servers(struct sockaddr_qrtr *sq)
{
- struct radix_tree_iter iter;
struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
int ret;
node = node_get(qrtr_ns.local_node);
if (!node)
return 0;
- rcu_read_lock();
/* Announce the list of servers registered in this node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&node->servers, index, srv) {
ret = service_announce_new(sq, srv);
if (ret < 0) {
pr_err("failed to announce new service\n");
return ret;
}
-
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -256,14 +240,17 @@ static struct qrtr_server *server_add(unsigned int service,
goto err;
/* Delete the old server on the same port */
- old = radix_tree_lookup(&node->servers, port);
+ old = xa_store(&node->servers, port, srv, GFP_KERNEL);
if (old) {
- radix_tree_delete(&node->servers, port);
- kfree(old);
+ if (xa_is_err(old)) {
+ pr_err("failed to add server [0x%x:0x%x] ret:%d\n",
+ srv->service, srv->instance, xa_err(old));
+ goto err;
+ } else {
+ kfree(old);
+ }
}
- radix_tree_insert(&node->servers, port, srv);
-
trace_qrtr_ns_server_add(srv->service, srv->instance,
srv->node, srv->port);
@@ -280,11 +267,11 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
struct qrtr_server *srv;
struct list_head *li;
- srv = radix_tree_lookup(&node->servers, port);
+ srv = xa_load(&node->servers, port);
if (!srv)
return -ENOENT;
- radix_tree_delete(&node->servers, port);
+ xa_erase(&node->servers, port);
/* Broadcast the removal of local servers */
if (srv->node == qrtr_ns.local_node && bcast)
@@ -344,13 +331,12 @@ static int ctrl_cmd_hello(struct sockaddr_qrtr *sq)
static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_ctrl_pkt pkt;
struct qrtr_server *srv;
struct sockaddr_qrtr sq;
struct msghdr msg = { };
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -361,22 +347,9 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
if (!node)
return 0;
- rcu_read_lock();
/* Advertise removal of this client to all servers of remote node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
+ xa_for_each(&node->servers, index, srv)
server_del(node, srv->port, true);
- rcu_read_lock();
- }
- rcu_read_unlock();
/* Advertise the removal of this client to all local servers */
local_node = node_get(qrtr_ns.local_node);
@@ -387,18 +360,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE);
pkt.client.node = cpu_to_le32(from->sq_node);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -411,11 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pr_err("failed to send bye cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -423,7 +381,6 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
unsigned int node_id, unsigned int port)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_lookup *lookup;
struct qrtr_ctrl_pkt pkt;
struct msghdr msg = { };
@@ -432,7 +389,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
struct qrtr_node *node;
struct list_head *tmp;
struct list_head *li;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -477,18 +434,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pkt.client.node = cpu_to_le32(node_id);
pkt.client.port = cpu_to_le32(port);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -501,11 +447,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pr_err("failed to send del client cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -576,13 +518,12 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
unsigned int service, unsigned int instance)
{
- struct radix_tree_iter node_iter;
struct qrtr_server_filter filter;
- struct radix_tree_iter srv_iter;
struct qrtr_lookup *lookup;
+ struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **node_slot;
- void __rcu **srv_slot;
+ unsigned long node_idx;
+ unsigned long srv_idx;
/* Accept only local observers */
if (from->sq_node != qrtr_ns.local_node)
@@ -601,40 +542,14 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
filter.service = service;
filter.instance = instance;
- rcu_read_lock();
- radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) {
- node = radix_tree_deref_slot(node_slot);
- if (!node)
- continue;
- if (radix_tree_deref_retry(node)) {
- node_slot = radix_tree_iter_retry(&node_iter);
- continue;
- }
- node_slot = radix_tree_iter_resume(node_slot, &node_iter);
-
- radix_tree_for_each_slot(srv_slot, &node->servers,
- &srv_iter, 0) {
- struct qrtr_server *srv;
-
- srv = radix_tree_deref_slot(srv_slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- srv_slot = radix_tree_iter_retry(&srv_iter);
- continue;
- }
-
+ xa_for_each(&nodes, node_idx, node) {
+ xa_for_each(&node->servers, srv_idx, srv) {
if (!server_match(srv, &filter))
continue;
- srv_slot = radix_tree_iter_resume(srv_slot, &srv_iter);
-
- rcu_read_unlock();
lookup_notify(from, srv, true);
- rcu_read_lock();
}
}
- rcu_read_unlock();
/* Empty notification, to indicate end of listing */
lookup_notify(from, NULL, true);
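
Besides letting xa_for_each() absorb the RCU lock/retry boilerplate, the conversion leans on xa_store()'s return convention in server_add(): NULL means the slot was empty, an error comes back encoded in the pointer and is unwrapped with xa_err(), and anything else is the displaced old entry that the caller now owns. A minimal kernel-style sketch of that convention (not a standalone program):

        static int demo_replace(struct xarray *xa, unsigned long port, void *srv)
        {
                void *old = xa_store(xa, port, srv, GFP_KERNEL);

                if (xa_is_err(old))
                        return xa_err(old);     /* e.g. -ENOMEM */
                kfree(old);                     /* kfree(NULL) is a no-op */
                return 0;
        }
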
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ca4c3a667091..d2fdb1529585 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -17,7 +17,6 @@
*/
#define RDS_RDMA_REJ_INCOMPAT 1
-int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
diff --git a/net/rds/rds.h b/net/rds/rds.h
index d35d1fc39807..dc360252c515 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -863,7 +863,6 @@ int rds_message_next_extension(struct rds_header *hdr,
unsigned int *pos, void *buf, unsigned int *buflen);
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
-void rds_message_inc_free(struct rds_incoming *inc);
void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
@@ -1013,7 +1012,5 @@ void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
struct rds_transport *rds_trans_get(int t_type);
-int rds_trans_init(void);
-void rds_trans_exit(void);
#endif
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8b5930d7b34..053aa7da87ef 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -56,7 +56,6 @@ void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
-u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
void rds_tcp_accept_work(struct sock *sk);
int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..470c70deffe2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE
config NET_SCH_INGRESS
tristate "Ingress/classifier-action Qdisc"
depends on NET_CLS_ACT
- select NET_INGRESS
- select NET_EGRESS
+ select NET_XGRESS
help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
@@ -679,6 +678,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
+ select NET_XGRESS
help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f7887f42d542..9d3f26bf0440 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1320,7 +1320,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
return ERR_PTR(err);
}
} else {
- if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
+ if (strscpy(act_name, "police", IFNAMSIZ) < 0) {
NL_SET_ERR_MSG(extack, "TC action name too long");
return ERR_PTR(-EINVAL);
}
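
strscpy() returns the number of bytes copied, or -E2BIG on truncation, so the overflow test in tc_action_load_ops() becomes a simple sign check and the source is never scanned past the destination size, which was strlcpy()'s hazard. A userspace approximation of the return convention (the real strscpy() also copies the truncated prefix on overflow):

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>

        static long strscpy_demo(char *dst, const char *src, size_t size)
        {
                size_t len = strnlen(src, size);

                if (len == size)
                        return -E2BIG;          /* would not fit with NUL */
                memcpy(dst, src, len + 1);
                return (long)len;
        }

        int main(void)
        {
                char name[16];                  /* IFNAMSIZ-sized buffer */

                printf("%ld\n", strscpy_demo(name, "police", sizeof(name))); /* 6 */
                return 0;
        }
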
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index abc71a06d634..7c652d14528b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net,
}
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ if (p->ct_action & TCA_CT_ACT_COMMIT)
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
nf_ct_put(p->tmpl);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 466c26df853a..382c7a71f81f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -406,56 +406,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, u32 flags,
- struct netlink_ext_ack *extack)
-{
- bool is_bpf, is_ebpf, have_exts = false;
- u32 gen_flags = 0;
- int ret;
-
- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
- is_ebpf = tb[TCA_BPF_FD];
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
- return -EINVAL;
-
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
- extack);
- if (ret < 0)
- return ret;
-
- if (tb[TCA_BPF_FLAGS]) {
- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
-
- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
- return -EINVAL;
-
- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
- }
- if (tb[TCA_BPF_FLAGS_GEN]) {
- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
- !tc_flags_valid(gen_flags))
- return -EINVAL;
- }
-
- prog->exts_integrated = have_exts;
- prog->gen_flags = gen_flags;
-
- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
- if (ret < 0)
- return ret;
-
- if (tb[TCA_BPF_CLASSID]) {
- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
- tcf_bind_filter(tp, &prog->res, base);
- }
-
- return 0;
-}
-
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -463,9 +413,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
+ bool is_bpf, is_ebpf, have_exts = false;
struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
+ bool bound_to_filter = false;
struct cls_bpf_prog *prog;
+ u32 gen_flags = 0;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -504,11 +457,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
prog->handle = handle;
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
- extack);
+ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+ is_ebpf = tb[TCA_BPF_FD];
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+
+ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
+ flags, extack);
+ if (ret < 0)
+ goto errout_idr;
+
+ if (tb[TCA_BPF_FLAGS]) {
+ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+
+ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+ }
+ if (tb[TCA_BPF_FLAGS_GEN]) {
+ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+ !tc_flags_valid(gen_flags)) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+ }
+
+ prog->exts_integrated = have_exts;
+ prog->gen_flags = gen_flags;
+
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
if (ret < 0)
goto errout_idr;
+ if (tb[TCA_BPF_CLASSID]) {
+ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ tcf_bind_filter(tp, &prog->res, base);
+ bound_to_filter = true;
+ }
+
ret = cls_bpf_offload(tp, prog, oldprog, extack);
if (ret)
goto errout_parms;
@@ -530,6 +523,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout_parms:
+ if (bound_to_filter)
+ tcf_unbind_filter(tp, &prog->res);
cls_bpf_free_parms(prog);
errout_idr:
if (!oldprog)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 56065cc5a661..e5314a31f75a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -72,6 +72,7 @@ struct fl_flow_key {
struct flow_dissector_key_num_of_vlans num_of_vlans;
struct flow_dissector_key_pppoe pppoe;
struct flow_dissector_key_l2tpv3 l2tpv3;
+ struct flow_dissector_key_ipsec ipsec;
struct flow_dissector_key_cfm cfm;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
@@ -726,6 +727,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_SPI] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 },
[TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1),
[TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED },
};
@@ -776,7 +779,8 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 },
};
-static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = {
+static const struct nla_policy
+cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX + 1] = {
[TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8,
FLOW_DIS_CFM_MDL_MAX),
[TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 },
@@ -795,6 +799,24 @@ static void fl_set_key_val(struct nlattr **tb,
nla_memcpy(mask, tb[mask_type], len);
}
+static int fl_set_key_spi(struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ if (key->basic.ip_proto != IPPROTO_ESP &&
+ key->basic.ip_proto != IPPROTO_AH) {
+ NL_SET_ERR_MSG(extack,
+ "Protocol must be either ESP or AH");
+ return -EINVAL;
+ }
+
+ fl_set_key_val(tb, &key->ipsec.spi,
+ TCA_FLOWER_KEY_SPI,
+ &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+ sizeof(key->ipsec.spi));
+ return 0;
+}
+
static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -812,6 +834,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
+ if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) {
+ NL_SET_ERR_MSG(extack,
+ "Both min and max destination ports must be specified");
+ return -EINVAL;
+ }
+ if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) {
+ NL_SET_ERR_MSG(extack,
+ "Both min and max source ports must be specified");
+ return -EINVAL;
+ }
if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
ntohs(key->tp_range.tp_max.dst) <=
ntohs(key->tp_range.tp_min.dst)) {
@@ -1699,7 +1731,7 @@ static int fl_set_key_cfm(struct nlattr **tb,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
{
- struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX];
+ struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX + 1];
int err;
if (!tb[TCA_FLOWER_KEY_CFM])
@@ -1884,6 +1916,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
return ret;
}
+ if (tb[TCA_FLOWER_KEY_SPI]) {
+ ret = fl_set_key_spi(tb, key, mask, extack);
+ if (ret)
+ return ret;
+ }
+
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -2057,6 +2095,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPSEC, ipsec);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_CFM, cfm);
skb_flow_dissector_init(dissector, keys, cnt);
@@ -2163,53 +2203,6 @@ static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask)
return mask->meta.l2_miss;
}
-static int fl_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_fl_filter *f, struct fl_flow_mask *mask,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est,
- struct fl_flow_tmplt *tmplt,
- u32 flags, u32 fl_flags,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- err = tcf_exts_validate_ex(net, tp, tb, est, &f->exts, flags,
- fl_flags, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_FLOWER_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
- if (flags & TCA_ACT_FLAGS_NO_RTNL)
- rtnl_lock();
- tcf_bind_filter(tp, &f->res, base);
- if (flags & TCA_ACT_FLAGS_NO_RTNL)
- rtnl_unlock();
- }
-
- err = fl_set_key(net, tb, &f->key, &mask->key, extack);
- if (err)
- return err;
-
- fl_mask_update_range(mask);
- fl_set_masked_key(&f->mkey, &f->key, mask);
-
- if (!fl_mask_fits_tmplt(tmplt, mask)) {
- NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
- return -EINVAL;
- }
-
- /* Enable tc skb extension if filter matches on data extracted from
- * this extension.
- */
- if (fl_needs_tc_skb_ext(&mask->key)) {
- f->needs_tc_skb_ext = 1;
- tc_skb_ext_tc_enable();
- }
-
- return 0;
-}
-
static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
struct cls_fl_filter *fold,
bool *in_ht)
@@ -2241,6 +2234,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_head *head = fl_head_dereference(tp);
bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
struct cls_fl_filter *fold = *arg;
+ bool bound_to_filter = false;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
struct nlattr **tb;
@@ -2325,15 +2319,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout_idr;
- err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
- tp->chain->tmplt_priv, flags, fnew->flags,
- extack);
- if (err)
+ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+ &fnew->exts, flags, fnew->flags,
+ extack);
+ if (err < 0)
goto errout_idr;
+ if (tb[TCA_FLOWER_CLASSID]) {
+ fnew->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_lock();
+ tcf_bind_filter(tp, &fnew->res, base);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_unlock();
+ bound_to_filter = true;
+ }
+
+ err = fl_set_key(net, tb, &fnew->key, &mask->key, extack);
+ if (err)
+ goto unbind_filter;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&fnew->mkey, &fnew->key, mask);
+
+ if (!fl_mask_fits_tmplt(tp->chain->tmplt_priv, mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+ err = -EINVAL;
+ goto unbind_filter;
+ }
+
+ /* Enable tc skb extension if filter matches on data extracted from
+ * this extension.
+ */
+ if (fl_needs_tc_skb_ext(&mask->key)) {
+ fnew->needs_tc_skb_ext = 1;
+ tc_skb_ext_tc_enable();
+ }
+
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
- goto errout_idr;
+ goto unbind_filter;
err = fl_ht_insert_unique(fnew, fold, &in_ht);
if (err)
@@ -2424,6 +2449,16 @@ errout_hw:
fnew->mask->filter_ht_params);
errout_mask:
fl_mask_put(head, fnew->mask);
+
+unbind_filter:
+ if (bound_to_filter) {
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_lock();
+ tcf_unbind_filter(tp, &fnew->res);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_unlock();
+ }
+
errout_idr:
if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
@@ -3359,6 +3394,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->l2tpv3.session_id)))
goto nla_put_failure;
+ if (key->ipsec.spi &&
+ fl_dump_key_val(skb, &key->ipsec.spi, TCA_FLOWER_KEY_SPI,
+ &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+ sizeof(key->ipsec.spi)))
+ goto nla_put_failure;
+
if ((key->basic.ip_proto == IPPROTO_TCP ||
key->basic.ip_proto == IPPROTO_UDP ||
key->basic.ip_proto == IPPROTO_SCTP) &&
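
The new SPI key in this file is filled by fl_set_key_spi() and dumped above; matching then follows the usual flower key/mask compare. A small userspace sketch of that semantics, with made-up values and a stand-in helper rather than the kernel API:

	#include <stdio.h>
	#include <stdint.h>

	/* match iff the packet SPI agrees with the key under the mask */
	static int spi_matches(uint32_t pkt_spi, uint32_t key, uint32_t mask)
	{
		return (pkt_spi & mask) == (key & mask);
	}

	int main(void)
	{
		uint32_t key = 0x1000, mask = 0xffffffff; /* exact-match SPI */

		printf("spi 0x1000 -> %d\n", spi_matches(0x1000, key, mask));
		printf("spi 0x2000 -> %d\n", spi_matches(0x2000, key, mask));
		return 0;
	}
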
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index ae9439a6c56c..c49d6af0e048 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -212,11 +212,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
if (err < 0)
return err;
- if (tb[TCA_FW_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
- tcf_bind_filter(tp, &f->res, base);
- }
-
if (tb[TCA_FW_INDEV]) {
int ret;
ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
@@ -233,6 +228,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
} else if (head->mask != 0xFFFFFFFF)
return err;
+ if (tb[TCA_FW_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
return 0;
}
@@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
fnew->id = f->id;
- fnew->res = f->res;
fnew->ifindex = f->ifindex;
fnew->tp = f->tp;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index fa3bbd187eb9..c4ed11df6254 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -159,26 +159,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
-static int mall_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_mall_head *head,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, u32 flags, u32 fl_flags,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags,
- fl_flags, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_MATCHALL_CLASSID]) {
- head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
- tcf_bind_filter(tp, &head->res, base);
- }
- return 0;
-}
-
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -187,6 +167,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+ bool bound_to_filter = false;
struct cls_mall_head *new;
u32 userflags = 0;
int err;
@@ -226,11 +207,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
goto err_alloc_percpu;
}
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE],
- flags, new->flags, extack);
- if (err)
+ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+ &new->exts, flags, new->flags, extack);
+ if (err < 0)
goto err_set_parms;
+ if (tb[TCA_MATCHALL_CLASSID]) {
+ new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+ tcf_bind_filter(tp, &new->res, base);
+ bound_to_filter = true;
+ }
+
if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long)new,
extack);
@@ -246,6 +233,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return 0;
err_replace_hw_filter:
+ if (bound_to_filter)
+ tcf_unbind_filter(tp, &new->res);
err_set_parms:
free_percpu(new->pf);
err_alloc_percpu:
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d0c53724d3e8..1e20bbd687f1 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
- f->res = fold->res;
f->handle = fold->handle;
f->tp = fold->tp;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d15d50de7980..da4c179a4d41 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -712,8 +712,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
[TCA_U32_FLAGS] = { .type = NLA_U32 },
};
+static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct nlattr **tb)
+{
+ if (tb[TCA_U32_CLASSID])
+ tcf_unbind_filter(tp, &n->res);
+}
+
+static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+ unsigned long base, struct nlattr **tb)
+{
+ if (tb[TCA_U32_CLASSID]) {
+ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+ tcf_bind_filter(tp, &n->res, base);
+ }
+}
+
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
- unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, u32 flags, u32 fl_flags,
struct netlink_ext_ack *extack)
@@ -760,10 +775,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
if (ht_old)
ht_old->refcnt--;
}
- if (tb[TCA_U32_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
if (ifindex >= 0)
n->ifindex = ifindex;
@@ -815,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
new->ifindex = n->ifindex;
new->fshift = n->fshift;
- new->res = n->res;
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
@@ -903,17 +913,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOMEM;
- err = u32_set_parms(net, tp, base, new, tb,
- tca[TCA_RATE], flags, new->flags,
- extack);
+ err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
+ flags, new->flags, extack);
if (err) {
__u32_destroy_key(new);
return err;
}
+ u32_bind_filter(tp, new, base, tb);
+
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
+ u32_unbind_filter(tp, new, tb);
+
+ if (tb[TCA_U32_LINK]) {
+ struct tc_u_hnode *ht_old;
+
+ ht_old = rtnl_dereference(n->ht_down);
+ if (ht_old)
+ ht_old->refcnt++;
+ }
__u32_destroy_key(new);
return err;
}
@@ -1003,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
+ /* At this point, we need to derive the new handle that will be used to
+ * uniquely map the identity of this table match entry. The
+ * identity of the entry that we need to construct is 32 bits made of:
+ * htid(12b):bucketid(8b):node/entryid(12b)
+ *
+ * At this point _we have the table(ht)_ in which we will insert this
+ * entry. We carry the table's id in variable "htid".
+ * Note that earlier code picked the ht either from a) the htid the
+ * user provided via the TCA_U32_HASH attribute, or b) when no such
+ * attribute is passed, the root ht, which defaults to ID
+ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+ * If OTOH the user passed us the htid, they may also pass a bucketid of
+ * their choice (0 is fine). For example, a user htid of 0x[600][01][000]
+ * indicates hash bucketid 1. Rule: the entry/node ID _cannot_ be
+ * passed via the htid, so even if it was non-zero it will be ignored.
+ *
+ * We may also have a handle, if the user passed one. The handle also
+ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+ * Rule: the bucketid on the handle is ignored even if one was passed;
+ * rather the value on "htid" is always assumed to be the bucketid.
+ */
if (handle) {
+ /* Rule: the htid embedded in the handle must match the htid of the table */
if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
}
- handle = htid | TC_U32_NODE(handle);
- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
- GFP_KERNEL);
- if (err)
- return err;
- } else
+ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
+ * need to finalize the table entry identification with the last
+ * part - the node/entryid(12b). Rule: a nodeid _cannot be 0_ for
+ * entries. Rule: nodeid 0 is reserved for tables only (see the
+ * earlier code which processes the TCA_U32_DIVISOR attribute).
+ * Rule: the nodeid can only be derived from the handle (and not
+ * from the htid).
+ * Rule: if the handle specifies zero for the nodeid, e.g.
+ * 0x60000000, then pick a new nodeid from the pool of IDs
+ * this hash table has been allocating from.
+ * If OTOH it is specified (e.g. the user passed a handle
+ * such as 0x60000123), then we use it to generate our final
+ * handle, which uniquely identifies the match entry.
+ */
+ if (!TC_U32_NODE(handle)) {
+ handle = gen_new_kid(ht, htid);
+ } else {
+ handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+ handle, GFP_KERNEL);
+ if (err)
+ return err;
+ }
+ } else {
+ /* The user did not give us a handle; let's just generate one
+ * from the table's pool of nodeids.
+ */
handle = gen_new_kid(ht, htid);
+ }
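
As a rough illustration of the htid(12b):bucketid(8b):nodeid(12b) layout discussed in the comment above, here is a minimal userspace sketch; the three macros mirror the ones in include/uapi/linux/pkt_cls.h:

	#include <stdio.h>
	#include <stdint.h>

	#define TC_U32_HTID(h)	((h) & 0xFFF00000)
	#define TC_U32_HASH(h)	(((h) >> 12) & 0xFF)
	#define TC_U32_NODE(h)	((h) & 0xFFF)

	int main(void)
	{
		uint32_t handle = 0x60001123; /* htid 0x600..., bucket 0x01, node 0x123 */

		printf("htid   %#x\n", TC_U32_HTID(handle));
		printf("bucket %#x\n", TC_U32_HASH(handle));
		printf("node   %#x\n", TC_U32_NODE(handle));
		return 0;
	}
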
if (tb[TCA_U32_SEL] == NULL) {
NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
@@ -1074,15 +1138,18 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE],
+ err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
flags, n->flags, extack);
+
+ u32_bind_filter(tp, n, base, tb);
+
if (err == 0) {
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
err = u32_replace_hw_knode(tp, n, flags, extack);
if (err)
- goto errhw;
+ goto errunbind;
if (!tc_in_hw(n->flags))
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -1100,7 +1167,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return 0;
}
-errhw:
+errunbind:
+ u32_unbind_filter(tp, n, tb);
+
#ifdef CONFIG_CLS_U32_MARK
free_percpu(n->pcpu_success);
#endif
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index af85a73c4c54..da34fd4c9269 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -502,7 +502,7 @@ META_COLLECTOR(int_sk_lingertime)
*err = -1;
return;
}
- dst->value = sk->sk_lingertime / HZ;
+ dst->value = READ_ONCE(sk->sk_lingertime) / HZ;
}
META_COLLECTOR(int_sk_err_qlen)
@@ -568,7 +568,7 @@ META_COLLECTOR(int_sk_rcvtimeo)
*err = -1;
return;
}
- dst->value = sk->sk_rcvtimeo / HZ;
+ dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ;
}
META_COLLECTOR(int_sk_sndtimeo)
@@ -579,7 +579,7 @@ META_COLLECTOR(int_sk_sndtimeo)
*err = -1;
return;
}
- dst->value = sk->sk_sndtimeo / HZ;
+ dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ;
}
META_COLLECTOR(int_sk_sendmsg_off)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa6b1fe65151..e9eaf637220e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
return 0;
}
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+ return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+ !(n->nlmsg_flags & NLM_F_REPLACE) &&
+ !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
/*
* Create/change qdisc.
*/
-
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -1644,27 +1662,35 @@ replay:
*
* We know, that some child q is already
* attached to this parent and have choice:
- * either to change it or to create/graft new one.
+ * 1) change it or 2) create/graft new one.
+ * If the requested qdisc kind differs
+ * from the existing one, then we choose graft.
+ * If they are the same then this is a "change"
+ * operation - just let it fall through.
*
* 1. We are allowed to create/graft only
- * if CREATE and REPLACE flags are set.
+ * if the request is explicitly stating
+ * "please create if it doesn't exist".
*
- * 2. If EXCL is set, requestor wanted to say,
- * that qdisc tcm_handle is not expected
+ * 2. If the request is an exclusive create,
+ * then the qdisc tcm_handle is not expected
* to exist, so that we choose create/graft too.
*
* 3. The last case is when no flags are set.
+ * This will happen when, for example, the
+ * tc utility issues a "change" command.
* Alas, it is sort of hole in API, we
* cannot decide what to do unambiguously.
- * For now we select create/graft, if
- * user gave KIND, which does not match existing.
+ * For now we select create/graft.
*/
- if ((n->nlmsg_flags & NLM_F_CREATE) &&
- (n->nlmsg_flags & NLM_F_REPLACE) &&
- ((n->nlmsg_flags & NLM_F_EXCL) ||
- (tca[TCA_KIND] &&
- nla_strcmp(tca[TCA_KIND], q->ops->id))))
- goto create_n_graft;
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ if (req_create_or_replace(n) ||
+ req_create_exclusive(n))
+ goto create_n_graft;
+ else if (req_change(n))
+ goto create_n_graft2;
+ }
}
}
} else {
@@ -1698,6 +1724,7 @@ create_n_graft:
NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
return -ENOENT;
}
+create_n_graft2:
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
q = qdisc_create(dev, dev_ingress_queue(dev),
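
The three predicates introduced above carve the NLM_F flag space into the create/graft and change paths. A minimal userspace sketch of that decision table, assuming the requested kind differs from the existing qdisc (flag constants come from linux/netlink.h):

	#include <stdio.h>
	#include <linux/netlink.h>

	static const char *classify(unsigned int flags)
	{
		int create = flags & NLM_F_CREATE;
		int replace = flags & NLM_F_REPLACE;
		int excl = flags & NLM_F_EXCL;

		if ((create && replace) || (create && excl))
			return "create_n_graft";
		if (!create && !replace && !excl)
			return "create_n_graft2";	/* plain "change" request */
		return "fall through to change";
	}

	int main(void)
	{
		printf("%s\n", classify(NLM_F_CREATE | NLM_F_REPLACE));
		printf("%s\n", classify(NLM_F_CREATE | NLM_F_EXCL));
		printf("%s\n", classify(0));	/* e.g. "tc qdisc change ..." */
		return 0;
	}
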
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e35a4e90f4e6..19901e77cd3b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -17,7 +17,6 @@
struct drr_class {
struct Qdisc_class_common common;
- unsigned int filter_cnt;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
@@ -150,8 +149,10 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl = (struct drr_class *)arg;
- if (cl->filter_cnt > 0)
+ if (qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG(extack, "DRR class is in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -187,8 +188,8 @@ static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
{
struct drr_class *cl = drr_find_class(sch, classid);
- if (cl != NULL)
- cl->filter_cnt++;
+ if (cl)
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -197,7 +198,7 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
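
The same conversion pattern repeats in the hfsc, htb and qfq hunks below: filter binds take a class reference, unbinds drop it, and delete refuses while any reference remains. A toy stand-in for the qdisc_class_get()/qdisc_class_put()/qdisc_class_in_use() helpers (not the kernel implementations):

	#include <stdio.h>

	struct class_common { unsigned int filter_cnt; };

	static void class_get(struct class_common *c)    { c->filter_cnt++; }
	static void class_put(struct class_common *c)    { c->filter_cnt--; }
	static int  class_in_use(struct class_common *c) { return c->filter_cnt > 0; }

	int main(void)
	{
		struct class_common cl = { 0 };

		class_get(&cl);			/* tcf bind */
		printf("delete while bound:  %s\n", class_in_use(&cl) ? "-EBUSY" : "ok");
		class_put(&cl);			/* tcf unbind */
		printf("delete after unbind: %s\n", class_in_use(&cl) ? "-EBUSY" : "ok");
		return 0;
	}
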
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 70b0c5873d32..3554085bc2be 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,6 @@ struct hfsc_class {
struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
struct tcf_block *block;
- unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
struct hfsc_sched *sched; /* scheduler data */
@@ -1012,6 +1011,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent == NULL)
return -ENOENT;
}
+ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+ NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+ return -EINVAL;
+ }
if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
return -EINVAL;
@@ -1094,8 +1097,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
- if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+ if (cl->level > 0 || qdisc_class_in_use(&cl->cl_common) ||
+ cl == &q->root) {
+ NL_SET_ERR_MSG(extack, "HFSC class in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -1223,7 +1229,7 @@ hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
if (cl != NULL) {
if (p != NULL && p->level <= cl->level)
return 0;
- cl->filter_cnt++;
+ qdisc_class_get(&cl->cl_common);
}
return (unsigned long)cl;
@@ -1234,7 +1240,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->cl_common);
}
static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 325c29041c7d..0d947414e616 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -102,7 +102,6 @@ struct htb_class {
struct tcf_proto __rcu *filter_list; /* class attached filters */
struct tcf_block *block;
- int filter_cnt;
int level; /* our level (see above) */
unsigned int children;
@@ -1710,8 +1709,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
* tc subsys guarantee us that in htb_destroy it holds no class
* refs so that we can remove children safely there ?
*/
- if (cl->children || cl->filter_cnt)
+ if (cl->children || qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG(extack, "HTB class in use");
return -EBUSY;
+ }
if (!cl->level && htb_parent_last_child(cl))
last_child = 1;
@@ -1810,10 +1811,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
goto failure;
}
- if (hopt->quantum) {
- NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
- goto failure;
- }
}
/* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1910,6 +1907,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -1931,6 +1929,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2017,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2108,7 +2108,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
* be broken by class during destroy IIUC.
*/
if (cl)
- cl->filter_cnt++;
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -2116,8 +2116,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- if (cl)
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index e43a45499372..a463a63192c3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -13,6 +13,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tcx.h>
struct ingress_sched_data {
struct tcf_block *block;
@@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_INGRESS)
@@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
- mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->block_info.chain_head_change = clsact_chain_head_change;
@@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress);
if (sch->parent != TC_H_INGRESS)
return;
tcf_block_put_ext(q->block, sch, &q->block_info);
+
+ if (entry) {
+ tcx_miniq_set_active(entry, false);
+ if (!tcx_entry_is_active(entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(entry);
+ }
+ }
+
net_dec_ingress_queue();
}
@@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
{
struct clsact_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_CLSACT)
@@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
net_inc_egress_queue();
- mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
@@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
- mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+ entry = tcx_entry_fetch_or_create(dev, false, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, false);
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
q->egress_block_info.chain_head_change = clsact_chain_head_change;
@@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress);
+ struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress);
if (sch->parent != TC_H_CLSACT)
return;
- tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+
+ if (ingress_entry) {
+ tcx_miniq_set_active(ingress_entry, false);
+ if (!tcx_entry_is_active(ingress_entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(ingress_entry);
+ }
+ }
+
+ if (egress_entry) {
+ tcx_miniq_set_active(egress_entry, false);
+ if (!tcx_entry_is_active(egress_entry)) {
+ tcx_entry_update(dev, NULL, false);
+ tcx_entry_free(egress_entry);
+ }
+ }
net_dec_ingress_queue();
net_dec_egress_queue();
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index ab69ff7577fc..793009f445c0 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -290,6 +290,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
"Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
return -EINVAL;
}
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
if (i >= qopt->num_tc)
break;
priv->min_rate[i] = nla_get_u64(attr);
@@ -312,6 +319,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
"Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
return -EINVAL;
}
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
if (i >= qopt->num_tc)
break;
priv->max_rate[i] = nla_get_u64(attr);
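
Both hunks guard nla_get_u64() with an explicit length check, since the nested rate attributes are not covered by a fixed-size policy entry. A userspace sketch of why the check matters, using the uapi struct nlattr layout (the helper is a stand-in for the kernel's nla_len(), and the payload length is illustrative):

	#include <stdio.h>
	#include <stdint.h>
	#include <linux/netlink.h>

	/* stand-in for the kernel's nla_len(): payload size minus the header */
	static int nla_payload_len(const struct nlattr *nla)
	{
		return nla->nla_len - NLA_HDRLEN;
	}

	int main(void)
	{
		struct nlattr nla = { .nla_len = NLA_HDRLEN + 4 };	/* 4-byte payload */

		if (nla_payload_len(&nla) != (int)sizeof(uint64_t))
			printf("reject: expected 8 bytes, got %d\n",
			       nla_payload_len(&nla));
		return 0;
	}
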
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 38d9aa0cd30e..4ad39a4a3cf5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -105,6 +105,11 @@ struct netem_sched_data {
u32 rho;
} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
+ struct prng {
+ u64 seed;
+ struct rnd_state prng_state;
+ } prng;
+
struct disttable *delay_dist;
enum {
@@ -179,15 +184,16 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
* Next number depends on last value.
* rho is scaled to avoid floating point.
*/
-static u32 get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state, struct prng *p)
{
u64 value, rho;
unsigned long answer;
+ struct rnd_state *s = &p->prng_state;
if (!state || state->rho == 0) /* no correlation */
- return get_random_u32();
+ return prandom_u32_state(s);
- value = get_random_u32();
+ value = prandom_u32_state(s);
rho = (u64)state->rho + 1;
answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
state->last = answer;
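
The correlation math above blends each fresh random value with the previous output, with rho scaled to 32-bit fixed point so the two weights sum to 2^32. A self-contained sketch of the same recurrence, where rand() stands in for prandom_u32_state() and the rho value is illustrative:

	#include <stdio.h>
	#include <stdint.h>
	#include <stdlib.h>

	static uint32_t last;

	static uint32_t crandom_step(uint32_t rho32)
	{
		uint64_t value = (uint32_t)rand();	/* stand-in PRNG */
		uint64_t rho = (uint64_t)rho32 + 1;
		uint32_t answer = (value * ((1ULL << 32) - rho) +
				   (uint64_t)last * rho) >> 32;

		last = answer;
		return answer;
	}

	int main(void)
	{
		/* rho close to 2^32 => output tracks the previous value closely */
		for (int i = 0; i < 5; i++)
			printf("%u\n", crandom_step(0xF0000000u));
		return 0;
	}
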
@@ -201,7 +207,7 @@ static u32 get_crandom(struct crndstate *state)
static bool loss_4state(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
- u32 rnd = get_random_u32();
+ u32 rnd = prandom_u32_state(&q->prng.prng_state);
/*
* Makes a comparison between rnd and the transition
@@ -266,18 +272,19 @@ static bool loss_4state(struct netem_sched_data *q)
static bool loss_gilb_ell(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
+ struct rnd_state *s = &q->prng.prng_state;
switch (clg->state) {
case GOOD_STATE:
- if (get_random_u32() < clg->a1)
+ if (prandom_u32_state(s) < clg->a1)
clg->state = BAD_STATE;
- if (get_random_u32() < clg->a4)
+ if (prandom_u32_state(s) < clg->a4)
return true;
break;
case BAD_STATE:
- if (get_random_u32() < clg->a2)
+ if (prandom_u32_state(s) < clg->a2)
clg->state = GOOD_STATE;
- if (get_random_u32() > clg->a3)
+ if (prandom_u32_state(s) > clg->a3)
return true;
}
@@ -289,7 +296,7 @@ static bool loss_event(struct netem_sched_data *q)
switch (q->loss_model) {
case CLG_RANDOM:
/* Random packet drop 0 => none, ~0 => all */
- return q->loss && q->loss >= get_crandom(&q->loss_cor);
+ return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
case CLG_4_STATES:
/* 4state loss model algorithm (used also for GI model)
@@ -318,6 +325,7 @@ static bool loss_event(struct netem_sched_data *q)
*/
static s64 tabledist(s64 mu, s32 sigma,
struct crndstate *state,
+ struct prng *prng,
const struct disttable *dist)
{
s64 x;
@@ -327,7 +335,7 @@ static s64 tabledist(s64 mu, s32 sigma,
if (sigma == 0)
return mu;
- rnd = get_crandom(state);
+ rnd = get_crandom(state, prng);
/* default uniform distribution */
if (dist == NULL)
@@ -449,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
skb->prev = NULL;
/* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
++count;
/* Drop packet? */
@@ -492,7 +500,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* If packet is going to be hardware checksummed, then
* do it now in software before we mangle it.
*/
- if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
if (skb_is_gso(skb)) {
skb = netem_segment(skb, sch, to_free);
if (!skb)
@@ -530,12 +538,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cb = netem_skb_cb(skb);
if (q->gap == 0 || /* not doing reordering */
q->counter < q->gap - 1 || /* inside last reordering gap */
- q->reorder < get_crandom(&q->reorder_cor)) {
+ q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
u64 now;
s64 delay;
delay = tabledist(q->latency, q->jitter,
- &q->delay_cor, q->delay_dist);
+ &q->delay_cor, &q->prng, q->delay_dist);
now = ktime_get_ns();
@@ -639,7 +647,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
else
next_delay = tabledist(q->slot_config.dist_delay,
(s32)(q->slot_config.dist_jitter),
- NULL, q->slot_dist);
+ NULL, &q->prng, q->slot_dist);
q->slot.slot_next = now + next_delay;
q->slot.packets_left = q->slot_config.max_packets;
@@ -922,6 +930,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
[TCA_NETEM_JITTER64] = { .type = NLA_S64 },
[TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
+ [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -1040,6 +1049,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
/* capping jitter to the range acceptable by tabledist() */
q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+ if (tb[TCA_NETEM_PRNG_SEED])
+ q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
+ else
+ q->prng.seed = get_random_u64();
+ prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+
unlock:
sch_tree_unlock(sch);
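
With TCA_NETEM_PRNG_SEED, the whole loss/jitter sequence becomes reproducible: two netem instances seeded alike take identical random decisions. A toy demonstration of that property; the LCG below is a stand-in for the kernel's prandom state, using Knuth's MMIX constants:

	#include <stdio.h>
	#include <stdint.h>

	struct prng { uint64_t s; };

	static uint32_t prng_next(struct prng *p)
	{
		p->s = p->s * 6364136223846793005ULL + 1442695040888963407ULL;
		return p->s >> 32;
	}

	int main(void)
	{
		struct prng a = { .s = 42 }, b = { .s = 42 };	/* same seed */

		for (int i = 0; i < 4; i++)
			printf("a=%u b=%u\n", prng_next(&a), prng_next(&b));
		return 0;
	}
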
@@ -1203,6 +1218,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
}
+ if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
+ TCA_NETEM_PAD))
+ goto nla_put_failure;
+
return nla_nest_end(skb, nla);
nla_put_failure:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index dfd9a99e6257..1a25752f1a9a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -130,8 +130,6 @@ struct qfq_aggregate;
struct qfq_class {
struct Qdisc_class_common common;
- unsigned int filter_cnt;
-
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
@@ -381,8 +379,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
u32 lmax)
{
struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
+ struct qfq_aggregate *new_agg;
+
+ /* 'lmax' can range within [QFQ_MIN_LMAX, pktlen + stab overhead] */
+ if (lmax > QFQ_MAX_LMAX)
+ return -EINVAL;
+ new_agg = qfq_find_agg(q, lmax, weight);
if (new_agg == NULL) { /* create new aggregate */
new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
if (new_agg == NULL)
@@ -423,10 +426,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
else
weight = 1;
- if (tb[TCA_QFQ_LMAX])
+ if (tb[TCA_QFQ_LMAX]) {
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
- else
+ } else {
+ /* MTU size is user controlled */
lmax = psched_mtu(qdisc_dev(sch));
+ if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "MTU size out of bounds for qfq");
+ return -EINVAL;
+ }
+ }
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
@@ -533,8 +543,10 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
- if (cl->filter_cnt > 0)
+ if (qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG_MOD(extack, "QFQ class in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -568,8 +580,8 @@ static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
{
struct qfq_class *cl = qfq_find_class(sch, classid);
- if (cl != NULL)
- cl->filter_cnt++;
+ if (cl)
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -578,7 +590,7 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct qfq_class *cl = (struct qfq_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 717ae51d94a0..1cb5e41c0ec7 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1015,6 +1015,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
TC_FP_PREEMPTIBLE),
};
+static struct netlink_range_validation_signed taprio_cycle_time_range = {
+ .min = 0,
+ .max = INT_MAX,
+};
+
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
@@ -1023,7 +1028,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
- [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
+ NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
@@ -1159,6 +1165,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
return -EINVAL;
}
+ if (cycle < 0 || cycle > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}
@@ -1347,7 +1358,7 @@ static void setup_txtime(struct taprio_sched *q,
struct sched_gate_list *sched, ktime_t base)
{
struct sched_entry *entry;
- u32 interval = 0;
+ u64 interval = 0;
list_for_each_entry(entry, &sched->entries, list) {
entry->next_txtime = ktime_add_ns(base, interval);
@@ -2088,11 +2099,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
return -EOPNOTSUPP;
}
- /* pre-allocate qdisc, attachment can't fail */
- q->qdiscs = kcalloc(dev->num_tx_queues,
- sizeof(q->qdiscs[0]),
+ q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
GFP_KERNEL);
-
if (!q->qdiscs)
return -ENOMEM;
@@ -2134,25 +2142,32 @@ static void taprio_attach(struct Qdisc *sch)
/* Attach underlying qdisc */
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- struct Qdisc *qdisc = q->qdiscs[ntx];
- struct Qdisc *old;
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+ struct Qdisc *old, *dev_queue_qdisc;
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ struct Qdisc *qdisc = q->qdiscs[ntx];
+
+ /* In offload mode, the root taprio qdisc is bypassed
+ * and the netdev TX queues see the children directly
+ */
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
- old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ dev_queue_qdisc = qdisc;
} else {
- old = dev_graft_qdisc(qdisc->dev_queue, sch);
- qdisc_refcount_inc(sch);
+ /* In software mode, attach the root taprio qdisc
+ * to all netdev TX queues, so that dev_qdisc_enqueue()
+ * goes through taprio_enqueue().
+ */
+ dev_queue_qdisc = sch;
}
+ old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
+ /* The qdisc's refcount needs to be elevated once
+ * for each netdev TX queue it is grafted onto.
+ */
+ qdisc_refcount_inc(dev_queue_qdisc);
if (old)
qdisc_put(old);
}
-
- /* access to the child qdiscs is not needed in offload mode */
- if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- kfree(q->qdiscs);
- q->qdiscs = NULL;
- }
}
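
The reworked loop above makes the graft target explicit: in full offload the per-queue child goes onto each TX queue, while in software mode every queue points at the root so enqueue funnels through taprio_enqueue(); either way one reference is taken per queue. A stand-in sketch of that decision with toy types, not the kernel API:

	#include <stdio.h>
	#include <stdbool.h>

	struct qdisc { const char *name; int refcnt; };

	/* stand-in for dev_graft_qdisc() plus the per-queue refcount bump */
	static void attach_queue(struct qdisc **slot, struct qdisc *q)
	{
		q->refcnt++;	/* one reference per netdev TX queue */
		*slot = q;
	}

	int main(void)
	{
		struct qdisc root = { "taprio", 0 };
		struct qdisc children[2] = { { "child0", 0 }, { "child1", 0 } };
		struct qdisc *txq[2];
		bool full_offload = false;

		for (int i = 0; i < 2; i++)
			attach_queue(&txq[i], full_offload ? &children[i] : &root);
		printf("txq0=%s txq1=%s root.refcnt=%d\n",
		       txq[0]->name, txq[1]->name, root.refcnt);
		return 0;
	}
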
static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
@@ -2181,13 +2196,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
+ /* In offload mode, the child Qdisc is directly attached to the netdev
+ * TX queue, and thus, we need to keep its refcount elevated in order
+ * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
+ * However, save the reference to the new qdisc in the private array in
+ * both software and offload cases, to have an up-to-date reference to
+ * our children.
+ */
+ *old = q->qdiscs[cl - 1];
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- *old = dev_graft_qdisc(dev_queue, new);
- } else {
- *old = q->qdiscs[cl - 1];
- q->qdiscs[cl - 1] = new;
+ WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
+ if (new)
+ qdisc_refcount_inc(new);
+ if (*old)
+ qdisc_put(*old);
}
+ q->qdiscs[cl - 1] = new;
if (new)
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2425,12 +2450,14 @@ start_error:
static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx = cl - 1;
- if (!dev_queue)
+ if (ntx >= dev->num_tx_queues)
return NULL;
- return rtnl_dereference(dev_queue->qdisc_sleeping);
+ return q->qdiscs[ntx];
}
static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
@@ -2445,11 +2472,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct Qdisc *child = taprio_leaf(sch, cl);
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
+ tcm->tcm_info = child->handle;
return 0;
}
@@ -2459,16 +2486,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__releases(d->lock)
__acquires(d->lock)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct Qdisc *child = taprio_leaf(sch, cl);
struct tc_taprio_qopt_offload offload = {
.cmd = TAPRIO_CMD_QUEUE_STATS,
.queue_stats = {
.queue = cl - 1,
},
};
- struct Qdisc *child;
- child = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
qdisc_qstats_copy(d, child) < 0)
return -1;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2613c4d74b16..17fcaa9b0df9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -581,7 +581,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
default:
return;
}
- if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+ if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 274d07bd774f..2185f44198de 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -360,7 +360,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
ret = inet_addr_type_table(net, addr->v4.sin_addr.s_addr, tb_id);
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
- !sp->inet.freebind &&
+ !inet_test_bit(FREEBIND, sk) &&
!READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
@@ -435,7 +435,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
+ fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9388d98aebc0..fd0631e70d46 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -99,7 +99,7 @@ struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
{
- sctp_memory_pressure = 1;
+ WRITE_ONCE(sctp_memory_pressure, 1);
}
@@ -9479,10 +9479,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
newinet->inet_dport = htons(asoc->peer.port);
newinet->pmtudisc = inet->pmtudisc;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
newinet->uc_ttl = inet->uc_ttl;
- newinet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, newsk);
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
@@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a7f887d91d89..bacdd971615e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
- WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
- WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+ WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+ WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,24 +436,9 @@ out:
return rc;
}
-static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
- unsigned long mask)
-{
- /* options we don't get control via setsockopt for */
- nsk->sk_type = osk->sk_type;
- nsk->sk_sndbuf = osk->sk_sndbuf;
- nsk->sk_rcvbuf = osk->sk_rcvbuf;
- nsk->sk_sndtimeo = osk->sk_sndtimeo;
- nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
- nsk->sk_mark = osk->sk_mark;
- nsk->sk_priority = osk->sk_priority;
- nsk->sk_rcvlowat = osk->sk_rcvlowat;
- nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
- nsk->sk_err = osk->sk_err;
-
- nsk->sk_flags &= ~mask;
- nsk->sk_flags |= osk->sk_flags & mask;
-}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
@@ -470,9 +455,55 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
(1UL << SOCK_NOFCS) | \
(1UL << SOCK_FILTER_LOCKED) | \
(1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ struct net *nnet = sock_net(nsk);
+
+ nsk->sk_userlocks = osk->sk_userlocks;
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_sndbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_sndbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+ }
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+ }
+}
+
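
Put simply, the helper above lets an explicit setsockopt() size win; otherwise the clc (TCP) socket inherits the TCP sysctl default and the SMC socket gets twice the SMC sysctl value. A minimal sketch of that selection with illustrative numbers, not the kernel types:

	#include <stdio.h>
	#include <stdbool.h>

	static int pick_sndbuf(bool user_locked, int user_val, bool to_clc,
			       int tcp_wmem_def, int smc_wmem)
	{
		if (user_locked)
			return user_val;	/* SO_SNDBUF from setsockopt() wins */
		return to_clc ? tcp_wmem_def : 2 * smc_wmem;
	}

	int main(void)
	{
		printf("user-locked: %d\n", pick_sndbuf(true, 262144, true, 16384, 65536));
		printf("to clc sock: %d\n", pick_sndbuf(false, 0, true, 16384, 65536));
		printf("to smc sock: %d\n", pick_sndbuf(false, 0, false, 16384, 65536));
		return 0;
	}
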
+static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ /* options we don't get control via setsockopt for */
+ nsk->sk_type = osk->sk_type;
+ nsk->sk_sndtimeo = osk->sk_sndtimeo;
+ nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+ nsk->sk_mark = READ_ONCE(osk->sk_mark);
+ nsk->sk_priority = osk->sk_priority;
+ nsk->sk_rcvlowat = osk->sk_rcvlowat;
+ nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
+ nsk->sk_err = osk->sk_err;
+
+ nsk->sk_flags &= ~mask;
+ nsk->sk_flags |= osk->sk_flags & mask;
+
+ smc_adjust_sock_bufsizes(nsk, osk, mask);
+}
+
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -610,20 +641,22 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- /* optional 2nd link, receive ADD LINK request from server */
- qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
- SMC_LLC_ADD_LINK);
- if (!qentry) {
- struct smc_clc_msg_decline dclc;
-
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- if (rc == -EAGAIN)
- rc = 0; /* no DECLINE received, go with one link */
- return rc;
+ if (link->lgr->max_links > 1) {
+ /* optional 2nd link, receive ADD LINK request from server */
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK);
+ if (!qentry) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ if (rc == -EAGAIN)
+ rc = 0; /* no DECLINE received, go with one link */
+ return rc;
+ }
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+ smc_llc_cli_add_link(link, qentry);
}
- smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
- smc_llc_cli_add_link(link, qentry);
return 0;
}
@@ -1113,7 +1146,7 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
#define SMC_CLC_MAX_ACCEPT_LEN \
(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
- sizeof(struct smc_clc_first_contact_ext) + \
+ sizeof(struct smc_clc_first_contact_ext_v2x) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
@@ -1167,8 +1200,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
- (struct smc_clc_first_contact_ext *)
- (((u8 *)clc_v2) + sizeof(*clc_v2));
+ smc_get_clc_first_contact_ext(clc_v2, false);
+ int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
return 0;
@@ -1187,6 +1220,12 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
return SMC_CLC_DECL_NOINDIRECT;
}
}
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+
return 0;
}
@@ -1205,6 +1244,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
if (reason_code)
@@ -1355,6 +1396,16 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
+ if (ini->first_contact_peer) {
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(aclc_v2, true);
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+ }
+
rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
if (rc)
return rc;
@@ -1389,7 +1440,7 @@ static int smc_connect_ism(struct smc_sock *smc,
}
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
- aclc->hdr.version, eid, NULL);
+ aclc->hdr.version, eid, ini);
if (rc)
goto connect_abort;
mutex_unlock(&smc_server_lgr_pending);
@@ -1789,7 +1840,7 @@ void smc_close_non_accepted(struct sock *sk)
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
- sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
__smc_release(smc);
release_sock(sk);
sock_put(sk); /* sock_hold above */
@@ -1839,10 +1890,12 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- down_write(&link->lgr->llc_conf_mutex);
- /* initial contact - try to establish second link */
- smc_llc_srv_add_link(link, NULL);
- up_write(&link->lgr->llc_conf_mutex);
+ if (link->lgr->max_links > 1) {
+ down_write(&link->lgr->llc_conf_mutex);
+ /* initial contact - try to establish second link */
+ smc_llc_srv_add_link(link, NULL);
+ up_write(&link->lgr->llc_conf_mutex);
+ }
return 0;
}
@@ -1965,6 +2018,10 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
}
}
+ ini->release_nr = pclc_v2_ext->hdr.flag.release;
+ if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE)
+ ini->release_nr = SMC_RELEASE;
+
out:
if (!ini->smcd_version && !ini->smcr_version)
return rc;
@@ -2399,6 +2456,10 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
+ rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ if (rc)
+ goto out_decl;
+
mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
@@ -2412,7 +2473,7 @@ static void smc_listen_work(struct work_struct *work)
/* send SMC Accept CLC message */
accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
- accept_version, ini->negotiated_eid);
+ accept_version, ini->negotiated_eid, ini);
if (rc)
goto out_unlock;
@@ -2431,6 +2492,18 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
}
+ rc = smc_clc_v2x_features_confirm_check(cclc, ini);
+ if (rc) {
+ if (!ini->is_smcd)
+ goto out_unlock;
+ goto out_decl;
+ }
+
+ /* The SMC release version from the fce is needed in
+ * smc_listen_rdma_finish(), so save the fce info here.
+ */
+ smc_conn_save_peer_info_fce(new_smc, cclc);
+
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
@@ -2479,8 +2552,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
- new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2eeea4cdc718..24745fde4ac2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -21,7 +21,10 @@
#define SMC_V1 1 /* SMC version V1 */
#define SMC_V2 2 /* SMC version V2 */
-#define SMC_RELEASE 0
+
+#define SMC_RELEASE_0 0
+#define SMC_RELEASE_1 1
+#define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
@@ -161,7 +164,7 @@ struct smc_connection {
struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
- int rmbe_size_short;/* compressed notation */
+ int rmbe_size_comp; /* compressed notation */
int rmbe_update_limit;
/* lower limit for consumer
* cursor update
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index b9b8b07aa702..8deb46c28f1d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -391,9 +391,7 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
return false;
} else {
if (hdr->typev1 == SMC_TYPE_D &&
- ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
- (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
- sizeof(struct smc_clc_first_contact_ext)))
+ ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2)
return false;
if (hdr->typev1 == SMC_TYPE_R &&
ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
@@ -420,13 +418,29 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
return true;
}
-static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
+static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
+ struct smc_init_info *ini)
{
+ int ret = sizeof(*fce);
+
memset(fce, 0, sizeof(*fce));
- fce->os_type = SMC_CLC_OS_LINUX;
- fce->release = SMC_RELEASE;
- memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
- (*len) += sizeof(*fce);
+ fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
+ fce->fce_v2_base.release = ini->release_nr;
+ memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname));
+ if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
+ ret = sizeof(struct smc_clc_first_contact_ext);
+ goto out;
+ }
+
+ if (ini->release_nr >= SMC_RELEASE_1) {
+ if (!ini->is_smcd) {
+ fce->max_conns = ini->max_conns;
+ fce->max_links = ini->max_links;
+ }
+ }
+
+out:
+ return ret;
}
/* check if received message has a correct header length and contains valid
@@ -927,8 +941,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid);
}
}
- if (smcr_indicated(ini->smc_type_v2))
+ if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
+ v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
+ v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ }
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -986,13 +1003,13 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
u8 *eid, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct smc_clc_first_contact_ext_v2x fce;
struct smc_clc_msg_accept_confirm *clc;
- struct smc_clc_first_contact_ext fce;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
+ int i, len, fce_len;
struct kvec vec[5];
struct msghdr msg;
- int i, len;
/* send SMC Confirm CLC msg */
clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
@@ -1007,7 +1024,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->d0.gid =
conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
- clc->d0.dmbe_size = conn->rmbe_size_short;
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
if (version == SMC_V1) {
@@ -1018,8 +1035,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (eid && eid[0])
memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact)
- smc_clc_fill_fce(&fce, &len);
+ if (first_contact) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ }
clc_v2->hdr.length = htons(len);
}
memcpy(trl.eyecatcher, SMCD_EYECATCHER,
@@ -1050,7 +1069,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
break;
}
- clc->r0.rmbe_size = conn->rmbe_size_short;
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
cpu_to_be64((u64)sg_dma_address
@@ -1063,15 +1082,14 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
- smc_clc_fill_fce(&fce, &len);
- fce.v2_direct = !link->lgr->uses_gateway;
- memset(&gle, 0, sizeof(gle));
- if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway;
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ memset(&gle, 0, sizeof(gle));
gle.gid_cnt = ini->smcrv2.gidlist.len;
len += sizeof(gle);
len += gle.gid_cnt * sizeof(gle.gid[0]);
- } else {
- len += sizeof(gle.reserved);
}
}
clc_v2->hdr.length = htons(len);
@@ -1094,7 +1112,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
sizeof(trl);
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
- vec[i++].iov_len = sizeof(fce);
+ vec[i++].iov_len = fce_len;
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
vec[i].iov_base = &gle;
@@ -1102,9 +1120,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
vec[i].iov_base = &ini->smcrv2.gidlist.list;
vec[i++].iov_len = gle.gid_cnt *
sizeof(gle.gid[0]);
- } else {
- vec[i].iov_base = &gle.reserved;
- vec[i++].iov_len = sizeof(gle.reserved);
}
}
}
@@ -1141,7 +1156,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid)
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
@@ -1149,13 +1164,95 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
- version, negotiated_eid, NULL);
+ version, negotiated_eid, ini);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_v2_extension *pclc_v2_ext;
+
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
+
+ if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
+ ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ pclc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!pclc_v2_ext)
+ return SMC_CLC_DECL_NOV2EXT;
+
+ if (ini->smcr_version & SMC_V2) {
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+ ini->max_conns = fce_v2x->max_conns;
+
+ if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX ||
+ fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ ini->max_links = fce_v2x->max_links;
+ }
+
+ return 0;
+}
+
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)cclc;
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd);
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (cclc->hdr.version == SMC_V1 ||
+ !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return 0;
+
+ if (ini->release_nr != fce->release)
+ return SMC_CLC_DECL_RELEASEERR;
+
+ if (fce->release < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns != ini->max_conns)
+ return SMC_CLC_DECL_MAXCONNERR;
+ if (fce_v2x->max_links != ini->max_links)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
void smc_clc_get_hostname(u8 **host)
{
*host = &smc_hostname[0];
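
The three validation helpers above implement the SMC-R v2.1 limit negotiation: the server clamps the client's advertised max_conns/max_links to its own preferred values and declines if the result falls below the protocol minimum; the client checks the server's echo against the same bounds; and the confirm check requires an exact match. A minimal user-space sketch of the clamping rule, using the constants introduced in smc_core.h (negotiate_limit() itself is a hypothetical helper, not kernel code):

#include <stdint.h>

#define SMC_CONN_PER_LGR_MIN         16
#define SMC_CONN_PER_LGR_PREFER      255
#define SMC_LINKS_ADD_LNK_MIN        1
#define SMC_LINKS_PER_LGR_MAX_PREFER 2

/* Clamp the peer's offer to our preference; 0 means "decline". */
static uint8_t negotiate_limit(uint8_t peer, uint8_t prefer, uint8_t min)
{
	uint8_t val = peer < prefer ? peer : prefer;	/* min_t(u8, ...) */

	return val >= min ? val : 0;
}

/* e.g. negotiate_limit(pclc_v2_ext->max_conns,
 *                      SMC_CONN_PER_LGR_PREFER, SMC_CONN_PER_LGR_MIN);
 * a result of 0 corresponds to SMC_CLC_DECL_MAXCONNERR above.
 */
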
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5fee545c9a10..c5c8e7db775a 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -45,6 +45,9 @@
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
+#define SMC_CLC_DECL_RELEASEERR 0x03030009 /* release version negotiation failed */
+#define SMC_CLC_DECL_MAXCONNERR 0x0303000a /* max connections negotiation failed */
+#define SMC_CLC_DECL_MAXLINKERR 0x0303000b /* max links negotiation failed */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
@@ -133,7 +136,9 @@ struct smc_clc_smcd_gid_chid {
struct smc_clc_v2_extension {
struct smc_clnt_opts_area_hdr hdr;
u8 roce[16]; /* RoCEv2 GID */
- u8 reserved[16];
+ u8 max_conns;
+ u8 max_links;
+ u8 reserved[14];
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -147,7 +152,9 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
struct smc_clc_msg_smcd { /* SMC-D GID information */
struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
- u8 reserved[28];
+ u8 vendor_oui[3]; /* vendor organizationally unique identifier */
+ u8 vendor_exp_options[5];
+ u8 reserved[20];
};
struct smc_clc_smcd_v2_extension {
@@ -231,8 +238,19 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
+struct smc_clc_first_contact_ext_v2x {
+ struct smc_clc_first_contact_ext fce_v2_base;
+ u8 max_conns; /* for SMC-R only */
+ u8 max_links; /* for SMC-R only */
+ u8 reserved3[2];
+ __be32 vendor_exp_options;
+ u8 reserved4[8];
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2 (Third Edition)
+ * (https://www.ibm.com/support/pages/node/7009315)
+ */
+
struct smc_clc_fce_gid_ext {
- u8 reserved[16];
u8 gid_cnt;
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE];
@@ -370,6 +388,27 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
ntohs(prop_v2ext->hdr.smcd_v2_ext_offset));
}
+static inline struct smc_clc_first_contact_ext *
+smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ bool is_smcd)
+{
+ int clc_v2_len;
+
+ if (clc_v2->hdr.version == SMC_V1 ||
+ !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return NULL;
+
+ if (is_smcd)
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, d1);
+ else
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, r1);
+
+ return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) +
+ clc_v2_len);
+}
+
struct smcd_dev;
struct smc_init_info;
@@ -382,7 +421,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid);
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini);
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini);
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini);
void smc_clc_init(void) __init;
void smc_clc_exit(void);
void smc_clc_get_hostname(u8 **host);
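
smc_get_clc_first_contact_ext() locates the first-contact extension by taking offsetofend() of whichever fixed-size variant (d1 for SMC-D, r1 for SMC-R) precedes it, rather than hard-coding byte counts. A self-contained sketch of that idiom with a made-up message layout (the kernel's offsetofend() lives in linux/stddef.h):

#include <stddef.h>
#include <stdint.h>

#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

struct example_msg {		/* made-up stand-in for accept/confirm v2 */
	uint32_t hdr;
	uint8_t  d1[8];		/* SMC-D variant payload */
	uint8_t  r1[16];	/* SMC-R variant payload */
};

/* The extension begins right after whichever variant is in use. */
static void *first_contact_ext(struct example_msg *msg, int is_smcd)
{
	size_t len = is_smcd ? offsetofend(struct example_msg, d1)
			     : offsetofend(struct example_msg, r1);

	return (uint8_t *)msg + len;
}
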
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3f465faf2b68..bd01dd31e4bd 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -319,6 +319,10 @@ static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
+ goto errv2attr;
nla_nest_end(skb, v2_attrs);
return 0;
@@ -895,9 +899,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->uses_gateway = ini->smcrv2.uses_gateway;
memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
ETH_ALEN);
+ lgr->max_conns = ini->max_conns;
+ lgr->max_links = ini->max_links;
} else {
ibdev = ini->ib_dev;
ibport = ini->ib_port;
+ lgr->max_conns = SMC_CONN_PER_LGR_MAX;
+ lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
@@ -1664,6 +1672,9 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
!rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
continue;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ continue;
+
/* trigger local add link processing */
link = smc_llc_usable_link(lgr);
if (link)
@@ -1888,7 +1899,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ (lgr->conns_num < lgr->max_conns &&
!bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
@@ -2309,31 +2320,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
- int bufsize, bufsize_short;
+ int bufsize, bufsize_comp;
struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- int sk_buf_size;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_rcvbuf;
+ bufsize = smc->sk.sk_rcvbuf / 2;
else
/* use socket send buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_sndbuf;
+ bufsize = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
- bufsize_short >= 0; bufsize_short--) {
+ for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+ bufsize_comp >= 0; bufsize_comp--) {
if (is_rmb) {
lock = &lgr->rmbs_lock;
- buf_list = &lgr->rmbs[bufsize_short];
+ buf_list = &lgr->rmbs[bufsize_comp];
} else {
lock = &lgr->sndbufs_lock;
- buf_list = &lgr->sndbufs[bufsize_short];
+ buf_list = &lgr->sndbufs[bufsize_comp];
}
- bufsize = smc_uncompress_bufsize(bufsize_short);
+ bufsize = smc_uncompress_bufsize(bufsize_comp);
/* check for reusable slot in the link group */
- buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+ buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2387,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
- conn->rmbe_size_short = bufsize_short;
- smc->sk.sk_rcvbuf = bufsize;
+ conn->rmbe_size_comp = bufsize_comp;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2396,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
- smc->sk.sk_sndbuf = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
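
__smc_buf_create() now starts from half of sk_sndbuf/sk_rcvbuf (the kernel convention reserves the other half for bookkeeping overhead) and writes back twice the buffer size it actually allocated. A worked example; compress()/uncompress() are stand-ins approximating the kernel helpers, which map index i to 2^(i+14) bytes (SMC_BUF_MIN_SIZE is 16 KiB):

#include <stdio.h>

static int uncompress(int idx) { return 1 << (idx + 14); }

static int compress(int size)
{
	int idx = 0;

	while (uncompress(idx) < size && idx < 8)
		idx++;
	return idx;
}

int main(void)
{
	int sk_rcvbuf = 131072;		/* e.g. set via SO_RCVBUF */
	int bufsize = sk_rcvbuf / 2;	/* half is bookkeeping overhead */
	int idx = compress(bufsize);

	bufsize = uncompress(idx);	/* RMB size actually allocated */
	sk_rcvbuf = bufsize * 2;	/* restore the doubled convention */
	printf("RMB %d bytes, sk_rcvbuf %d\n", bufsize, sk_rcvbuf);
	return 0;
}
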
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1645fba0d2d3..120027d40469 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -22,6 +22,15 @@
#include "smc_ib.h"
#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */
+#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */
+#define SMC_CONN_PER_LGR_MAX 255 /* max. # of connections per link group,
+ * also the default value for SMC-R v1 and v2.0
+ */
+#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distributions may modify it to a value between
+ * 16-255 as needed.
+ */
struct smc_lgr_list { /* list of link group definition */
struct list_head list;
@@ -164,6 +173,15 @@ struct smc_link {
*/
#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
+#define SMC_LINKS_ADD_LNK_MIN 1 /* min. # of links per link group */
+#define SMC_LINKS_ADD_LNK_MAX 2 /* max. # of links per link group, also the
+ * default value for SMC-R v1.0 and v2.0
+ */
+#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distributions may modify it to a value between
+ * 1-2 as needed.
+ */
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
@@ -331,6 +349,10 @@ struct smc_link_group {
__be32 saddr;
/* net namespace */
struct net *net;
+ u8 max_conns;
+ /* max connections that can be assigned to the lgr */
+ u8 max_links;
+ /* max links that can be added to the lgr */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -374,6 +396,9 @@ struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
u8 smc_type_v2;
+ u8 release_nr;
+ u8 max_conns;
+ u8 max_links;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
@@ -539,7 +564,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 034295676e88..4df5f8c8a0a1 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -96,7 +96,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk);
int smc_ib_create_queue_pair(struct smc_link *lnk);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
-int smc_ib_modify_qp_reset(struct smc_link *lnk);
int smc_ib_modify_qp_error(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 90f0b60b196a..018ce8133b02 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -52,14 +52,13 @@ struct smc_llc_msg_confirm_link { /* type 0x01 */
u8 link_num;
u8 link_uid[SMC_LGR_ID_SIZE];
u8 max_links;
- u8 reserved[9];
+ u8 max_conns;
+ u8 reserved[8];
};
#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
-#define SMC_LLC_ADD_LNK_MAX_LINKS 2
-
struct smc_llc_msg_add_link { /* type 0x02 */
struct smc_llc_hdr hd;
u8 sender_mac[ETH_ALEN];
@@ -471,7 +470,12 @@ int smc_llc_send_confirm_link(struct smc_link *link,
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
+ confllc->max_links = SMC_LINKS_ADD_LNK_MAX;
+ if (link->lgr->smc_version == SMC_V2 &&
+ link->lgr->peer_smc_release >= SMC_RELEASE_1) {
+ confllc->max_conns = link->lgr->max_conns;
+ confllc->max_links = link->lgr->max_links;
+ }
/* send llc message */
rc = smc_wr_tx_send(link, pend);
put_out:
@@ -1041,6 +1045,11 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_reject;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out_reject;
+ }
+
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
ini->check_smcrv2 = true;
@@ -1165,6 +1174,9 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link,
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ goto out;
+
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini)
goto out;
@@ -1410,6 +1422,11 @@ int smc_llc_srv_add_link(struct smc_link *link,
goto out;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out;
+ }
+
/* ignore client add link recommendation, start new flow */
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 3ab2d8eefc55..5cbc18c6e62b 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -21,6 +21,10 @@
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
static struct ctl_table smc_table[] = {
{
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
+ .extra2 = &max_sndbuf,
},
{
.procname = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
+ .extra2 = &max_rcvbuf,
},
{ }
};
@@ -89,8 +95,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
- WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
- WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+ WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+ WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
return 0;
diff --git a/net/socket.c b/net/socket.c
index 2b0e54b2405c..848116d06b51 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -136,9 +136,10 @@ static void sock_splice_eof(struct file *file);
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
struct socket *sock = f->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
- if (sock->ops->show_fdinfo)
- sock->ops->show_fdinfo(m, sock);
+ if (ops->show_fdinfo)
+ ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
@@ -646,12 +647,14 @@ EXPORT_SYMBOL(sock_alloc);
static void __sock_release(struct socket *sock, struct inode *inode)
{
- if (sock->ops) {
- struct module *owner = sock->ops->owner;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
+
+ if (ops) {
+ struct module *owner = ops->owner;
if (inode)
inode_lock(inode);
- sock->ops->release(sock);
+ ops->release(sock);
sock->sk = NULL;
if (inode)
inode_unlock(inode);
@@ -722,7 +725,7 @@ static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
+ int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
@@ -786,13 +789,14 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
struct socket *sock = sk->sk_socket;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
- if (!sock->ops->sendmsg_locked)
+ if (!ops->sendmsg_locked)
return sock_no_sendmsg_locked(sk, msg, size);
iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
- return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+ return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
}
EXPORT_SYMBOL(kernel_sendmsg_locked);
@@ -1017,7 +1021,8 @@ static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int f
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
+ inet6_recvmsg,
inet_recvmsg, sock, msg,
msg_data_left(msg), flags);
if (trace_sock_recv_length_enabled())
@@ -1072,19 +1077,23 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
unsigned int flags)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops;
- if (unlikely(!sock->ops->splice_read))
+ ops = READ_ONCE(sock->ops);
+ if (unlikely(!ops->splice_read))
return copy_splice_read(file, ppos, pipe, len, flags);
- return sock->ops->splice_read(sock, ppos, pipe, len, flags);
+ return ops->splice_read(sock, ppos, pipe, len, flags);
}
static void sock_splice_eof(struct file *file)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops;
- if (sock->ops->splice_eof)
- sock->ops->splice_eof(sock);
+ ops = READ_ONCE(sock->ops);
+ if (ops->splice_eof)
+ ops->splice_eof(sock);
}
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -1181,13 +1190,14 @@ EXPORT_SYMBOL(vlan_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct ifreq ifr;
bool need_copyout;
int err;
void __user *argp = (void __user *)arg;
void __user *data;
- err = sock->ops->ioctl(sock, cmd, arg);
+ err = ops->ioctl(sock, cmd, arg);
/*
* If this ioctl is unknown try to hand it down
@@ -1216,6 +1226,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
+ const struct proto_ops *ops;
struct socket *sock;
struct sock *sk;
void __user *argp = (void __user *)arg;
@@ -1223,6 +1234,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
struct net *net;
sock = file->private_data;
+ ops = READ_ONCE(sock->ops);
sk = sock->sk;
net = sock_net(sk);
if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
@@ -1280,23 +1292,23 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
- if (!sock->ops->gettstamp) {
+ if (!ops->gettstamp) {
err = -ENOIOCTLCMD;
break;
}
- err = sock->ops->gettstamp(sock, argp,
- cmd == SIOCGSTAMP_OLD,
- !IS_ENABLED(CONFIG_64BIT));
+ err = ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_OLD,
+ !IS_ENABLED(CONFIG_64BIT));
break;
case SIOCGSTAMP_NEW:
case SIOCGSTAMPNS_NEW:
- if (!sock->ops->gettstamp) {
+ if (!ops->gettstamp) {
err = -ENOIOCTLCMD;
break;
}
- err = sock->ops->gettstamp(sock, argp,
- cmd == SIOCGSTAMP_NEW,
- false);
+ err = ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_NEW,
+ false);
break;
case SIOCGIFCONF:
@@ -1357,9 +1369,10 @@ EXPORT_SYMBOL(sock_create_lite);
static __poll_t sock_poll(struct file *file, poll_table *wait)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
__poll_t events = poll_requested_events(wait), flag = 0;
- if (!sock->ops->poll)
+ if (!ops->poll)
return 0;
if (sk_can_busy_loop(sock->sk)) {
@@ -1371,14 +1384,14 @@ static __poll_t sock_poll(struct file *file, poll_table *wait)
flag = POLL_BUSY_LOOP;
}
- return sock->ops->poll(file, sock, wait) | flag;
+ return ops->poll(file, sock, wait) | flag;
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
struct socket *sock = file->private_data;
- return sock->ops->mmap(file, sock, vma);
+ return READ_ONCE(sock->ops)->mmap(file, sock, vma);
}
static int sock_close(struct inode *inode, struct file *filp)
@@ -1644,12 +1657,36 @@ struct file *__sys_socket_file(int family, int type, int protocol)
return sock_alloc_file(sock, flags, NULL);
}
+/* A hook for bpf progs to attach to and update socket protocol.
+ *
+ * A static noinline declaration here could cause the compiler to
+ * optimize away the function. A global noinline declaration will
+ * keep the definition, but may optimize away the callsite.
+ * Therefore, __weak is needed to ensure that the call is still
+ * emitted, by telling the compiler that we don't know what the
+ * function might eventually be.
+ *
+ * __diag_* below are needed to dismiss the missing prototype warning.
+ */
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "A fmod_ret entry point for BPF programs");
+
+__weak noinline int update_socket_protocol(int family, int type, int protocol)
+{
+ return protocol;
+}
+
+__diag_pop();
+
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
int flags;
- sock = __sys_socket_create(family, type, protocol);
+ sock = __sys_socket_create(family, type,
+ update_socket_protocol(family, type, protocol));
if (IS_ERR(sock))
return PTR_ERR(sock);
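
update_socket_protocol() exists solely as an attach point: a BPF program of type fmod_ret can observe the (family, type, protocol) triple and return a replacement protocol before the socket is created. A minimal sketch of such a program, assuming libbpf's SEC()/BPF_PROG() macros and a generated vmlinux.h; it transparently upgrades plain TCP requests to MPTCP:

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_tracing.h>

#define AF_INET		2
#define AF_INET6	10
#define SOCK_STREAM	1
#define IPPROTO_TCP	6
#define IPPROTO_MPTCP	262

char LICENSE[] SEC("license") = "GPL";

SEC("fmod_ret/update_socket_protocol")
int BPF_PROG(mptcpify, int family, int type, int protocol)
{
	if ((family == AF_INET || family == AF_INET6) &&
	    type == SOCK_STREAM &&
	    (!protocol || protocol == IPPROTO_TCP))
		return IPPROTO_MPTCP;	/* rewrite the requested protocol */

	return protocol;		/* leave everything else untouched */
}
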
@@ -1728,7 +1765,7 @@ int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
goto out;
}
- err = sock1->ops->socketpair(sock1, sock2);
+ err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
if (unlikely(err < 0)) {
sock_release(sock2);
sock_release(sock1);
@@ -1789,7 +1826,7 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
(struct sockaddr *)&address,
addrlen);
if (!err)
- err = sock->ops->bind(sock,
+ err = READ_ONCE(sock->ops)->bind(sock,
(struct sockaddr *)
&address, addrlen);
}
@@ -1823,7 +1860,7 @@ int __sys_listen(int fd, int backlog)
err = security_socket_listen(sock, backlog);
if (!err)
- err = sock->ops->listen(sock, backlog);
+ err = READ_ONCE(sock->ops)->listen(sock, backlog);
fput_light(sock->file, fput_needed);
}
@@ -1843,6 +1880,7 @@ struct file *do_accept(struct file *file, unsigned file_flags,
struct file *newfile;
int err, len;
struct sockaddr_storage address;
+ const struct proto_ops *ops;
sock = sock_from_file(file);
if (!sock)
@@ -1851,15 +1889,16 @@ struct file *do_accept(struct file *file, unsigned file_flags,
newsock = sock_alloc();
if (!newsock)
return ERR_PTR(-ENFILE);
+ ops = READ_ONCE(sock->ops);
newsock->type = sock->type;
- newsock->ops = sock->ops;
+ newsock->ops = ops;
/*
* We don't need try_module_get here, as the listening socket (sock)
* has the protocol module (sock->ops->owner) held.
*/
- __module_get(newsock->ops->owner);
+ __module_get(ops->owner);
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
if (IS_ERR(newfile))
@@ -1869,14 +1908,13 @@ struct file *do_accept(struct file *file, unsigned file_flags,
if (err)
goto out_fd;
- err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
+ err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
false);
if (err < 0)
goto out_fd;
if (upeer_sockaddr) {
- len = newsock->ops->getname(newsock,
- (struct sockaddr *)&address, 2);
+ len = ops->getname(newsock, (struct sockaddr *)&address, 2);
if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
@@ -1989,8 +2027,8 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
if (err)
goto out;
- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
- sock->file->f_flags | file_flags);
+ err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
+ addrlen, sock->file->f_flags | file_flags);
out:
return err;
}
@@ -2039,7 +2077,7 @@ int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
if (err < 0)
goto out_put;
/* "err" is actually length in this case */
@@ -2071,13 +2109,15 @@ int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
+
err = security_socket_getpeername(sock);
if (err) {
fput_light(sock->file, fput_needed);
return err;
}
- err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ err = ops->getname(sock, (struct sockaddr *)&address, 1);
if (err >= 0)
/* "err" is actually length in this case */
err = move_addr_to_user(&address, err, usockaddr,
@@ -2227,6 +2267,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
int optlen)
{
sockptr_t optval = USER_SOCKPTR(user_optval);
+ const struct proto_ops *ops;
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2255,12 +2296,13 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
if (kernel_optval)
optval = KERNEL_SOCKPTR(kernel_optval);
+ ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
err = sock_setsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!sock->ops->setsockopt))
+ else if (unlikely(!ops->setsockopt))
err = -EOPNOTSUPP;
else
- err = sock->ops->setsockopt(sock, level, optname, optval,
+ err = ops->setsockopt(sock, level, optname, optval,
optlen);
kfree(kernel_optval);
out_put:
@@ -2285,6 +2327,7 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen)
{
int max_optlen __maybe_unused;
+ const struct proto_ops *ops;
int err, fput_needed;
struct socket *sock;
@@ -2299,12 +2342,13 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
if (!in_compat_syscall())
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+ ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET)
err = sock_getsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!sock->ops->getsockopt))
+ else if (unlikely(!ops->getsockopt))
err = -EOPNOTSUPP;
else
- err = sock->ops->getsockopt(sock, level, optname, optval,
+ err = ops->getsockopt(sock, level, optname, optval,
optlen);
if (!in_compat_syscall())
@@ -2332,7 +2376,7 @@ int __sys_shutdown_sock(struct socket *sock, int how)
err = security_socket_shutdown(sock, how);
if (!err)
- err = sock->ops->shutdown(sock, how);
+ err = READ_ONCE(sock->ops)->shutdown(sock, how);
return err;
}
@@ -3324,6 +3368,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
void __user *argp = compat_ptr(arg);
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
+ const struct proto_ops *ops;
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
return sock_ioctl(file, cmd, (unsigned long)argp);
@@ -3333,10 +3378,11 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
return compat_siocwandev(net, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
- if (!sock->ops->gettstamp)
+ ops = READ_ONCE(sock->ops);
+ if (!ops->gettstamp)
return -ENOIOCTLCMD;
- return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
- !COMPAT_USE_64BIT_TIME);
+ return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
+ !COMPAT_USE_64BIT_TIME);
case SIOCETHTOOL:
case SIOCBONDSLAVEINFOQUERY:
@@ -3417,6 +3463,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
int ret = -ENOIOCTLCMD;
struct sock *sk;
struct net *net;
@@ -3424,8 +3471,8 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
sk = sock->sk;
net = sock_net(sk);
- if (sock->ops->compat_ioctl)
- ret = sock->ops->compat_ioctl(sock, cmd, arg);
+ if (ops->compat_ioctl)
+ ret = ops->compat_ioctl(sock, cmd, arg);
if (ret == -ENOIOCTLCMD &&
(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
@@ -3449,7 +3496,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return sock->ops->bind(sock, addr, addrlen);
+ return READ_ONCE(sock->ops)->bind(sock, addr, addrlen);
}
EXPORT_SYMBOL(kernel_bind);
@@ -3463,7 +3510,7 @@ EXPORT_SYMBOL(kernel_bind);
int kernel_listen(struct socket *sock, int backlog)
{
- return sock->ops->listen(sock, backlog);
+ return READ_ONCE(sock->ops)->listen(sock, backlog);
}
EXPORT_SYMBOL(kernel_listen);
@@ -3481,6 +3528,7 @@ EXPORT_SYMBOL(kernel_listen);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
int err;
err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
@@ -3488,15 +3536,15 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = sock->ops->accept(sock, *newsock, flags, true);
+ err = ops->accept(sock, *newsock, flags, true);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
goto done;
}
- (*newsock)->ops = sock->ops;
- __module_get((*newsock)->ops->owner);
+ (*newsock)->ops = ops;
+ __module_get(ops->owner);
done:
return err;
@@ -3519,7 +3567,12 @@ EXPORT_SYMBOL(kernel_accept);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
- return sock->ops->connect(sock, addr, addrlen, flags);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
+ addrlen, flags);
}
EXPORT_SYMBOL(kernel_connect);
@@ -3534,7 +3587,7 @@ EXPORT_SYMBOL(kernel_connect);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, 0);
+ return READ_ONCE(sock->ops)->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
@@ -3549,7 +3602,7 @@ EXPORT_SYMBOL(kernel_getsockname);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, 1);
+ return READ_ONCE(sock->ops)->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
@@ -3563,7 +3616,7 @@ EXPORT_SYMBOL(kernel_getpeername);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
- return sock->ops->shutdown(sock, how);
+ return READ_ONCE(sock->ops)->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
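
All the net/socket.c hunks above follow one pattern: read sock->ops exactly once with READ_ONCE() into a local, then dereference only the local copy, so a concurrent protocol switch (SMC fallback rewrites sock->ops at runtime) cannot be observed half-way through a sequence of calls; kernel_connect() additionally copies the caller's sockaddr to on-stack storage so a hook that rewrites the address cannot scribble on the caller's buffer. The shape of the snapshot pattern, as a generic self-contained sketch (READ_ONCE here is a simplified stand-in for the kernel's rwonce.h version):

#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

struct ops { int (*poll)(void *obj); int (*ioctl)(void *obj, int cmd); };
struct obj { const struct ops *ops; };

static int do_call(struct obj *o, int cmd)
{
	/* One racy read of the pointer ... */
	const struct ops *ops = READ_ONCE(o->ops);

	/* ... then every dereference uses the stable local snapshot,
	 * so a concurrent ops swap cannot be seen mid-sequence.
	 */
	if (ops->poll(o) < 0)
		return -1;
	return ops->ioctl(o, cmd);
}
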
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b6034fab9ab..f420d8457345 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e43f26382411..8c9a8ee76aa0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -43,7 +43,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
@@ -226,27 +226,30 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
}
static int
-svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
+svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -ENOTCONN : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
@@ -258,13 +261,14 @@ svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
struct cmsghdr cmsg;
u8 buf[CMSG_SPACE(sizeof(u8))];
} u;
+ struct socket *sock = svsk->sk_sock;
int ret;
msg->msg_control = &u;
msg->msg_controllen = sizeof(u);
- ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
+ ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
if (unlikely(msg->msg_controllen != sizeof(u)))
- ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
+ ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret);
return ret;
}
@@ -1244,6 +1248,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
if (ret != head->iov_len)
goto out;
+ if (xdr_buf_pagecount(xdr))
+ xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
+
msg.msg_flags = MSG_SPLICE_PAGES;
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
xdr_buf_pagecount(xdr), xdr->page_len);
@@ -1621,6 +1628,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ tls_handshake_close(svsk->sk_sock);
+
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b098fde373ab..28c0771c4e8c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rep->rr_rdmabuf)
goto out_free;
- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
- goto out_free_regbuf;
-
rep->rr_cid.ci_completion_id =
atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
@@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
spin_unlock(&buf->rb_lock);
return rep;
-out_free_regbuf:
- rpcrdma_regbuf_free(rep->rr_rdmabuf);
out_free:
kfree(rep);
out:
@@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
rep = rpcrdma_rep_create(r_xprt, temp);
if (!rep)
break;
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+ rpcrdma_rep_put(buf, rep);
+ break;
+ }
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
trace_xprtrdma_post_recv(rep);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9f010369100a..268a2cc61acd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,7 +47,7 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/bvec.h>
@@ -360,24 +360,27 @@ static int
xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -EACCES : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
@@ -777,6 +780,8 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
}
if (ret == -ESHUTDOWN)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else if (ret == -EACCES)
+ xprt_wake_pending_tasks(&transport->xprt, -EACCES);
else
xs_poll_check_readable(transport);
out:
@@ -1292,6 +1297,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (transport->sock)
+ tls_handshake_close(transport->sock);
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
}
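
Both sunrpc call sites now delegate cmsg decoding to tls_get_record_type() and alert classification to tls_alert_recv(), failing the transport on fatal alerts and discarding everything else. Since kTLS delivers the same TLS_GET_RECORD_TYPE control message to user space on a plain recvmsg(), the cmsg handling can be modeled outside the kernel; a sketch (recv_record_type() is a hypothetical helper):

#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282
#endif

/* Returns the TLS record type, or 0 when no type cmsg was attached. */
static int recv_record_type(int fd, void *buf, size_t len, ssize_t *out)
{
	char cbuf[CMSG_SPACE(sizeof(unsigned char))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;

	*out = recvmsg(fd, &msg, 0);
	if (*out < 0)
		return -1;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_level == SOL_TLS &&
	    cmsg->cmsg_type == TLS_GET_RECORD_TYPE)
		return *(unsigned char *)CMSG_DATA(cmsg);

	return 0;	/* plain application data */
}
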
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8cc42aea19c7..5b045284849e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -862,3 +862,28 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
NULL);
}
EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
+
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_REPLAY,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_replay);
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 0772cfadaa0d..93f82398283d 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -131,6 +131,5 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
void tipc_set_node_id(struct net *net, u8 *id);
void tipc_set_node_addr(struct net *net, u32 addr);
char *tipc_nodeid2string(char *str, u8 *id);
-u32 tipc_node_id2hash(u8 *id128);
#endif
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 1ee60649bd17..41eac1ee0c09 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -214,8 +214,6 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
-int tipc_media_set_priority(const char *name, u32 new_value);
-int tipc_media_set_window(const char *name, u32 new_value);
int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attrs[]);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 0a3f7a70a50a..7eccd97e0609 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -197,7 +197,7 @@ static inline int less(u16 left, u16 right)
return less_eq(left, right) && (mod(right) != mod(left));
}
-static inline int in_range(u16 val, u16 min, u16 max)
+static inline int tipc_in_range(u16 val, u16 min, u16 max)
{
return !less(val, min) && !more(val, max);
}
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 577fa5af33ec..302fd749c424 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1960,7 +1960,8 @@ rcv:
skb_reset_network_header(*skb);
skb_pull(*skb, tipc_ehdr_size(ehdr));
- pskb_trim(*skb, (*skb)->len - aead->authsize);
+ if (pskb_trim(*skb, (*skb)->len - aead->authsize))
+ goto free_skb;
/* Validate TIPCv2 message */
if (unlikely(!tipc_msg_validate(skb))) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2eff1c7949cb..e33b4f29f77c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1623,7 +1623,7 @@ next_gap_ack:
last_ga->bgack_cnt);
}
/* Check against the last Gap ACK block */
- if (in_range(seqno, start, end))
+ if (tipc_in_range(seqno, start, end))
continue;
/* Update/release the packet peer is acking */
bc_has_acked = true;
@@ -2251,12 +2251,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
strncpy(if_name, data, TIPC_MAX_IF_NAME);
/* Update own tolerance if peer indicates a non-zero value */
- if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
l->tolerance = peers_tol;
l->bc_rcvlink->tolerance = peers_tol;
}
/* Update own priority if peer's priority is higher */
- if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
+ if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
/* If peer is going down we want full re-establish cycle */
@@ -2299,13 +2299,13 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
- if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
l->tolerance = peers_tol;
l->bc_rcvlink->tolerance = peers_tol;
}
/* Update own prio if peer indicates a different value */
if ((peers_prio != l->priority) &&
- in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
+ tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
l->priority = peers_prio;
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index a16f401fdabd..d80f5649b395 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -148,8 +148,6 @@ int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
struct sk_buff_head *xmitq,
struct sk_buff_head *retrq);
-void tipc_link_build_bc_sync_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index e231e6964d61..c677f6f082df 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -67,7 +67,6 @@ struct distr_item {
__be32 key;
};
-void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
diff --git a/net/tipc/net.h b/net/tipc/net.h
index d0c91d2df20a..1cb1e43cf34a 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -43,7 +43,6 @@ extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
void tipc_net_finalize_work(struct work_struct *work);
-void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 9b47c8409231..5bc076f2fa74 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -208,7 +208,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
goto err_out;
}
- info.attrs = attrbuf;
+ info.info.attrs = attrbuf;
if (nlmsg_len(cb.nlh) > 0) {
err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
@@ -1294,7 +1294,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
struct tipc_nl_compat_msg msg;
struct nlmsghdr *req_nlh;
struct nlmsghdr *rep_nlh;
- struct tipc_genlmsghdr *req_userhdr = info->userhdr;
+ struct tipc_genlmsghdr *req_userhdr = genl_info_userhdr(info);
memset(&msg, 0, sizeof(msg));
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 5e000fde8067..3105abe97bb9 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -583,7 +583,7 @@ update:
n->capabilities, &n->bc_entry.inputq1,
&n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
- kfree(n);
+ tipc_node_put(n);
n = NULL;
goto exit;
}
@@ -2662,7 +2662,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
@@ -2870,7 +2870,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
int err;
if (!prev_node) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
if (!attrs[TIPC_NLA_MON])
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ef8e5139a873..bb1118d02f95 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3791,7 +3791,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_sock *tsk;
if (!tsk_portid) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
if (!attrs[TIPC_NLA_SOCK])
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 926232557e77..f892b0903dba 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -465,7 +465,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
int i;
if (!bid && !skip_cnt) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct net *net = sock_net(skb->sk);
struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
char *bname;
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 86cef1c68e03..28a8c0e80e3c 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -39,6 +39,7 @@
#include <linux/types.h>
#include <linux/skmsg.h>
#include <net/tls.h>
+#include <net/tls_prot.h>
#define TLS_PAGE_ORDER (min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER, \
TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT))
@@ -50,6 +51,59 @@
#define TLS_DEC_STATS(net, field) \
SNMP_DEC_STATS((net)->mib.tls_statistics, field)
+struct tls_cipher_desc {
+ unsigned int nonce;
+ unsigned int iv;
+ unsigned int key;
+ unsigned int salt;
+ unsigned int tag;
+ unsigned int rec_seq;
+ unsigned int iv_offset;
+ unsigned int key_offset;
+ unsigned int salt_offset;
+ unsigned int rec_seq_offset;
+ char *cipher_name;
+ bool offloadable;
+ size_t crypto_info;
+};
+
+#define TLS_CIPHER_MIN TLS_CIPHER_AES_GCM_128
+#define TLS_CIPHER_MAX TLS_CIPHER_ARIA_GCM_256
+extern const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN];
+
+static inline const struct tls_cipher_desc *get_cipher_desc(u16 cipher_type)
+{
+ if (cipher_type < TLS_CIPHER_MIN || cipher_type > TLS_CIPHER_MAX)
+ return NULL;
+
+ return &tls_cipher_desc[cipher_type - TLS_CIPHER_MIN];
+}
+
+static inline char *crypto_info_iv(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->iv_offset;
+}
+
+static inline char *crypto_info_key(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->key_offset;
+}
+
+static inline char *crypto_info_salt(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->salt_offset;
+}
+
+static inline char *crypto_info_rec_seq(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->rec_seq_offset;
+}
+
/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
* allocated or mapped for each TLS record. After encryption, the records are
* stored in a linked list.
@@ -86,10 +140,6 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
void update_sk_prot(struct sock *sk, struct tls_context *ctx);
int wait_on_pending_writer(struct sock *sk, long *timeo);
-int tls_sk_query(struct sock *sk, int optname, char __user *optval,
- int __user *optlen);
-int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen);
void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
@@ -110,6 +160,8 @@ bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t read_actor);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
void tls_device_splice_eof(struct socket *sock);
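
The tls_cipher_desc table collapses the old per-cipher switch statements: every tls12_crypto_info_* struct keeps its IV, key, salt, and rec_seq at fixed offsets, so one offset-based accessor serves all ciphers. A reduced user-space model of the scheme (struct layout and the single table entry are illustrative, not the kernel's definitions; 51 is the real TLS_CIPHER_AES_GCM_128 value):

#include <stddef.h>

/* Illustrative structs only, not the kernel's definitions. */
struct crypto_info { unsigned short version, cipher_type; };

struct crypto_aes_gcm_128 {
	struct crypto_info info;
	unsigned char iv[8], key[16], salt[4], rec_seq[8];
};

struct cipher_desc { size_t iv_len, iv_offset; };

#define CIPHER_MIN 51	/* TLS_CIPHER_AES_GCM_128 */

static const struct cipher_desc descs[] = {
	{ .iv_len = 8,
	  .iv_offset = offsetof(struct crypto_aes_gcm_128, iv) },
};

static const struct cipher_desc *get_desc(unsigned short type)
{
	if (type != CIPHER_MIN)	/* single-entry model */
		return NULL;
	return &descs[type - CIPHER_MIN];
}

/* One accessor covers every cipher: base pointer + per-cipher offset. */
static unsigned char *info_iv(struct crypto_info *info,
			      const struct cipher_desc *d)
{
	return (unsigned char *)info + d->iv_offset;
}
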
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 2021fe557e50..8c94c926606a 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
static LIST_HEAD(tls_device_down_list);
static DEFINE_SPINLOCK(tls_device_lock);
+static struct page *dummy_page;
+
static void tls_device_free_ctx(struct tls_context *ctx)
{
if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
}
-static int tls_device_record_close(struct sock *sk,
- struct tls_context *ctx,
- struct tls_record_info *record,
- struct page_frag *pfrag,
- unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+ struct tls_context *ctx,
+ struct tls_record_info *record,
+ struct page_frag *pfrag,
+ unsigned char record_type)
{
struct tls_prot_info *prot = &ctx->prot_info;
- int ret;
+ struct page_frag dummy_tag_frag;
/* append tag
* device will fill in the tag, we just need to append a placeholder
* use socket memory to improve coalescing (re-using a single buffer
* increases frag count)
- * if we can't allocate memory now, steal some back from data
+ * if we can't allocate memory now, use the dummy page
*/
- if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
- sk->sk_allocation))) {
- ret = 0;
- tls_append_frag(record, pfrag, prot->tag_size);
- } else {
- ret = prot->tag_size;
- if (record->len <= prot->overhead_size)
- return -ENOMEM;
+ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+ !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+ dummy_tag_frag.page = dummy_page;
+ dummy_tag_frag.offset = 0;
+ pfrag = &dummy_tag_frag;
}
+ tls_append_frag(record, pfrag, prot->tag_size);
/* fill prepend */
tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
record->len - prot->overhead_size,
record_type);
- return ret;
}
static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -441,9 +440,13 @@ static int tls_push_data(struct sock *sk,
long timeo;
if (flags &
- ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SPLICE_PAGES))
+ ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SPLICE_PAGES | MSG_EOR))
return -EOPNOTSUPP;
+ if ((flags & (MSG_MORE | MSG_EOR)) == (MSG_MORE | MSG_EOR))
+ return -EINVAL;
+
if (unlikely(sk->sk_err))
return -sk->sk_err;
@@ -541,18 +544,8 @@ last_record:
if (done || record->len >= max_open_record_len ||
(record->num_frags >= MAX_SKB_FRAGS - 1)) {
- rc = tls_device_record_close(sk, tls_ctx, record,
- pfrag, record_type);
- if (rc) {
- if (rc > 0) {
- size += rc;
- } else {
- size = orig_size;
- destroy_record(record);
- ctx->open_record = NULL;
- break;
- }
- }
+ tls_device_record_close(sk, tls_ctx, record,
+ pfrag, record_type);
rc = tls_push_record(sk,
tls_ctx,
@@ -891,7 +884,7 @@ static int
tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
{
struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
int err, offset, copy, data_len, pos;
struct sk_buff *skb, *skb_iter;
struct scatterlist sg[1];
@@ -905,10 +898,10 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
default:
return -EINVAL;
}
- cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_recv.info.cipher_type];
+ cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type);
rxm = strp_msg(tls_strp_msg(sw_ctx));
- orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv,
+ orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv,
sk->sk_allocation);
if (!orig_buf)
return -ENOMEM;
@@ -924,8 +917,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
sg_init_table(sg, 1);
sg_set_buf(&sg[0], buf,
- rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv);
- err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_sz->iv);
+ rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv);
+ err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_desc->iv);
if (err)
goto free_buf;
@@ -936,7 +929,7 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
else
err = 0;
- data_len = rxm->full_len - cipher_sz->tag;
+ data_len = rxm->full_len - cipher_desc->tag;
if (skb_pagelen(skb) > offset) {
copy = min_t(int, skb_pagelen(skb) - offset, data_len);
@@ -1053,7 +1046,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
struct tls_record_info *start_marker_record;
struct tls_offload_context_tx *offload_ctx;
struct tls_crypto_info *crypto_info;
@@ -1086,46 +1079,32 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
goto release_netdev;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
- rec_seq =
- ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
- break;
- case TLS_CIPHER_AES_GCM_256:
- iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv;
- rec_seq =
- ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq;
- break;
- default:
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc || !cipher_desc->offloadable) {
rc = -EINVAL;
goto release_netdev;
}
- cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
- /* Sanity-check the rec_seq_size for stack allocations */
- if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
- rc = -EINVAL;
- goto release_netdev;
- }
+ iv = crypto_info_iv(crypto_info, cipher_desc);
+ rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
prot->version = crypto_info->version;
prot->cipher_type = crypto_info->cipher_type;
- prot->prepend_size = TLS_HEADER_SIZE + cipher_sz->iv;
- prot->tag_size = cipher_sz->tag;
+ prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv;
+ prot->tag_size = cipher_desc->tag;
prot->overhead_size = prot->prepend_size + prot->tag_size;
- prot->iv_size = cipher_sz->iv;
- prot->salt_size = cipher_sz->salt;
- ctx->tx.iv = kmalloc(cipher_sz->iv + cipher_sz->salt, GFP_KERNEL);
+ prot->iv_size = cipher_desc->iv;
+ prot->salt_size = cipher_desc->salt;
+ ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
if (!ctx->tx.iv) {
rc = -ENOMEM;
goto release_netdev;
}
- memcpy(ctx->tx.iv + cipher_sz->salt, iv, cipher_sz->iv);
+ memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv);
- prot->rec_seq_size = cipher_sz->rec_seq;
- ctx->tx.rec_seq = kmemdup(rec_seq, cipher_sz->rec_seq, GFP_KERNEL);
+ prot->rec_seq_size = cipher_desc->rec_seq;
+ ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
if (!ctx->tx.rec_seq) {
rc = -ENOMEM;
goto free_iv;
@@ -1450,14 +1429,26 @@ int __init tls_device_init(void)
{
int err;
- destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
- if (!destruct_wq)
+ dummy_page = alloc_page(GFP_KERNEL);
+ if (!dummy_page)
return -ENOMEM;
+ destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+ if (!destruct_wq) {
+ err = -ENOMEM;
+ goto err_free_dummy;
+ }
+
err = register_netdevice_notifier(&tls_dev_notifier);
if (err)
- destroy_workqueue(destruct_wq);
+ goto err_destroy_wq;
+ return 0;
+
+err_destroy_wq:
+ destroy_workqueue(destruct_wq);
+err_free_dummy:
+ put_page(dummy_page);
return err;
}
@@ -1466,4 +1457,5 @@ void __exit tls_device_cleanup(void)
unregister_netdevice_notifier(&tls_dev_notifier);
destroy_workqueue(destruct_wq);
clean_acked_data_flush();
+ put_page(dummy_page);
}
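
The tls_device_init() hunk above converts a single cleanup call into the usual allocate-in-order, unwind-in-reverse goto ladder. A minimal standalone sketch of that pattern (not kernel code; the resource names and the failing step are hypothetical stand-ins):

	#include <stdlib.h>

	static int register_stub(void)
	{
		return -1;			/* pretend registration failed */
	}

	static int init_example(void)
	{
		void *dummy, *wq;
		int err;

		dummy = malloc(16);		/* stands in for alloc_page() */
		if (!dummy)
			return -1;

		wq = malloc(16);		/* stands in for alloc_workqueue() */
		if (!wq) {
			err = -1;
			goto err_free_dummy;
		}

		err = register_stub();		/* a later step can still fail */
		if (err)
			goto err_destroy_wq;
		return 0;

	err_destroy_wq:
		free(wq);
	err_free_dummy:
		free(dummy);
		return err;
	}

Each error label releases exactly the resources acquired before the jump, which is why the labels appear in reverse acquisition order.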
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index b28c5e296dfd..1d743f310f4f 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -55,7 +55,7 @@ static int tls_enc_record(struct aead_request *aead_req,
struct tls_prot_info *prot)
{
unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE];
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
struct scatterlist sg_in[3];
struct scatterlist sg_out[3];
unsigned int buf_size;
@@ -69,9 +69,9 @@ static int tls_enc_record(struct aead_request *aead_req,
default:
return -EINVAL;
}
- cipher_sz = &tls_cipher_size_desc[prot->cipher_type];
+ cipher_desc = get_cipher_desc(prot->cipher_type);
- buf_size = TLS_HEADER_SIZE + cipher_sz->iv;
+ buf_size = TLS_HEADER_SIZE + cipher_desc->iv;
len = min_t(int, *in_len, buf_size);
scatterwalk_copychunks(buf, in, len, 0);
@@ -85,11 +85,11 @@ static int tls_enc_record(struct aead_request *aead_req,
scatterwalk_pagedone(out, 1, 1);
len = buf[4] | (buf[3] << 8);
- len -= cipher_sz->iv;
+ len -= cipher_desc->iv;
- tls_make_aad(aad, len - cipher_sz->tag, (char *)&rcd_sn, buf[0], prot);
+ tls_make_aad(aad, len - cipher_desc->tag, (char *)&rcd_sn, buf[0], prot);
- memcpy(iv + cipher_sz->salt, buf + TLS_HEADER_SIZE, cipher_sz->iv);
+ memcpy(iv + cipher_desc->salt, buf + TLS_HEADER_SIZE, cipher_desc->iv);
sg_init_table(sg_in, ARRAY_SIZE(sg_in));
sg_init_table(sg_out, ARRAY_SIZE(sg_out));
@@ -100,7 +100,7 @@ static int tls_enc_record(struct aead_request *aead_req,
*in_len -= len;
if (*in_len < 0) {
- *in_len += cipher_sz->tag;
+ *in_len += cipher_desc->tag;
/* the input buffer doesn't contain the entire record.
* trim len accordingly. The resulting authentication tag
* will contain garbage, but we don't care, so we won't
@@ -121,7 +121,7 @@ static int tls_enc_record(struct aead_request *aead_req,
scatterwalk_pagedone(out, 1, 1);
}
- len -= cipher_sz->tag;
+ len -= cipher_desc->tag;
aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
rc = crypto_aead_encrypt(aead_req);
@@ -309,14 +309,14 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
int sync_size,
void *dummy_buf)
{
- const struct tls_cipher_size_desc *cipher_sz =
- &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
+ const struct tls_cipher_desc *cipher_desc =
+ get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
sg_set_buf(&sg_out[0], dummy_buf, sync_size);
sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
/* Add room for authentication tag produced by crypto */
dummy_buf += sync_size;
- sg_set_buf(&sg_out[2], dummy_buf, cipher_sz->tag);
+ sg_set_buf(&sg_out[2], dummy_buf, cipher_desc->tag);
}
static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
@@ -328,7 +328,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
int tcp_payload_offset = skb_tcp_all_headers(skb);
int payload_len = skb->len - tcp_payload_offset;
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
void *buf, *iv, *aad, *dummy_buf, *salt;
struct aead_request *aead_req;
struct sk_buff *nskb = NULL;
@@ -348,16 +348,16 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
default:
goto free_req;
}
- cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
- buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE +
- sync_size + cipher_sz->tag;
+ cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
+ buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE +
+ sync_size + cipher_desc->tag;
buf = kmalloc(buf_len, GFP_ATOMIC);
if (!buf)
goto free_req;
iv = buf;
- memcpy(iv, salt, cipher_sz->salt);
- aad = buf + cipher_sz->salt + cipher_sz->iv;
+ memcpy(iv, salt, cipher_desc->salt);
+ aad = buf + cipher_desc->salt + cipher_desc->iv;
dummy_buf = aad + TLS_AAD_SPACE_SIZE;
nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
@@ -471,12 +471,15 @@ int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info)
{
- const struct tls_cipher_size_desc *cipher_sz;
- const u8 *key;
+ const struct tls_cipher_desc *cipher_desc;
int rc;
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc || !cipher_desc->offloadable)
+ return -EINVAL;
+
offload_ctx->aead_send =
- crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+ crypto_alloc_aead(cipher_desc->cipher_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(offload_ctx->aead_send)) {
rc = PTR_ERR(offload_ctx->aead_send);
pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
@@ -484,24 +487,13 @@ int tls_sw_fallback_init(struct sock *sk,
goto err_out;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
- break;
- case TLS_CIPHER_AES_GCM_256:
- key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key;
- break;
- default:
- rc = -EINVAL;
- goto free_aead;
- }
- cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
-
- rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key);
+ rc = crypto_aead_setkey(offload_ctx->aead_send,
+ crypto_info_key(crypto_info, cipher_desc),
+ cipher_desc->key);
if (rc)
goto free_aead;
- rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_sz->tag);
+ rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_desc->tag);
if (rc)
goto free_aead;
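
The fallback path now keys cipher name, key length, and tag length off the same descriptor table as the rest of TLS. A standalone sketch of the table-lookup shape, with illustrative constants and struct fields rather than the exact uapi definitions:

	#include <stddef.h>

	struct cipher_desc_sketch {
		unsigned int iv, key, salt, tag, rec_seq;
		const char *cipher_name;
		unsigned char offloadable;
	};

	#define SKETCH_CIPHER_MIN 51		/* illustrative bounds */
	#define SKETCH_CIPHER_MAX 58

	static const struct cipher_desc_sketch
	sketch_descs[SKETCH_CIPHER_MAX + 1 - SKETCH_CIPHER_MIN];

	static const struct cipher_desc_sketch *
	sketch_get_desc(unsigned int cipher_type)
	{
		if (cipher_type < SKETCH_CIPHER_MIN ||
		    cipher_type > SKETCH_CIPHER_MAX)
			return NULL;		/* unknown cipher */
		return &sketch_descs[cipher_type - SKETCH_CIPHER_MIN];
	}

A NULL return replaces the old `default: rc = -EINVAL` arm, and the `offloadable` flag replaces the implicit "only AES-GCM reaches here" assumption.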
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b6896126bb92..02f583ff9239 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -58,23 +58,66 @@ enum {
TLS_NUM_PROTS,
};
-#define CIPHER_SIZE_DESC(cipher) [cipher] = { \
+#define CHECK_CIPHER_DESC(cipher,ci) \
+ static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE); \
+ static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \
+ static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \
+ static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \
+ static_assert(sizeof_field(struct ci, key) == cipher ## _KEY_SIZE); \
+ static_assert(sizeof_field(struct ci, salt) == cipher ## _SALT_SIZE); \
+ static_assert(sizeof_field(struct ci, rec_seq) == cipher ## _REC_SEQ_SIZE);
+
+#define __CIPHER_DESC(ci) \
+ .iv_offset = offsetof(struct ci, iv), \
+ .key_offset = offsetof(struct ci, key), \
+ .salt_offset = offsetof(struct ci, salt), \
+ .rec_seq_offset = offsetof(struct ci, rec_seq), \
+ .crypto_info = sizeof(struct ci)
+
+#define CIPHER_DESC(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \
+ .nonce = cipher ## _IV_SIZE, \
.iv = cipher ## _IV_SIZE, \
.key = cipher ## _KEY_SIZE, \
.salt = cipher ## _SALT_SIZE, \
.tag = cipher ## _TAG_SIZE, \
.rec_seq = cipher ## _REC_SEQ_SIZE, \
+ .cipher_name = algname, \
+ .offloadable = _offloadable, \
+ __CIPHER_DESC(ci), \
}
-const struct tls_cipher_size_desc tls_cipher_size_desc[] = {
- CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128),
- CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256),
- CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128),
- CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305),
- CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM),
- CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM),
+#define CIPHER_DESC_NONCE0(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \
+ .nonce = 0, \
+ .iv = cipher ## _IV_SIZE, \
+ .key = cipher ## _KEY_SIZE, \
+ .salt = cipher ## _SALT_SIZE, \
+ .tag = cipher ## _TAG_SIZE, \
+ .rec_seq = cipher ## _REC_SEQ_SIZE, \
+ .cipher_name = algname, \
+ .offloadable = _offloadable, \
+ __CIPHER_DESC(ci), \
+}
+
+const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = {
+ CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128, "gcm(aes)", true),
+ CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256, "gcm(aes)", true),
+ CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128, "ccm(aes)", false),
+ CIPHER_DESC_NONCE0(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305, "rfc7539(chacha20,poly1305)", false),
+ CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm, "gcm(sm4)", false),
+ CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm, "ccm(sm4)", false),
+ CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128, "gcm(aria)", false),
+ CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256, "gcm(aria)", false),
};
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256);
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305);
+CHECK_CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm);
+CHECK_CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm);
+CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256);
+
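
The CHECK_CIPHER_DESC() invocations above turn any mismatch between the descriptor table and the uapi struct layouts into a build failure instead of a runtime surprise. A reduced illustration of the same static_assert technique on a hypothetical struct:

	#include <assert.h>
	#include <stddef.h>

	struct example_info {
		unsigned char iv[8];
		unsigned char rec_seq[8];
	};

	#define EXAMPLE_IV_SIZE      8
	#define EXAMPLE_REC_SEQ_SIZE 8

	static_assert(sizeof(((struct example_info *)0)->iv) ==
		      EXAMPLE_IV_SIZE, "iv size mismatch");
	static_assert(sizeof(((struct example_info *)0)->rec_seq) ==
		      EXAMPLE_REC_SEQ_SIZE, "rec_seq size mismatch");
	static_assert(offsetof(struct example_info, rec_seq) ==
		      EXAMPLE_IV_SIZE, "unexpected layout");

All expressions are compile-time constants, so the checks cost nothing in the generated code.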
static const struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static const struct proto *saved_tcpv4_prot;
@@ -139,9 +182,6 @@ int tls_push_sg(struct sock *sk,
ctx->splicing_pages = true;
while (1) {
- if (sg_is_last(sg))
- msg.msg_flags = flags;
-
/* is sending application-limited? */
tcp_rate_check_app_limited(sk);
p = sg_page(sg);
@@ -395,6 +435,7 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
int __user *optlen, int tx)
{
int rc = 0;
+ const struct tls_cipher_desc *cipher_desc;
struct tls_context *ctx = tls_get_ctx(sk);
struct tls_crypto_info *crypto_info;
struct cipher_context *cctx;
@@ -433,172 +474,19 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
goto out;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *
- crypto_info_aes_gcm_128 =
- container_of(crypto_info,
- struct tls12_crypto_info_aes_gcm_128,
- info);
-
- if (len != sizeof(*crypto_info_aes_gcm_128)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aes_gcm_128->iv,
- cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- TLS_CIPHER_AES_GCM_128_IV_SIZE);
- memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
- TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aes_gcm_128,
- sizeof(*crypto_info_aes_gcm_128)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *
- crypto_info_aes_gcm_256 =
- container_of(crypto_info,
- struct tls12_crypto_info_aes_gcm_256,
- info);
-
- if (len != sizeof(*crypto_info_aes_gcm_256)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aes_gcm_256->iv,
- cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
- TLS_CIPHER_AES_GCM_256_IV_SIZE);
- memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
- TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aes_gcm_256,
- sizeof(*crypto_info_aes_gcm_256)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_AES_CCM_128: {
- struct tls12_crypto_info_aes_ccm_128 *aes_ccm_128 =
- container_of(crypto_info,
- struct tls12_crypto_info_aes_ccm_128, info);
-
- if (len != sizeof(*aes_ccm_128)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(aes_ccm_128->iv,
- cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE,
- TLS_CIPHER_AES_CCM_128_IV_SIZE);
- memcpy(aes_ccm_128->rec_seq, cctx->rec_seq,
- TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);
- if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_CHACHA20_POLY1305: {
- struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305 =
- container_of(crypto_info,
- struct tls12_crypto_info_chacha20_poly1305,
- info);
-
- if (len != sizeof(*chacha20_poly1305)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(chacha20_poly1305->iv,
- cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE,
- TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE);
- memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq,
- TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE);
- if (copy_to_user(optval, chacha20_poly1305,
- sizeof(*chacha20_poly1305)))
- rc = -EFAULT;
- break;
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc || len != cipher_desc->crypto_info) {
+ rc = -EINVAL;
+ goto out;
}
- case TLS_CIPHER_SM4_GCM: {
- struct tls12_crypto_info_sm4_gcm *sm4_gcm_info =
- container_of(crypto_info,
- struct tls12_crypto_info_sm4_gcm, info);
- if (len != sizeof(*sm4_gcm_info)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(sm4_gcm_info->iv,
- cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE,
- TLS_CIPHER_SM4_GCM_IV_SIZE);
- memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq,
- TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE);
- if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_SM4_CCM: {
- struct tls12_crypto_info_sm4_ccm *sm4_ccm_info =
- container_of(crypto_info,
- struct tls12_crypto_info_sm4_ccm, info);
+ memcpy(crypto_info_iv(crypto_info, cipher_desc),
+ cctx->iv + cipher_desc->salt, cipher_desc->iv);
+ memcpy(crypto_info_rec_seq(crypto_info, cipher_desc),
+ cctx->rec_seq, cipher_desc->rec_seq);
- if (len != sizeof(*sm4_ccm_info)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(sm4_ccm_info->iv,
- cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE,
- TLS_CIPHER_SM4_CCM_IV_SIZE);
- memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq,
- TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE);
- if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_ARIA_GCM_128: {
- struct tls12_crypto_info_aria_gcm_128 *
- crypto_info_aria_gcm_128 =
- container_of(crypto_info,
- struct tls12_crypto_info_aria_gcm_128,
- info);
-
- if (len != sizeof(*crypto_info_aria_gcm_128)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aria_gcm_128->iv,
- cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE,
- TLS_CIPHER_ARIA_GCM_128_IV_SIZE);
- memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq,
- TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aria_gcm_128,
- sizeof(*crypto_info_aria_gcm_128)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_ARIA_GCM_256: {
- struct tls12_crypto_info_aria_gcm_256 *
- crypto_info_aria_gcm_256 =
- container_of(crypto_info,
- struct tls12_crypto_info_aria_gcm_256,
- info);
-
- if (len != sizeof(*crypto_info_aria_gcm_256)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aria_gcm_256->iv,
- cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE,
- TLS_CIPHER_ARIA_GCM_256_IV_SIZE);
- memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq,
- TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aria_gcm_256,
- sizeof(*crypto_info_aria_gcm_256)))
- rc = -EFAULT;
- break;
- }
- default:
- rc = -EINVAL;
- }
+ if (copy_to_user(optval, crypto_info, cipher_desc->crypto_info))
+ rc = -EFAULT;
out:
return rc;
@@ -699,7 +587,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
struct tls_crypto_info *crypto_info;
struct tls_crypto_info *alt_crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
- size_t optsize;
+ const struct tls_cipher_desc *cipher_desc;
int rc = 0;
int conf;
@@ -740,46 +628,23 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
}
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- optsize = sizeof(struct tls12_crypto_info_aes_gcm_128);
- break;
- case TLS_CIPHER_AES_GCM_256: {
- optsize = sizeof(struct tls12_crypto_info_aes_gcm_256);
- break;
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc) {
+ rc = -EINVAL;
+ goto err_crypto_info;
}
- case TLS_CIPHER_AES_CCM_128:
- optsize = sizeof(struct tls12_crypto_info_aes_ccm_128);
- break;
- case TLS_CIPHER_CHACHA20_POLY1305:
- optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305);
- break;
- case TLS_CIPHER_SM4_GCM:
- optsize = sizeof(struct tls12_crypto_info_sm4_gcm);
- break;
- case TLS_CIPHER_SM4_CCM:
- optsize = sizeof(struct tls12_crypto_info_sm4_ccm);
- break;
+
+ switch (crypto_info->cipher_type) {
case TLS_CIPHER_ARIA_GCM_128:
- if (crypto_info->version != TLS_1_2_VERSION) {
- rc = -EINVAL;
- goto err_crypto_info;
- }
- optsize = sizeof(struct tls12_crypto_info_aria_gcm_128);
- break;
case TLS_CIPHER_ARIA_GCM_256:
if (crypto_info->version != TLS_1_2_VERSION) {
rc = -EINVAL;
goto err_crypto_info;
}
- optsize = sizeof(struct tls12_crypto_info_aria_gcm_256);
break;
- default:
- rc = -EINVAL;
- goto err_crypto_info;
}
- if (optlen != optsize) {
+ if (optlen != cipher_desc->crypto_info) {
rc = -EINVAL;
goto err_crypto_info;
}
@@ -962,10 +827,12 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll;
+ ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock;
ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll;
+ ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock;
#ifdef CONFIG_TLS_DEVICE
ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
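
Both sockopt paths in this file collapse eight per-cipher branches into offset arithmetic on the descriptor. A sketch of the accessor idea behind crypto_info_iv()/crypto_info_rec_seq(); the struct and function names here are illustrative, not the exact helpers:

	struct desc_offsets {
		unsigned short iv_offset, rec_seq_offset;
		unsigned short iv_len, rec_seq_len;
	};

	static unsigned char *info_iv(void *crypto_info,
				      const struct desc_offsets *d)
	{
		return (unsigned char *)crypto_info + d->iv_offset;
	}

	static unsigned char *info_rec_seq(void *crypto_info,
					   const struct desc_offsets *d)
	{
		return (unsigned char *)crypto_info + d->rec_seq_offset;
	}

Because every tls12_crypto_info_* struct carries the same logical fields at cipher-specific offsets, one memcpy per field plus a `len != cipher_desc->crypto_info` size check reproduces what the old switch did per cipher.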
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index f37f4a0fcd3c..ca1e0e198ceb 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -369,7 +369,6 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
static int tls_strp_read_copyin(struct tls_strparser *strp)
{
- struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
desc.arg.data = strp;
@@ -377,7 +376,7 @@ static int tls_strp_read_copyin(struct tls_strparser *strp)
desc.count = 1; /* give more than one skb per call */
/* sk should be locked here, so okay to do read_sock */
- sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin);
+ tcp_read_sock(strp->sk, &desc, tls_strp_copyin);
return desc.error;
}
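
With tls_sw_read_sock() installed as the socket's ->read_sock (see the tls_main.c ops table above), going through sock->ops->read_sock from the strparser would re-enter TLS; calling tcp_read_sock() directly pins the copy-in path to plain TCP. For context, a minimal sketch of the read_sock/actor contract using simplified types:

	typedef int (*actor_t)(void *desc, const char *data, int len);

	struct fake_rx { const char *data; int len; };

	static int fake_read_sock(struct fake_rx *rx, void *desc,
				  actor_t actor)
	{
		int used = 0;

		while (used < rx->len) {
			int n = actor(desc, rx->data + used, rx->len - used);

			if (n <= 0)
				break;	/* actor is done, or errored */
			used += n;
		}
		return used;
	}

The transport walks its receive data and hands each chunk to the actor; the actor's return value says how much it consumed.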
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 53f944e6d8ef..1ed4a611631f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -984,6 +984,9 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int ret = 0;
int pending;
+ if (!eor && (msg->msg_flags & MSG_EOR))
+ return -EINVAL;
+
if (unlikely(msg->msg_controllen)) {
ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1193,7 +1196,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
+ MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | MSG_EOR |
MSG_SENDPAGE_NOPOLICY))
return -EOPNOTSUPP;
@@ -1845,13 +1848,10 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
return sk_flush_backlog(sk);
}
-static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
- bool nonblock)
+static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
{
long timeo;
- int err;
-
- lock_sock(sk);
timeo = sock_rcvtimeo(sk, nonblock);
@@ -1865,26 +1865,30 @@ static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
!READ_ONCE(ctx->reader_present), &wait);
remove_wait_queue(&ctx->wq, &wait);
- if (timeo <= 0) {
- err = -EAGAIN;
- goto err_unlock;
- }
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto err_unlock;
- }
+ if (timeo <= 0)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return sock_intr_errno(timeo);
}
WRITE_ONCE(ctx->reader_present, 1);
return 0;
+}
-err_unlock:
- release_sock(sk);
+static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
+{
+ int err;
+
+ lock_sock(sk);
+ err = tls_rx_reader_acquire(sk, ctx, nonblock);
+ if (err)
+ release_sock(sk);
return err;
}
-static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx)
{
if (unlikely(ctx->reader_contended)) {
if (wq_has_sleeper(&ctx->wq))
@@ -1896,6 +1900,11 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
}
WRITE_ONCE(ctx->reader_present, 0);
+}
+
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+ tls_rx_reader_release(sk, ctx);
release_sock(sk);
}
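
The rename above splits reader tracking out of socket locking so that tls_sw_read_sock(), whose caller already holds the socket lock, can reuse the acquire/release halves. A standalone sketch of the split, with a pthread mutex standing in for lock_sock() and the wait logic elided:

	#include <pthread.h>

	struct rx_ctx {
		pthread_mutex_t lock;
		int reader_present;
	};

	/* Caller already holds ctx->lock, like tls_rx_reader_acquire(). */
	static int reader_acquire(struct rx_ctx *ctx)
	{
		if (ctx->reader_present)
			return -11;	/* -EAGAIN; the real code may wait */
		ctx->reader_present = 1;
		return 0;
	}

	static int reader_lock(struct rx_ctx *ctx)
	{
		int err;

		pthread_mutex_lock(&ctx->lock);
		err = reader_acquire(ctx);
		if (err)
			pthread_mutex_unlock(&ctx->lock);
		return err;
	}

The wrapper owns the lock only on failure paths it created itself, so lock and reader state stay balanced for both kinds of caller.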
@@ -2193,6 +2202,102 @@ splice_requeue:
goto splice_read_end;
}
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t read_actor)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm = NULL;
+ struct sk_buff *skb = NULL;
+ struct sk_psock *psock;
+ size_t flushed_at = 0;
+ bool released = true;
+ struct tls_msg *tlm;
+ ssize_t copied = 0;
+ ssize_t decrypted;
+ int err, used;
+
+ psock = sk_psock_get(sk);
+ if (psock) {
+ sk_psock_put(sk, psock);
+ return -EINVAL;
+ }
+ err = tls_rx_reader_acquire(sk, ctx, true);
+ if (err < 0)
+ return err;
+
+ /* If crypto failed the connection is broken */
+ err = ctx->async_wait.err;
+ if (err)
+ goto read_sock_end;
+
+ decrypted = 0;
+ do {
+ if (!skb_queue_empty(&ctx->rx_list)) {
+ skb = __skb_dequeue(&ctx->rx_list);
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+ } else {
+ struct tls_decrypt_arg darg;
+
+ err = tls_rx_rec_wait(sk, NULL, true, released);
+ if (err <= 0)
+ goto read_sock_end;
+
+ memset(&darg.inargs, 0, sizeof(darg.inargs));
+
+ err = tls_rx_one_record(sk, NULL, &darg);
+ if (err < 0) {
+ tls_err_abort(sk, -EBADMSG);
+ goto read_sock_end;
+ }
+
+ released = tls_read_flush_backlog(sk, prot, INT_MAX,
+ 0, decrypted,
+ &flushed_at);
+ skb = darg.skb;
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+ decrypted += rxm->full_len;
+
+ tls_rx_rec_done(ctx);
+ }
+
+ /* read_sock does not support reading control messages */
+ if (tlm->control != TLS_RECORD_TYPE_DATA) {
+ err = -EINVAL;
+ goto read_sock_requeue;
+ }
+
+ used = read_actor(desc, skb, rxm->offset, rxm->full_len);
+ if (used <= 0) {
+ if (!copied)
+ err = used;
+ goto read_sock_requeue;
+ }
+ copied += used;
+ if (used < rxm->full_len) {
+ rxm->offset += used;
+ rxm->full_len -= used;
+ if (!desc->count)
+ goto read_sock_requeue;
+ } else {
+ consume_skb(skb);
+ if (!desc->count)
+ skb = NULL;
+ }
+ } while (skb);
+
+read_sock_end:
+ tls_rx_reader_release(sk, ctx);
+ return copied ? : err;
+
+read_sock_requeue:
+ __skb_queue_head(&ctx->rx_list, skb);
+ goto read_sock_end;
+}
+
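
A hedged usage sketch of the new hook: a caller supplies a read_actor that consumes decrypted record bytes, and returning less than the offered length leaves the remainder requeued on rx_list for the next call. The signature below is a simplified stand-in for sk_read_actor_t, not the real prototype:

	/* Hypothetical actor: count bytes, consume at most 4 KiB per call. */
	static int count_actor(void *desc, const char *data, int len)
	{
		unsigned long *counted = desc;
		int used = len < 4096 ? len : 4096;

		(void)data;
		*counted += used;
		return used;	/* < len => partial consume, rest requeued */
	}

Control-type records are deliberately rejected (-EINVAL) and requeued, since read_sock has no way to deliver cmsg metadata.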
bool tls_sw_sock_is_readable(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2485,10 +2590,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
struct tls_sw_context_rx *sw_ctx_rx = NULL;
struct cipher_context *cctx;
struct crypto_aead **aead;
- u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
struct crypto_tfm *tfm;
- char *iv, *rec_seq, *key, *salt, *cipher_name;
- size_t keysize;
+ char *iv, *rec_seq, *key, *salt;
+ const struct tls_cipher_desc *cipher_desc;
+ u16 nonce_size;
int rc = 0;
if (!ctx) {
@@ -2542,148 +2647,19 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
aead = &sw_ctx_rx->aead_recv;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
-
- gcm_128_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
- iv = gcm_128_info->iv;
- rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
- rec_seq = gcm_128_info->rec_seq;
- keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
- key = gcm_128_info->key;
- salt = gcm_128_info->salt;
- salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
- cipher_name = "gcm(aes)";
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
-
- gcm_256_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
- tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
- iv = gcm_256_info->iv;
- rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE;
- rec_seq = gcm_256_info->rec_seq;
- keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
- key = gcm_256_info->key;
- salt = gcm_256_info->salt;
- salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
- cipher_name = "gcm(aes)";
- break;
- }
- case TLS_CIPHER_AES_CCM_128: {
- struct tls12_crypto_info_aes_ccm_128 *ccm_128_info;
-
- ccm_128_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
- iv = ccm_128_info->iv;
- rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE;
- rec_seq = ccm_128_info->rec_seq;
- keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE;
- key = ccm_128_info->key;
- salt = ccm_128_info->salt;
- salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE;
- cipher_name = "ccm(aes)";
- break;
- }
- case TLS_CIPHER_CHACHA20_POLY1305: {
- struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info;
-
- chacha20_poly1305_info = (void *)crypto_info;
- nonce_size = 0;
- tag_size = TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE;
- iv_size = TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE;
- iv = chacha20_poly1305_info->iv;
- rec_seq_size = TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE;
- rec_seq = chacha20_poly1305_info->rec_seq;
- keysize = TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE;
- key = chacha20_poly1305_info->key;
- salt = chacha20_poly1305_info->salt;
- salt_size = TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE;
- cipher_name = "rfc7539(chacha20,poly1305)";
- break;
- }
- case TLS_CIPHER_SM4_GCM: {
- struct tls12_crypto_info_sm4_gcm *sm4_gcm_info;
-
- sm4_gcm_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
- tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE;
- iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
- iv = sm4_gcm_info->iv;
- rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE;
- rec_seq = sm4_gcm_info->rec_seq;
- keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE;
- key = sm4_gcm_info->key;
- salt = sm4_gcm_info->salt;
- salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE;
- cipher_name = "gcm(sm4)";
- break;
- }
- case TLS_CIPHER_SM4_CCM: {
- struct tls12_crypto_info_sm4_ccm *sm4_ccm_info;
-
- sm4_ccm_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
- tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE;
- iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
- iv = sm4_ccm_info->iv;
- rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE;
- rec_seq = sm4_ccm_info->rec_seq;
- keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE;
- key = sm4_ccm_info->key;
- salt = sm4_ccm_info->salt;
- salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE;
- cipher_name = "ccm(sm4)";
- break;
- }
- case TLS_CIPHER_ARIA_GCM_128: {
- struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info;
-
- aria_gcm_128_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_ARIA_GCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
- iv = aria_gcm_128_info->iv;
- rec_seq_size = TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE;
- rec_seq = aria_gcm_128_info->rec_seq;
- keysize = TLS_CIPHER_ARIA_GCM_128_KEY_SIZE;
- key = aria_gcm_128_info->key;
- salt = aria_gcm_128_info->salt;
- salt_size = TLS_CIPHER_ARIA_GCM_128_SALT_SIZE;
- cipher_name = "gcm(aria)";
- break;
- }
- case TLS_CIPHER_ARIA_GCM_256: {
- struct tls12_crypto_info_aria_gcm_256 *gcm_256_info;
-
- gcm_256_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
- tag_size = TLS_CIPHER_ARIA_GCM_256_TAG_SIZE;
- iv_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
- iv = gcm_256_info->iv;
- rec_seq_size = TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE;
- rec_seq = gcm_256_info->rec_seq;
- keysize = TLS_CIPHER_ARIA_GCM_256_KEY_SIZE;
- key = gcm_256_info->key;
- salt = gcm_256_info->salt;
- salt_size = TLS_CIPHER_ARIA_GCM_256_SALT_SIZE;
- cipher_name = "gcm(aria)";
- break;
- }
- default:
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc) {
rc = -EINVAL;
goto free_priv;
}
+ nonce_size = cipher_desc->nonce;
+
+ iv = crypto_info_iv(crypto_info, cipher_desc);
+ key = crypto_info_key(crypto_info, cipher_desc);
+ salt = crypto_info_salt(crypto_info, cipher_desc);
+ rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
+
if (crypto_info->version == TLS_1_3_VERSION) {
nonce_size = 0;
prot->aad_size = TLS_HEADER_SIZE;
@@ -2694,9 +2670,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
}
/* Sanity-check the sizes for stack allocations. */
- if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
- rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE ||
- prot->aad_size > TLS_MAX_AAD_SIZE) {
+ if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) {
rc = -EINVAL;
goto free_priv;
}
@@ -2704,28 +2678,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
prot->version = crypto_info->version;
prot->cipher_type = crypto_info->cipher_type;
prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
- prot->tag_size = tag_size;
+ prot->tag_size = cipher_desc->tag;
prot->overhead_size = prot->prepend_size +
prot->tag_size + prot->tail_size;
- prot->iv_size = iv_size;
- prot->salt_size = salt_size;
- cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL);
+ prot->iv_size = cipher_desc->iv;
+ prot->salt_size = cipher_desc->salt;
+ cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
if (!cctx->iv) {
rc = -ENOMEM;
goto free_priv;
}
/* Note: 128 & 256 bit salt are the same size */
- prot->rec_seq_size = rec_seq_size;
- memcpy(cctx->iv, salt, salt_size);
- memcpy(cctx->iv + salt_size, iv, iv_size);
- cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
+ prot->rec_seq_size = cipher_desc->rec_seq;
+ memcpy(cctx->iv, salt, cipher_desc->salt);
+ memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv);
+
+ cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
if (!cctx->rec_seq) {
rc = -ENOMEM;
goto free_iv;
}
if (!*aead) {
- *aead = crypto_alloc_aead(cipher_name, 0, 0);
+ *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0);
if (IS_ERR(*aead)) {
rc = PTR_ERR(*aead);
*aead = NULL;
@@ -2735,8 +2710,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
ctx->push_pending_record = tls_sw_push_pending_record;
- rc = crypto_aead_setkey(*aead, key, keysize);
-
+ rc = crypto_aead_setkey(*aead, key, cipher_desc->key);
if (rc)
goto free_aead;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 123b35ddfd71..86930a8ed012 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -289,17 +289,29 @@ static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
return 0;
}
-static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
+static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
+ struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
+ short offset = offsetof(struct sockaddr_storage, __data);
+
+ BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
+
/* This may look like an off by one error but it is a bit more
* subtle. 108 is the longest valid AF_UNIX path for a binding.
* sun_path[108] doesn't as such exist. However in kernel space
* we are guaranteed that it is a valid memory location in our
* kernel address buffer because syscall functions always pass
* a pointer of struct sockaddr_storage which has a bigger buffer
- * than 108.
+ * than 108. Also, we must terminate sun_path for strlen() in
+ * getname_kernel().
+ */
+ addr->__data[addr_len - offset] = 0;
+
+ /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
+ * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
+ * know the actual buffer.
*/
- ((char *)sunaddr)[addr_len] = 0;
+ return strlen(addr->__data) + offset + 1;
}
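
A userspace-style sketch of the two steps the rewritten helper performs: terminate the path inside the oversized sockaddr_storage buffer, then derive the canonical addr_len from the actual string. The offset value is illustrative:

	#include <string.h>

	#define PATH_OFF 2	/* stand-in for offsetof(..., sun_path) */

	static int mkname_bsd_sketch(char *buf, int addr_len)
	{
		buf[addr_len] = 0;	/* buffer is larger than addr_len */
		return (int)strlen(buf + PATH_OFF) + PATH_OFF + 1;
	}

Doing the strlen() on the storage-sized buffer rather than the 108-byte sun_path is what keeps CONFIG_FORTIFY_SOURCE from tripping on a full-length path.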
static void __unix_remove_socket(struct sock *sk)
@@ -778,7 +790,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
if (mutex_lock_interruptible(&u->iolock))
return -EINTR;
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
mutex_unlock(&u->iolock);
return 0;
@@ -1208,10 +1220,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
struct path parent;
int err;
- unix_mkname_bsd(sunaddr, addr_len);
- addr_len = strlen(sunaddr->sun_path) +
- offsetof(struct sockaddr_un, sun_path) + 1;
-
+ addr_len = unix_mkname_bsd(sunaddr, addr_len);
addr = unix_create_addr(sunaddr, addr_len);
if (!addr)
return -ENOMEM;
diff --git a/net/unix/scm.c b/net/unix/scm.c
index f9152881d77f..e9dde7176c8a 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -29,10 +29,11 @@ struct sock *unix_get_socket(struct file *filp)
/* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct sock *s = sock->sk;
/* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
+ if (s && ops && ops->family == PF_UNIX)
u_sock = s;
} else {
/* Could be an io_uring instance */
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index b769fc258931..352d042b130b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -348,37 +348,34 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- size_t bytes, total = 0, off;
- struct sk_buff *skb, *tmp;
- int err = -EFAULT;
+ struct sk_buff *skb;
+ size_t total = 0;
+ int err;
spin_lock_bh(&vvs->rx_lock);
- skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
- off = 0;
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ size_t bytes;
- if (total == len)
- break;
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
- while (total < len && off < skb->len) {
- bytes = len - total;
- if (bytes > skb->len - off)
- bytes = skb->len - off;
+ spin_unlock_bh(&vvs->rx_lock);
- /* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
- */
- spin_unlock_bh(&vvs->rx_lock);
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ goto out;
- err = memcpy_to_msg(msg, skb->data + off, bytes);
- if (err)
- goto out;
+ total += bytes;
- spin_lock_bh(&vvs->rx_lock);
+ spin_lock_bh(&vvs->rx_lock);
- total += bytes;
- off += bytes;
- }
+ if (total == len)
+ break;
}
spin_unlock_bh(&vvs->rx_lock);
@@ -463,6 +460,63 @@ out:
return err;
}
+static ssize_t
+virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
+ struct msghdr *msg)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct sk_buff *skb;
+ size_t total, len;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ if (!vvs->msg_count) {
+ spin_unlock_bh(&vvs->rx_lock);
+ return 0;
+ }
+
+ total = 0;
+ len = msg_data_left(msg);
+
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ struct virtio_vsock_hdr *hdr;
+
+ if (total < len) {
+ size_t bytes;
+ int err;
+
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ return err;
+
+ spin_lock_bh(&vvs->rx_lock);
+ }
+
+ total += skb->len;
+ hdr = virtio_vsock_hdr(skb);
+
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
+
+ break;
+ }
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return total;
+}
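
The peek loop above copies only while the caller still has room but keeps summing segment lengths until end-of-message, so the return value reflects the full record even when it did not fit. A reduced standalone version over a plain linked list:

	#include <string.h>

	struct seg { const char *data; int len; int eom; struct seg *next; };

	static int seqpacket_peek_sketch(struct seg *q, char *to, int space)
	{
		int total = 0;

		for (; q; q = q->next) {
			if (total < space) {
				int n = q->len;

				if (n > space - total)
					n = space - total;
				memcpy(to + total, q->data, n);
			}
			total += q->len;	/* count beyond what fit */
			if (q->eom)
				break;
		}
		return total;
	}

Nothing is unlinked from the queue, which is the whole point of MSG_PEEK: a later dequeue sees the same data.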
+
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
int flags)
@@ -557,9 +611,9 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
int flags)
{
if (flags & MSG_PEEK)
- return -EOPNOTSUPP;
-
- return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
+ return virtio_transport_seqpacket_do_peek(vsk, msg);
+ else
+ return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index b7b072194282..dbda3ababa14 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -116,9 +116,6 @@ struct vmci_transport {
spinlock_t lock; /* protects sk. */
};
-int vmci_transport_register(void);
-void vmci_transport_unregister(void);
-
int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
struct sockaddr_vm *src);
int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 8a807b609ef7..507d184b8b40 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -298,7 +298,7 @@ struct cfg80211_cqm_config {
u32 rssi_hyst;
s32 last_rssi_event_value;
int n_rssi_thresholds;
- s32 rssi_thresholds[];
+ s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
};
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index ac059cefbeb3..775cac4d6100 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -281,6 +281,11 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
ether_addr_equal(req->bss->bssid, wdev->u.client.connected_addr))
return -EALREADY;
+ if (ether_addr_equal(req->bss->bssid, dev->dev_addr) ||
+ (req->link_id >= 0 &&
+ ether_addr_equal(req->ap_mld_addr, dev->dev_addr)))
+ return -EINVAL;
+
return rdev_auth(rdev, dev, req);
}
@@ -335,6 +340,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
if (req->links[i].bss == req->links[j].bss)
return -EINVAL;
}
+
+ if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr))
+ return -EINVAL;
}
if (wdev->connected &&
@@ -342,6 +350,11 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
!ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid)))
return -EALREADY;
+ if ((req->bss && ether_addr_equal(req->bss->bssid, dev->dev_addr)) ||
+ (req->link_id >= 0 &&
+ ether_addr_equal(req->ap_mld_addr, dev->dev_addr)))
+ return -EINVAL;
+
cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0da2e6a2a7ea..de47838aca4f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -323,6 +323,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
[NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG },
[NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG },
[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG },
+ [NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR] = { .type = NLA_U8 },
};
static const struct nla_policy
@@ -4889,13 +4890,12 @@ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy,
acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL);
if (!acl)
return ERR_PTR(-ENOMEM);
+ acl->n_acl_entries = n_entries;
nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) {
memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN);
i++;
}
-
- acl->n_acl_entries = n_entries;
acl->acl_policy = acl_policy;
return acl;
@@ -5430,19 +5430,22 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
if (!wiphy->mbssid_max_interfaces)
return ERR_PTR(-EINVAL);
- nla_for_each_nested(nl_elems, attrs, rem_elems)
+ nla_for_each_nested(nl_elems, attrs, rem_elems) {
+ if (num_elems >= 255)
+ return ERR_PTR(-EINVAL);
num_elems++;
+ }
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
return ERR_PTR(-ENOMEM);
+ elems->cnt = num_elems;
nla_for_each_nested(nl_elems, attrs, rem_elems) {
elems->elem[i].data = nla_data(nl_elems);
elems->elem[i].len = nla_len(nl_elems);
i++;
}
- elems->cnt = num_elems;
return elems;
}
@@ -5468,13 +5471,13 @@ nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs,
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
return ERR_PTR(-ENOMEM);
+ elems->cnt = num_elems;
nla_for_each_nested(nl_elems, attrs, rem_elems) {
elems->elem[i].data = nla_data(nl_elems);
elems->elem[i].len = nla_len(nl_elems);
i++;
}
- elems->cnt = num_elems;
return elems;
}
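
Three allocations in this file now store their element count before populating the flexible array; together with __counted_by() (added to cfg80211_cqm_config above), fortified bounds checks can validate every array write against the live counter. A minimal sketch of the ordering on a hypothetical struct:

	#include <stdlib.h>
	#include <string.h>

	struct acl_sketch {
		int n_entries;
		unsigned char addrs[][6];  /* __counted_by(n_entries) upstream */
	};

	static struct acl_sketch *make_acl(const unsigned char (*src)[6], int n)
	{
		struct acl_sketch *a = calloc(1, sizeof(*a) + (size_t)n * 6);

		if (!a)
			return NULL;
		a->n_entries = n;	/* set the counter first ... */
		memcpy(a->addrs, src, (size_t)n * 6);	/* ... then fill */
		return a;
	}

With the old ordering (fill first, set count last), a counter-aware bounds check would see n_entries == 0 while the array was being written and flag every store as out of bounds.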
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 0278d817bb02..b4af53f9b227 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -120,6 +120,5 @@ void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
/* peer measurement */
int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info);
-int nl80211_pmsr_dump_results(struct sk_buff *skb, struct netlink_callback *cb);
#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c
index 27a1732264f9..29afaf3da54f 100644
--- a/net/wireless/ocb.c
+++ b/net/wireless/ocb.c
@@ -68,6 +68,9 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
if (!rdev->ops->leave_ocb)
return -EOPNOTSUPP;
+ if (!wdev->u.ocb.chandef.chan)
+ return -ENOTCONN;
+
err = rdev_leave_ocb(rdev, dev);
if (!err)
memset(&wdev->u.ocb.chandef, 0, sizeof(wdev->u.ocb.chandef));
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 77000a264855..9611aa0bd051 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -291,6 +291,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
req = kzalloc(struct_size(req, peers, count), GFP_KERNEL);
if (!req)
return -ENOMEM;
+ req->n_peers = count;
if (info->attrs[NL80211_ATTR_TIMEOUT])
req->timeout = nla_get_u32(info->attrs[NL80211_ATTR_TIMEOUT]);
@@ -321,8 +322,6 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
goto out_err;
idx++;
}
-
- req->n_peers = count;
req->cookie = cfg80211_assign_cookie(rdev);
req->nl_portid = info->snd_portid;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 8bf00caf5d29..0cf1ce7b6934 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -657,7 +657,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
if (ret)
- return ret;
+ return 0;
for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
ies->data, ies->len) {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 89c9ad6c886e..1783ab9d57a3 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -580,6 +580,8 @@ int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb)
hdrlen += ETH_ALEN + 2;
else if (!pskb_may_pull(skb, hdrlen))
return -EINVAL;
+ else
+ payload.eth.h_proto = htons(skb->len - hdrlen);
mesh_addr = skb->data + sizeof(payload.eth) + ETH_ALEN;
switch (payload.flags & MESH_FLAGS_AE) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 31dca4ecb2c5..fcfc8472f73d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
+#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
#include "xsk_queue.h"
@@ -135,14 +136,14 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
return 0;
}
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
+ u32 flags)
{
- struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
u64 addr;
int err;
addr = xp_get_handle(xskb);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
return err;
@@ -152,48 +153,138 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
}
-static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *from_buf, *to_buf;
- u32 metalen;
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ u32 frags = xdp_buff_has_frags(xdp);
+ struct xdp_buff_xsk *pos, *tmp;
+ struct list_head *xskb_list;
+ u32 contd = 0;
+ int err;
- if (unlikely(xdp_data_meta_unsupported(from))) {
- from_buf = from->data;
- to_buf = to->data;
- metalen = 0;
- } else {
- from_buf = from->data_meta;
- metalen = from->data - from->data_meta;
- to_buf = to->data - metalen;
+ if (frags)
+ contd = XDP_PKT_CONTD;
+
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err || likely(!frags))
+ goto out;
+
+ xskb_list = &xskb->pool->xskb_list;
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ if (list_is_singular(xskb_list))
+ contd = 0;
+ len = pos->xdp.data_end - pos->xdp.data;
+ err = __xsk_rcv_zc(xs, pos, len, contd);
+ if (err)
+ return err;
+ list_del(&pos->xskb_list_node);
}
- memcpy(to_buf, from_buf, len + metalen);
+out:
+ return err;
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static void *xsk_copy_xdp_start(struct xdp_buff *from)
{
+ if (unlikely(xdp_data_meta_unsupported(from)))
+ return from->data;
+ else
+ return from->data_meta;
+}
+
+static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
+ u32 *from_len, skb_frag_t **frag, u32 rem)
+{
+ u32 copied = 0;
+
+ while (1) {
+ u32 copy_len = min_t(u32, *from_len, to_len);
+
+ memcpy(to, *from, copy_len);
+ copied += copy_len;
+ if (rem == copied)
+ return copied;
+
+ if (*from_len == copy_len) {
+ *from = skb_frag_address(*frag);
+ *from_len = skb_frag_size((*frag)++);
+ } else {
+ *from += copy_len;
+ *from_len -= copy_len;
+ }
+ if (to_len == copy_len)
+ return copied;
+
+ to_len -= copy_len;
+ to += copy_len;
+ }
+}
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+{
+ u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
+ void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
+ u32 from_len, meta_len, rem, num_desc;
+ struct xdp_buff_xsk *xskb;
struct xdp_buff *xsk_xdp;
- int err;
- u32 len;
+ skb_frag_t *frag;
- len = xdp->data_end - xdp->data;
- if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
- xs->rx_dropped++;
- return -ENOSPC;
+ from_len = xdp->data_end - copy_from;
+ meta_len = xdp->data - copy_from;
+ rem = len + meta_len;
+
+ if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
+ int err;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ if (!xsk_xdp) {
+ xs->rx_dropped++;
+ return -ENOMEM;
+ }
+ memcpy(xsk_xdp->data - meta_len, copy_from, rem);
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ err = __xsk_rcv_zc(xs, xskb, len, 0);
+ if (err) {
+ xsk_buff_free(xsk_xdp);
+ return err;
+ }
+
+ return 0;
}
- xsk_xdp = xsk_buff_alloc(xs->pool);
- if (!xsk_xdp) {
+ num_desc = (len - 1) / frame_size + 1;
+
+ if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
xs->rx_dropped++;
return -ENOMEM;
}
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ return -ENOBUFS;
+ }
- xsk_copy_xdp(xsk_xdp, xdp, len);
- err = __xsk_rcv_zc(xs, xsk_xdp, len);
- if (err) {
- xsk_buff_free(xsk_xdp);
- return err;
+ if (xdp_buff_has_frags(xdp)) {
+ struct skb_shared_info *sinfo;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ frag = &sinfo->frags[0];
}
+
+ do {
+ u32 to_len = frame_size + meta_len;
+ u32 copied;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ copy_to = xsk_xdp->data - meta_len;
+
+ copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
+ rem -= copied;
+
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ __xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
+ meta_len = 0;
+ } while (rem);
+
return 0;
}
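
The multi-buffer receive path sizes its reservation with a ceiling division before touching either ring, so a partial failure mid-packet cannot happen for lack of descriptors. A quick standalone check of that arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned int len = 9000, frame_size = 4096;
		unsigned int num_desc = (len - 1) / frame_size + 1; /* ceil */

		printf("%u bytes over %u-byte frames -> %u descriptors\n",
		       len, frame_size, num_desc);	/* prints 3 */
		return 0;
	}

Only after both xsk_buff_can_alloc() and xskq_prod_nb_free() confirm room for all num_desc entries does the copy loop start.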
@@ -215,7 +306,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
if (!xsk_is_bound(xs))
return -ENXIO;
@@ -223,6 +314,11 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
+ if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
+ xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -236,12 +332,13 @@ static void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
spin_lock_bh(&xs->rx_lock);
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (!err) {
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
xsk_flush(xs);
}
spin_unlock_bh(&xs->rx_lock);
@@ -250,19 +347,19 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
- u32 len;
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (err)
return err;
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
len = xdp->data_end - xdp->data;
- return __xsk_rcv_zc(xs, xdp, len);
+ return xsk_rcv_zc(xs, xdp, len);
}
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
if (!err)
xdp_return_buff(xdp);
return err;
@@ -321,7 +418,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx))
+ xskq_cons_release(xs->tx);
continue;
}
@@ -408,37 +506,91 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static void xsk_destruct_skb(struct sk_buff *skb)
+static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+
+ return ret;
+}
+
+static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
{
- u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
- struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_addr(xs->pool->cq, addr);
+ xskq_prod_submit_n(xs->pool->cq, n);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel_n(xs->pool->cq, n);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static u32 xsk_get_num_desc(struct sk_buff *skb)
+{
+ return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+}
+
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+ xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
+static void xsk_set_destructor_arg(struct sk_buff *skb)
+{
+ long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+
+ skb_shinfo(skb)->destructor_arg = (void *)num;
+}
+
+static void xsk_consume_skb(struct sk_buff *skb)
+{
+ struct xdp_sock *xs = xdp_sk(skb->sk);
+
+ skb->destructor = sock_wfree;
+ xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ /* Free skb without triggering the perf drop trace */
+ consume_skb(skb);
+ xs->skb = NULL;
+}
+
+static void xsk_drop_skb(struct sk_buff *skb)
+{
+ xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
+ xsk_consume_skb(skb);
+}
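
xsk_get_num_desc() and xsk_set_destructor_arg() repurpose the pointer-sized destructor_arg field as a per-skb descriptor counter, so completion (submit), retry (cancel), and drop paths all know how many ring entries one skb represents. The trick in isolation, on a stand-in struct:

	struct shinfo_sketch { void *destructor_arg; };

	static unsigned long get_num_desc(struct shinfo_sketch *s)
	{
		return (unsigned long)s->destructor_arg;
	}

	static void inc_num_desc(struct shinfo_sketch *s)
	{
		s->destructor_arg = (void *)(get_num_desc(s) + 1);
	}

Storing an integer in a void * this way is well defined as long as it is only ever read back as the same integer type, which is all these helpers do.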
+
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
int err, i;
u64 addr;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
- skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+ if (unlikely(!skb))
+ return ERR_PTR(err);
- skb_reserve(skb, hr);
+ skb_reserve(skb, hr);
+ }
addr = desc->addr;
len = desc->len;
@@ -448,7 +600,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
offset = offset_in_page(buffer);
addr = buffer - pool->addrs;
- for (copied = 0, i = 0; copied < len; i++) {
+ for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
+ if (unlikely(i >= MAX_SKB_FRAGS))
+ return ERR_PTR(-EFAULT);
+
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -473,43 +628,77 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct net_device *dev = xs->dev;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
+ int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
skb = xsk_build_skb_zerocopy(xs, desc);
- if (IS_ERR(skb))
- return skb;
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto free_err;
+ }
} else {
u32 hr, tr, len;
void *buffer;
- int err;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
- tr = dev->needed_tailroom;
+ buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
len = desc->len;
- skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+ tr = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+ if (unlikely(!skb))
+ goto free_err;
- skb_reserve(skb, hr);
- skb_put(skb, len);
+ skb_reserve(skb, hr);
+ skb_put(skb, len);
- buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
- err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
- return ERR_PTR(err);
+ err = skb_store_bits(skb, 0, buffer, len);
+ if (unlikely(err))
+ goto free_err;
+ } else {
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct page *page;
+ u8 *vaddr;
+
+ if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
+ err = -EFAULT;
+ goto free_err;
+ }
+
+ page = alloc_page(xs->sk.sk_allocation);
+ if (unlikely(!page)) {
+ err = -EAGAIN;
+ goto free_err;
+ }
+
+ vaddr = kmap_local_page(page);
+ memcpy(vaddr, buffer, len);
+ kunmap_local(vaddr);
+
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
}
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
- skb->mark = xs->sk.sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
+ skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
+ xsk_set_destructor_arg(skb);
return skb;
+
+free_err:
+ if (err == -EAGAIN) {
+ xsk_cq_cancel_locked(xs, 1);
+ } else {
+ xsk_set_destructor_arg(skb);
+ xsk_drop_skb(skb);
+ xskq_cons_release(xs->tx);
+ }
+
+ return ERR_PTR(err);
}
static int __xsk_generic_xmit(struct sock *sk)
@@ -519,7 +708,6 @@ static int __xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
- unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -544,47 +732,51 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- if (xskq_prod_reserve(xs->pool->cq)) {
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ if (xsk_cq_reserve_addr_locked(xs, desc.addr))
goto out;
- }
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- goto out;
+ if (err == -EAGAIN)
+ goto out;
+ err = 0;
+ continue;
+ }
+
+ xskq_cons_release(xs->tx);
+
+ if (xp_mb_desc(&desc)) {
+ xs->skb = skb;
+ continue;
}
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
- skb->destructor = sock_wfree;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- /* Free skb without triggering the perf drop trace */
- consume_skb(skb);
+ xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
+ xsk_consume_skb(skb);
err = -EAGAIN;
goto out;
}
- xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP) {
/* SKB completed but not sent */
err = -EBUSY;
+ xs->skb = NULL;
goto out;
}
sent_frame = true;
+ xs->skb = NULL;
}
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx)) {
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+ xskq_cons_release(xs->tx);
+ }
out:
if (sent_frame)
@@ -834,6 +1026,9 @@ static int xsk_release(struct socket *sock)
net = sock_net(sk);
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+
mutex_lock(&net->xdp.lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->xdp.lock);
@@ -897,7 +1092,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
flags = sxdp->sxdp_flags;
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
- XDP_USE_NEED_WAKEUP))
+ XDP_USE_NEED_WAKEUP | XDP_USE_SG))
return -EINVAL;
bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
@@ -929,7 +1124,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct socket *sock;
if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
- (flags & XDP_USE_NEED_WAKEUP)) {
+ (flags & XDP_USE_NEED_WAKEUP) || (flags & XDP_USE_SG)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
@@ -994,6 +1189,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xp_alloc_tx_descs(xs->pool, xs);
if (err) {
xp_put_pool(xs->pool);
+ xs->pool = NULL;
sockfd_put(sock);
goto out_unlock;
}
@@ -1028,6 +1224,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
+ xs->sg = !!(flags & XDP_USE_SG);
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
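[Editor's note: the xsk.c hunks above add multi-buffer Tx — a descriptor whose options carry XDP_PKT_CONTD promises more fragments, and xs->skb holds the partially built skb across __xsk_generic_xmit() iterations until a descriptor without the bit completes the packet. A minimal userspace sketch of the producer side, assuming conventional ring bookkeeping; apart from struct xdp_desc and XDP_PKT_CONTD (both from <linux/if_xdp.h>), every name below is hypothetical:]

#include <linux/if_xdp.h>

/* Post one packet split across two Tx descriptors. */
static void post_two_frag_packet(struct xdp_desc *ring, __u32 *prod,
				 __u32 mask, __u64 frag0, __u32 len0,
				 __u64 frag1, __u32 len1)
{
	__u32 idx = (*prod)++ & mask;

	/* First fragment: XDP_PKT_CONTD says "more frags follow". */
	ring[idx].addr = frag0;
	ring[idx].len = len0;
	ring[idx].options = XDP_PKT_CONTD;

	idx = (*prod)++ & mask;

	/* Last fragment: a cleared XDP_PKT_CONTD ends the packet. */
	ring[idx].addr = frag1;
	ring[idx].len = len1;
	ring[idx].options = 0;
}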
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 26f6d304451e..b3f7b310811e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->umem = umem;
pool->addrs = umem->addrs;
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
spin_lock_init(&pool->cq_lock);
@@ -99,6 +100,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
INIT_LIST_HEAD(&xskb->free_list_node);
+ INIT_LIST_HEAD(&xskb->xskb_list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -187,6 +189,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
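[Editor's note: the xdp_zc_max_segs gate above makes bind() fail with EOPNOTSUPP when XDP_USE_SG is requested on a driver that only supports single-segment zero-copy. A hedged userspace sketch of a fallback; the helper name is made up, the sockaddr_xdp fields come from <linux/if_xdp.h>:]

#include <errno.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Bind an AF_XDP socket, dropping XDP_USE_SG if the driver lacks
 * multi-buffer zero-copy support (the kernel returns EOPNOTSUPP). */
static int xsk_bind_with_sg_fallback(int fd, unsigned int ifindex,
				     unsigned int queue_id)
{
	struct sockaddr_xdp sxdp = {
		.sxdp_family = AF_XDP,
		.sxdp_ifindex = ifindex,
		.sxdp_queue_id = queue_id,
		.sxdp_flags = XDP_ZEROCOPY | XDP_USE_SG,
	};

	if (!bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
		return 0;
	if (errno != EOPNOTSUPP)
		return -errno;

	sxdp.sxdp_flags &= ~XDP_USE_SG;	/* retry single-buffer */
	return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) ? -errno : 0;
}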
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6d40a77fccbe..13354a1e4280 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -48,6 +48,11 @@ struct xsk_queue {
size_t ring_vmalloc_size;
};
+struct parsed_desc {
+ u32 mb;
+ u32 valid;
+};
+
/* The structure of the shared state of the rings is a simple
* circular buffer, as outlined in
* Documentation/core-api/circular-buffers.rst. For the Rx and
@@ -130,18 +135,26 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
return false;
}
+static inline bool xp_unused_options_set(u32 options)
+{
+ return options & ~XDP_PKT_CONTD;
+}
+
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 offset = desc->addr & (pool->chunk_size - 1);
+ if (!desc->len)
+ return false;
+
if (offset + desc->len > pool->chunk_size)
return false;
if (desc->addr >= pool->addrs_cnt)
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -151,6 +164,9 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
{
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ if (!desc->len)
+ return false;
+
if (desc->len > pool->chunk_size)
return false;
@@ -158,7 +174,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -170,6 +186,11 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
xp_aligned_validate_desc(pool, desc);
}
+static inline bool xskq_has_descs(struct xsk_queue *q)
+{
+ return q->cached_cons != q->cached_prod;
+}
+
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
struct xsk_buff_pool *pool)
@@ -185,17 +206,15 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xsk_buff_pool *pool)
{
- while (q->cached_cons != q->cached_prod) {
+ if (q->cached_cons != q->cached_prod) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = q->cached_cons & q->ring_mask;
*desc = ring->desc[idx];
- if (xskq_cons_is_valid_desc(q, desc, pool))
- return true;
-
- q->cached_cons++;
+ return xskq_cons_is_valid_desc(q, desc, pool);
}
+ q->queue_empty_descs++;
return false;
}
@@ -204,30 +223,52 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
q->cached_cons += cnt;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
+static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ struct xdp_desc *desc, struct parsed_desc *parsed)
+{
+ parsed->valid = xskq_cons_is_valid_desc(q, desc, pool);
+ parsed->mb = xp_mb_desc(desc);
+}
+
+static inline
+u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
struct xdp_desc *descs = pool->tx_descs;
+ u32 total_descs = 0, nr_frags = 0;
+ /* Track the first entry; if we stumble upon *any* invalid descriptor,
+ * rewind the current packet that consists of frags and stop processing.
+ */
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = cached_cons & q->ring_mask;
+ struct parsed_desc parsed;
descs[nb_entries] = ring->desc[idx];
- if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
- /* Skip the entry */
- cached_cons++;
- continue;
+ cached_cons++;
+ parse_desc(q, pool, &descs[nb_entries], &parsed);
+ if (unlikely(!parsed.valid))
+ break;
+
+ if (likely(!parsed.mb)) {
+ total_descs += (nr_frags + 1);
+ nr_frags = 0;
+ } else {
+ nr_frags++;
+ if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+ nr_frags = 0;
+ break;
+ }
}
-
nb_entries++;
- cached_cons++;
}
+ cached_cons -= nr_frags;
/* Release valid plus any invalid entries */
xskq_cons_release_n(q, cached_cons - q->cached_cons);
- return nb_entries;
+ return total_descs;
}
/* Functions for consumers */
@@ -292,6 +333,11 @@ static inline void xskq_cons_release(struct xsk_queue *q)
q->cached_cons++;
}
+static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
+{
+ q->cached_cons -= cnt;
+}
+
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
@@ -319,9 +365,9 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
-static inline void xskq_prod_cancel(struct xsk_queue *q)
+static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
{
- q->cached_prod--;
+ q->cached_prod -= cnt;
}
static inline int xskq_prod_reserve(struct xsk_queue *q)
@@ -360,7 +406,7 @@ static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
}
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
- u64 addr, u32 len)
+ u64 addr, u32 len, u32 flags)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx;
@@ -372,6 +418,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
idx = q->cached_prod++ & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
+ ring->desc[idx].options = flags;
return 0;
}
@@ -386,16 +433,6 @@ static inline void xskq_prod_submit(struct xsk_queue *q)
__xskq_prod_submit(q, q->cached_prod);
}
-static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
-{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- u32 idx = q->ring->producer;
-
- ring->desc[idx++ & q->ring_mask] = addr;
-
- __xskq_prod_submit(q, idx);
-}
-
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
__xskq_prod_submit(q, q->ring->producer + nb_entries);
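[Editor's note: the rewritten xskq_cons_read_desc_batch() above hands out only whole packets — fragments are counted per packet, and an unterminated trailing run is rewound via cached_cons -= nr_frags so it is re-read on the next pass. A standalone sketch of that accounting rule, assuming contd[i] mirrors each descriptor's XDP_PKT_CONTD bit; all names are illustrative:]

#include <stddef.h>

/* Count descriptors that belong to complete packets; report how many
 * trailing frags must be rewound because their packet never terminated. */
static unsigned int count_complete(const unsigned char *contd, size_t n,
				   unsigned int *rewind)
{
	unsigned int total = 0, frags = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		if (!contd[i]) {	/* last frag: packet is complete */
			total += frags + 1;
			frags = 0;
		} else {
			frags++;
		}
	}
	*rewind = frags;	/* unterminated tail goes back to the ring */
	return total;
}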
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 8cbf45a8bcdc..655fe4ff8621 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -108,7 +108,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
[XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
[XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
+ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
[XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
[XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
[XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 533697e2488f..3784534c9185 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -247,12 +247,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- /* We don't yet support UDP encapsulation and TFC padding. */
- if (x->encap || x->tfcpad) {
- NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
- return -EINVAL;
- }
-
if (xuo->flags &
~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) {
NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
@@ -260,6 +254,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
+
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if ((!is_packet_offload && x->encap) || x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
+ return -EINVAL;
+ }
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
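[Editor's note: the xfrm_device.c reorder above relaxes the old blanket rejection — after this patch UDP encapsulation is refused only for crypto offload, while TFC padding is still refused for both offload types. A trivial restatement of the new predicate, with illustrative names:]

#include <stdbool.h>

/* true means the offload request must be rejected with -EINVAL */
static bool offload_rejected(bool is_packet_offload, bool has_encap,
			     bool has_tfcpad)
{
	return (!is_packet_offload && has_encap) || has_tfcpad;
}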
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 815b38080401..d5ee96789d4b 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
int optlen = 0;
int err = -EINVAL;
+ skb->protocol = htons(ETH_P_IP);
+
if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
struct ip_beet_phdr *ph;
int phlen;
@@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
+ skb->protocol = htons(ETH_P_IP);
+
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
@@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
+ skb->protocol = htons(ETH_P_IPV6);
+
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
@@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
int size = sizeof(struct ipv6hdr);
int err;
+ skb->protocol = htons(ETH_P_IPV6);
+
err = skb_cow_head(skb, size + skb->mac_len);
if (err)
goto out;
@@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
return xfrm6_remove_tunnel_encap(x, skb);
break;
}
+ return -EINVAL;
}
WARN_ON_ONCE(1);
@@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
return -EAFNOSUPPORT;
}
- switch (XFRM_MODE_SKB_CB(skb)->protocol) {
- case IPPROTO_IPIP:
- case IPPROTO_BEETPH:
- skb->protocol = htons(ETH_P_IP);
- break;
- case IPPROTO_IPV6:
- skb->protocol = htons(ETH_P_IPV6);
- break;
- default:
- WARN_ON_ONCE(1);
- break;
- }
-
return xfrm_inner_mode_encap_remove(x, skb);
}
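[Editor's note: the xfrm_input.c hunks above move the skb->protocol assignment out of the central switch in xfrm_prepare_input() and into each decapsulation helper, so the protocol is set by the path that actually knows the inner family, including the early-exit error paths. A kernel-style sketch of the resulting pattern — the helper name is made up, not a verbatim function from the patch:]

#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/skbuff.h>

/* Set the inner protocol first, so it is already correct if the pull
 * or a later inner-mode check fails and the caller unwinds. */
static int example_remove_tunnel_encap_v4(struct sk_buff *skb)
{
	skb->protocol = htons(ETH_P_IP);

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		return -EINVAL;

	/* ... strip the outer header, reset the network header, etc. ... */
	return 0;
}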
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index a3319965470a..b86474084690 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -537,8 +537,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
@@ -552,8 +552,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
}
break;
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e7617c9959c3..d6b405782b63 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
- if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+ if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
pol->if_id != if_id) {
pol = NULL;
goto out;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 49e63eea841d..bda5327bf34d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1324,12 +1324,8 @@ found:
struct xfrm_dev_offload *xso = &x->xso;
if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
- xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
- xso->dir = 0;
- netdev_put(xso->dev, &xso->dev_tracker);
- xso->dev = NULL;
- xso->real_dev = NULL;
- xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ xfrm_dev_state_delete(x);
+ xfrm_dev_state_free(x);
}
#endif
x->km.state = XFRM_STATE_DEAD;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c34a2a06ca94..ad01997c3aa9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -628,7 +628,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH];
- if (re) {
+ if (re && x->replay_esn && x->preplay_esn) {
struct xfrm_replay_state_esn *replay_esn;
replay_esn = nla_data(re);
memcpy(x->replay_esn, replay_esn,
@@ -1267,6 +1267,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
+
+ /* see addr_match(): (prefix length >> 5) << 2 bytes
+ * will be used when comparing xfrm_address_t, so the
+ * prefix length must not exceed the address size in bits
+ */
+ if (filter->splen > (sizeof(xfrm_address_t) << 3) ||
+ filter->dplen > (sizeof(xfrm_address_t) << 3)) {
+ kfree(filter);
+ return -EINVAL;
+ }
}
if (attrs[XFRMA_PROTO])
@@ -2336,6 +2345,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid);
}
} else {
+ xfrm_dev_policy_delete(xp);
xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err != 0)
@@ -3015,7 +3025,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
[XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
[XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
+ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
[XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
[XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
[XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
@@ -3035,6 +3045,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK] = { .type = NLA_U32 },
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
+ [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
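[Editor's note: a worked check of the xfrm_dump_sa() filter bounds test above — xfrm_address_t is 16 bytes, so sizeof(xfrm_address_t) << 3 = 128 is the largest usable prefix length; addr_match() walks (plen >> 5) whole 32-bit words plus a masked tail, so a larger splen/dplen would read past the address. A tiny standalone illustration with a stand-in typedef:]

#include <assert.h>
#include <stdint.h>

typedef struct { uint32_t a[4]; } xfrm_addr_t;	/* 16 bytes, as in the uapi */

int main(void)
{
	unsigned int max_plen = sizeof(xfrm_addr_t) << 3;	/* 128 bits */

	assert(max_plen == 128);
	/* addr_match() compares (plen >> 5) whole 32-bit words plus a
	 * masked remainder; plen = 129 would touch a fifth word that the
	 * 4-word address does not have. */
	return 0;
}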