Diffstat (limited to 'net')
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/batman-adv/bat_iv_ogm.c | 1
-rw-r--r--  net/batman-adv/bat_v_elp.c | 1
-rw-r--r--  net/batman-adv/bat_v_ogm.c | 5
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 2
-rw-r--r--  net/batman-adv/gateway_common.c | 2
-rw-r--r--  net/batman-adv/main.h | 2
-rw-r--r--  net/batman-adv/multicast.c | 251
-rw-r--r--  net/batman-adv/multicast.h | 38
-rw-r--r--  net/batman-adv/network-coding.c | 4
-rw-r--r--  net/batman-adv/routing.c | 7
-rw-r--r--  net/batman-adv/soft-interface.c | 26
-rw-r--r--  net/batman-adv/translation-table.c | 4
-rw-r--r--  net/batman-adv/tvlv.c | 71
-rw-r--r--  net/batman-adv/tvlv.h | 9
-rw-r--r--  net/batman-adv/types.h | 6
-rw-r--r--  net/bluetooth/rfcomm/core.c | 4
-rw-r--r--  net/bpf/bpf_dummy_struct_ops.c | 18
-rw-r--r--  net/bpf/test_run.c | 3
-rw-r--r--  net/bridge/br_mdb.c | 17
-rw-r--r--  net/bridge/br_multicast.c | 179
-rw-r--r--  net/bridge/br_netfilter_hooks.c | 2
-rw-r--r--  net/bridge/br_netlink.c | 19
-rw-r--r--  net/bridge/br_netlink_tunnel.c | 3
-rw-r--r--  net/bridge/br_private.h | 12
-rw-r--r--  net/bridge/br_switchdev.c | 10
-rw-r--r--  net/bridge/br_vlan.c | 11
-rw-r--r--  net/bridge/br_vlan_options.c | 27
-rw-r--r--  net/bridge/netfilter/nf_conntrack_bridge.c | 4
-rw-r--r--  net/caif/caif_socket.c | 4
-rw-r--r--  net/can/gw.c | 7
-rw-r--r--  net/can/isotp.c | 3
-rw-r--r--  net/can/raw.c | 11
-rw-r--r--  net/ceph/messenger.c | 4
-rw-r--r--  net/core/Makefile | 1
-rw-r--r--  net/core/dev.c | 20
-rw-r--r--  net/core/dev.h | 18
-rw-r--r--  net/core/dst.c | 8
-rw-r--r--  net/core/filter.c | 56
-rw-r--r--  net/core/gro.c | 12
-rw-r--r--  net/core/neighbour.c | 14
-rw-r--r--  net/core/net-traces.c | 3
-rw-r--r--  net/core/netpoll.c | 12
-rw-r--r--  net/core/page_pool.c | 6
-rw-r--r--  net/core/rtnetlink.c | 35
-rw-r--r--  net/core/skbuff.c | 198
-rw-r--r--  net/core/skmsg.c | 5
-rw-r--r--  net/core/sock.c | 43
-rw-r--r--  net/core/sysctl_net_core.c | 5
-rw-r--r--  net/core/xdp.c | 64
-rw-r--r--  net/dcb/dcbnl.c | 272
-rw-r--r--  net/devlink/Makefile | 3
-rw-r--r--  net/devlink/core.c | 320
-rw-r--r--  net/devlink/dev.c | 1343
-rw-r--r--  net/devlink/devl_internal.h | 220
-rw-r--r--  net/devlink/leftover.c (renamed from net/core/devlink.c) | 3352
-rw-r--r--  net/devlink/netlink.c | 251
-rw-r--r--  net/dsa/master.c | 6
-rw-r--r--  net/dsa/slave.c | 50
-rw-r--r--  net/dsa/tag_ksz.c | 216
-rw-r--r--  net/ethtool/Makefile | 4
-rw-r--r--  net/ethtool/channels.c | 92
-rw-r--r--  net/ethtool/coalesce.c | 114
-rw-r--r--  net/ethtool/common.c | 8
-rw-r--r--  net/ethtool/common.h | 2
-rw-r--r--  net/ethtool/debug.c | 71
-rw-r--r--  net/ethtool/eee.c | 78
-rw-r--r--  net/ethtool/fec.c | 83
-rw-r--r--  net/ethtool/linkinfo.c | 81
-rw-r--r--  net/ethtool/linkmodes.c | 91
-rw-r--r--  net/ethtool/mm.c | 251
-rw-r--r--  net/ethtool/module.c | 89
-rw-r--r--  net/ethtool/netlink.c | 135
-rw-r--r--  net/ethtool/netlink.h | 72
-rw-r--r--  net/ethtool/pause.c | 125
-rw-r--r--  net/ethtool/plca.c | 248
-rw-r--r--  net/ethtool/privflags.c | 84
-rw-r--r--  net/ethtool/pse-pd.c | 79
-rw-r--r--  net/ethtool/rings.c | 101
-rw-r--r--  net/ethtool/stats.c | 159
-rw-r--r--  net/ethtool/wol.c | 79
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 7
-rw-r--r--  net/ipv4/bpf_tcp_ca.c | 3
-rw-r--r--  net/ipv4/cipso_ipv4.c | 2
-rw-r--r--  net/ipv4/fou_core.c (renamed from net/ipv4/fou.c) | 47
-rw-r--r--  net/ipv4/fou_nl.c | 48
-rw-r--r--  net/ipv4/fou_nl.h | 25
-rw-r--r--  net/ipv4/icmp.c | 3
-rw-r--r--  net/ipv4/inet_connection_sock.c | 25
-rw-r--r--  net/ipv4/inet_hashtables.c | 14
-rw-r--r--  net/ipv4/ip_input.c | 2
-rw-r--r--  net/ipv4/ip_output.c | 2
-rw-r--r--  net/ipv4/ip_sockglue.c | 18
-rw-r--r--  net/ipv4/netfilter/Kconfig | 14
-rw-r--r--  net/ipv4/netfilter/Makefile | 1
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 929
-rw-r--r--  net/ipv4/proc.c | 8
-rw-r--r--  net/ipv4/raw.c | 21
-rw-r--r--  net/ipv4/tcp_ipv4.c | 1
-rw-r--r--  net/ipv4/udp.c | 2
-rw-r--r--  net/ipv6/icmp.c | 9
-rw-r--r--  net/ipv6/proc.c | 1
-rw-r--r--  net/ipv6/raw.c | 16
-rw-r--r--  net/ipv6/route.c | 23
-rw-r--r--  net/ipv6/rpl_iptunnel.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c | 3
-rw-r--r--  net/kcm/kcmsock.c | 3
-rw-r--r--  net/mac80211/cfg.c | 36
-rw-r--r--  net/mac80211/rx.c | 15
-rw-r--r--  net/mac80211/sta_info.c | 9
-rw-r--r--  net/mac80211/sta_info.h | 24
-rw-r--r--  net/mac80211/util.c | 26
-rw-r--r--  net/mptcp/options.c | 3
-rw-r--r--  net/mptcp/pm_netlink.c | 63
-rw-r--r--  net/mptcp/pm_userspace.c | 5
-rw-r--r--  net/mptcp/protocol.c | 38
-rw-r--r--  net/mptcp/protocol.h | 2
-rw-r--r--  net/mptcp/sockopt.c | 3
-rw-r--r--  net/mptcp/subflow.c | 3
-rw-r--r--  net/mptcp/token.c | 14
-rw-r--r--  net/mptcp/token_test.c | 3
-rw-r--r--  net/netfilter/Makefile | 6
-rw-r--r--  net/netfilter/ipset/Kconfig | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 57
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 20
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c | 44
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 9
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c | 10
-rw-r--r--  net/netfilter/nf_flow_table_core.c | 5
-rw-r--r--  net/netfilter/nf_flow_table_inet.c | 2
-rw-r--r--  net/netfilter/nf_flow_table_offload.c | 18
-rw-r--r--  net/netfilter/nf_log_syslog.c | 2
-rw-r--r--  net/netfilter/nf_tables_api.c | 111
-rw-r--r--  net/netfilter/nf_tables_core.c | 35
-rw-r--r--  net/netfilter/nft_ct.c | 39
-rw-r--r--  net/netfilter/nft_ct_fast.c | 56
-rw-r--r--  net/netfilter/nft_objref.c | 12
-rw-r--r--  net/netfilter/xt_length.c | 2
-rw-r--r--  net/openvswitch/conntrack.c | 2
-rw-r--r--  net/openvswitch/flow.c | 9
-rw-r--r--  net/openvswitch/flow.h | 2
-rw-r--r--  net/openvswitch/flow_table.c | 8
-rw-r--r--  net/packet/af_packet.c | 4
-rw-r--r--  net/phonet/pep-gprs.c | 4
-rw-r--r--  net/qrtr/ns.c | 3
-rw-r--r--  net/rds/ib_recv.c | 1
-rw-r--r--  net/rds/recv.c | 1
-rw-r--r--  net/rds/tcp_listen.c | 2
-rw-r--r--  net/rds/tcp_recv.c | 2
-rw-r--r--  net/rfkill/rfkill-gpio.c | 20
-rw-r--r--  net/rxrpc/Kconfig | 9
-rw-r--r--  net/rxrpc/af_rxrpc.c | 2
-rw-r--r--  net/rxrpc/ar-internal.h | 15
-rw-r--r--  net/rxrpc/call_accept.c | 2
-rw-r--r--  net/rxrpc/call_event.c | 15
-rw-r--r--  net/rxrpc/call_object.c | 7
-rw-r--r--  net/rxrpc/conn_service.c | 7
-rw-r--r--  net/rxrpc/input.c | 60
-rw-r--r--  net/rxrpc/io_thread.c | 48
-rw-r--r--  net/rxrpc/local_object.c | 7
-rw-r--r--  net/rxrpc/misc.c | 7
-rw-r--r--  net/rxrpc/output.c | 69
-rw-r--r--  net/rxrpc/proc.c | 4
-rw-r--r--  net/rxrpc/recvmsg.c | 18
-rw-r--r--  net/rxrpc/skbuff.c | 4
-rw-r--r--  net/rxrpc/sysctl.c | 17
-rw-r--r--  net/rxrpc/txbuf.c | 12
-rw-r--r--  net/sched/Kconfig | 9
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/act_api.c | 15
-rw-r--r--  net/sched/act_ct.c | 67
-rw-r--r--  net/sched/act_mirred.c | 23
-rw-r--r--  net/sched/act_pedit.c | 277
-rw-r--r--  net/sched/cls_api.c | 62
-rw-r--r--  net/sched/sch_api.c | 84
-rw-r--r--  net/sched/sch_cake.c | 2
-rw-r--r--  net/sched/sch_mqprio.c | 291
-rw-r--r--  net/sched/sch_mqprio_lib.c | 117
-rw-r--r--  net/sched/sch_mqprio_lib.h | 18
-rw-r--r--  net/sched/sch_taprio.c | 709
-rw-r--r--  net/sctp/socket.c | 5
-rw-r--r--  net/smc/af_smc.c | 35
-rw-r--r--  net/smc/smc_clc.c | 11
-rw-r--r--  net/smc/smc_core.c | 88
-rw-r--r--  net/smc/smc_core.h | 6
-rw-r--r--  net/smc/smc_diag.c | 3
-rw-r--r--  net/smc/smc_ism.c | 180
-rw-r--r--  net/smc/smc_ism.h | 3
-rw-r--r--  net/smc/smc_llc.c | 34
-rw-r--r--  net/smc/smc_pnet.c | 40
-rw-r--r--  net/smc/smc_rx.c | 4
-rw-r--r--  net/socket.c | 33
-rw-r--r--  net/sunrpc/svcsock.c | 5
-rw-r--r--  net/sunrpc/xprtsock.c | 3
-rw-r--r--  net/tipc/socket.c | 3
-rw-r--r--  net/tipc/topsrv.c | 5
-rw-r--r--  net/tls/tls_sw.c | 3
-rw-r--r--  net/unix/af_unix.c | 21
-rw-r--r--  net/vmw_vsock/af_vsock.c | 3
-rw-r--r--  net/vmw_vsock/virtio_transport.c | 149
-rw-r--r--  net/vmw_vsock/virtio_transport_common.c | 422
-rw-r--r--  net/vmw_vsock/vsock_loopback.c | 51
-rw-r--r--  net/wireless/core.h | 4
-rw-r--r--  net/wireless/ibss.c | 5
-rw-r--r--  net/wireless/nl80211.c | 6
-rw-r--r--  net/wireless/reg.c | 54
-rw-r--r--  net/wireless/sme.c | 6
-rw-r--r--  net/wireless/util.c | 2
-rw-r--r--  net/wireless/wext-compat.c | 2
-rw-r--r--  net/wireless/wext-core.c | 20
-rw-r--r--  net/wireless/wext-sme.c | 2
-rw-r--r--  net/xfrm/espintcp.c | 3
-rw-r--r--  net/xfrm/xfrm_device.c | 8
-rw-r--r--  net/xfrm/xfrm_state.c | 2
217 files changed, 7940 insertions, 6572 deletions
diff --git a/net/Makefile b/net/Makefile
index 6a62e5b27378..0914bea9c335 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
+obj-$(CONFIG_NET_DEVLINK) += devlink/
obj-$(CONFIG_NET_DSA) += dsa/
obj-$(CONFIG_ATALK) += appletalk/
obj-$(CONFIG_X25) += x25/
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 114ee5da261f..828fb393ee94 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,7 +27,6 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
-#include <linux/prandom.h>
#include <linux/printk.h>
#include <linux/random.h>
#include <linux/rculist.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index f9a58fb5442e..acff565849ae 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -21,7 +21,6 @@
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
-#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index addfd8c4fe95..e710e9afe78f 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -21,7 +21,6 @@
#include <linux/minmax.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
-#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -800,8 +799,8 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
/* only unknown & newer OGMs contain TVLVs we are interested in */
if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
- batadv_tvlv_containers_process(bat_priv, true, orig_node,
- NULL, NULL,
+ batadv_tvlv_containers_process(bat_priv, BATADV_OGM2, orig_node,
+ NULL,
(unsigned char *)(ogm2 + 1),
ntohs(ogm2->tvlv_len));
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index fefb51a5f606..6968e55eb971 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -822,7 +822,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
batadv_dat_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_DAT, 1,
+ NULL, NULL, BATADV_TVLV_DAT, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
batadv_dat_tvlv_container_update(bat_priv);
return 0;
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 9349c76f30c5..6a964a773f57 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -259,7 +259,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->gw.sel_class, 1);
batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_GW, 1,
+ NULL, NULL, BATADV_TVLV_GW, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index c48803b32bb0..156ed39eded1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2022.3"
+#define BATADV_SOURCE_VERSION "2023.1"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index b238455913df..315394f12c55 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -26,7 +26,6 @@
#include <linux/ipv6.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
-#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -1137,222 +1136,19 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_tt_node_get() - get a multicast tt node
- * @bat_priv: the bat priv with all the soft interface information
- * @ethhdr: the ether header containing the multicast destination
- *
- * Return: an orig_node matching the multicast address provided by ethhdr
- * via a translation table lookup. This increases the returned nodes refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
- struct ethhdr *ethhdr)
-{
- return batadv_transtable_search(bat_priv, NULL, ethhdr->h_dest,
- BATADV_NO_FLAGS);
-}
-
-/**
- * batadv_mcast_forw_ipv4_node_get() - get a node with an ipv4 flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_ipv4_list,
- mcast_want_all_ipv4_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_ipv6_node_get() - get a node with an ipv6 flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
- * and increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_ipv6_list,
- mcast_want_all_ipv6_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_ip_node_get() - get a node with an ipv4/ipv6 flag
- * @bat_priv: the bat priv with all the soft interface information
- * @ethhdr: an ethernet header to determine the protocol family from
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
- * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, sets and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
- struct ethhdr *ethhdr)
-{
- switch (ntohs(ethhdr->h_proto)) {
- case ETH_P_IP:
- return batadv_mcast_forw_ipv4_node_get(bat_priv);
- case ETH_P_IPV6:
- return batadv_mcast_forw_ipv6_node_get(bat_priv);
- default:
- /* we shouldn't be here... */
- return NULL;
- }
-}
-
-/**
- * batadv_mcast_forw_unsnoop_node_get() - get a node with an unsnoopable flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
- * set and increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_unsnoopables_list,
- mcast_want_all_unsnoopables_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_rtr4_node_get() - get a node with an ipv4 mcast router flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR4 flag unset and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_rtr4_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_rtr4_list,
- mcast_want_all_rtr4_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_rtr6_node_get() - get a node with an ipv6 mcast router flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR6 flag unset
- * and increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_rtr6_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_rtr6_list,
- mcast_want_all_rtr6_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_rtr_node_get() - get a node with an ipv4/ipv6 router flag
- * @bat_priv: the bat priv with all the soft interface information
- * @ethhdr: an ethernet header to determine the protocol family from
- *
- * Return: an orig_node which has no BATADV_MCAST_WANT_NO_RTR4 or
- * BATADV_MCAST_WANT_NO_RTR6 flag, depending on the provided ethhdr, set and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv,
- struct ethhdr *ethhdr)
-{
- switch (ntohs(ethhdr->h_proto)) {
- case ETH_P_IP:
- return batadv_mcast_forw_rtr4_node_get(bat_priv);
- case ETH_P_IPV6:
- return batadv_mcast_forw_rtr6_node_get(bat_priv);
- default:
- /* we shouldn't be here... */
- return NULL;
- }
-}
-
-/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
- * @skb: The multicast packet to check
- * @orig: an originator to be set to forward the skb to
+ * @skb: the multicast packet to check
* @is_routable: stores whether the destination is routable
*
- * Return: the forwarding mode as enum batadv_forw_mode and in case of
- * BATADV_FORW_SINGLE set the orig to the single originator the skb
- * should be forwarded to.
+ * Return: The forwarding mode as enum batadv_forw_mode.
*/
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- struct batadv_orig_node **orig, int *is_routable)
+ int *is_routable)
{
int ret, tt_count, ip_count, unsnoop_count, total_count;
bool is_unsnoopable = false;
- unsigned int mcast_fanout;
struct ethhdr *ethhdr;
int rtr_count = 0;
@@ -1361,7 +1157,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (ret == -ENOMEM)
return BATADV_FORW_NONE;
else if (ret < 0)
- return BATADV_FORW_ALL;
+ return BATADV_FORW_BCAST;
ethhdr = eth_hdr(skb);
@@ -1374,32 +1170,15 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
total_count = tt_count + ip_count + unsnoop_count + rtr_count;
- switch (total_count) {
- case 1:
- if (tt_count)
- *orig = batadv_mcast_forw_tt_node_get(bat_priv, ethhdr);
- else if (ip_count)
- *orig = batadv_mcast_forw_ip_node_get(bat_priv, ethhdr);
- else if (unsnoop_count)
- *orig = batadv_mcast_forw_unsnoop_node_get(bat_priv);
- else if (rtr_count)
- *orig = batadv_mcast_forw_rtr_node_get(bat_priv,
- ethhdr);
-
- if (*orig)
- return BATADV_FORW_SINGLE;
-
- fallthrough;
- case 0:
+ if (!total_count)
return BATADV_FORW_NONE;
- default:
- mcast_fanout = atomic_read(&bat_priv->multicast_fanout);
+ else if (unsnoop_count)
+ return BATADV_FORW_BCAST;
- if (!unsnoop_count && total_count <= mcast_fanout)
- return BATADV_FORW_SOME;
- }
+ if (total_count <= atomic_read(&bat_priv->multicast_fanout))
+ return BATADV_FORW_UCASTS;
- return BATADV_FORW_ALL;
+ return BATADV_FORW_BCAST;
}
/**
@@ -1411,10 +1190,10 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
*
* Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
-int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- unsigned short vid,
- struct batadv_orig_node *orig_node)
+static int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
{
/* Avoid sending multicast-in-unicast packets to other BLA
* gateways - they already got the frame from the LAN side
@@ -2039,7 +1818,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
void batadv_mcast_init(struct batadv_priv *bat_priv)
{
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
- NULL, BATADV_TVLV_MCAST, 2,
+ NULL, NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 8aec818d0bf6..a9770d8d6d36 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -17,23 +17,16 @@
*/
enum batadv_forw_mode {
/**
- * @BATADV_FORW_ALL: forward the packet to all nodes (currently via
- * classic flooding)
+ * @BATADV_FORW_BCAST: forward the packet to all nodes via a batman-adv
+ * broadcast packet
*/
- BATADV_FORW_ALL,
+ BATADV_FORW_BCAST,
/**
- * @BATADV_FORW_SOME: forward the packet to some nodes (currently via
- * a multicast-to-unicast conversion and the BATMAN unicast routing
- * protocol)
+ * @BATADV_FORW_UCASTS: forward the packet to some nodes via one
+ * or more batman-adv unicast packets
*/
- BATADV_FORW_SOME,
-
- /**
- * @BATADV_FORW_SINGLE: forward the packet to a single node (currently
- * via the BATMAN unicast routing protocol)
- */
- BATADV_FORW_SINGLE,
+ BATADV_FORW_UCASTS,
/** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
@@ -43,14 +36,8 @@ enum batadv_forw_mode {
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- struct batadv_orig_node **mcast_single_orig,
int *is_routable);
-int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- unsigned short vid,
- struct batadv_orig_node *orig_node);
-
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, int is_routable);
@@ -69,20 +56,9 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- struct batadv_orig_node **mcast_single_orig,
int *is_routable)
{
- return BATADV_FORW_ALL;
-}
-
-static inline int
-batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- unsigned short vid,
- struct batadv_orig_node *orig_node)
-{
- kfree_skb(skb);
- return NET_XMIT_DROP;
+ return BATADV_FORW_BCAST;
}
static inline int
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index bf29fba4dde5..71ebd0284f95 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -25,8 +25,8 @@
#include <linux/lockdep.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/prandom.h>
#include <linux/printk.h>
+#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
@@ -160,7 +160,7 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
batadv_nc_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_nc_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_NC, 1,
+ NULL, NULL, BATADV_TVLV_NC, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
batadv_nc_tvlv_container_update(bat_priv);
return 0;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 83f31494ea4d..163cd43c4821 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1073,10 +1073,9 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
if (tvlv_buff_len > skb->len - hdr_size)
goto free_skb;
- ret = batadv_tvlv_containers_process(bat_priv, false, NULL,
- unicast_tvlv_packet->src,
- unicast_tvlv_packet->dst,
- tvlv_buff, tvlv_buff_len);
+ ret = batadv_tvlv_containers_process(bat_priv, BATADV_UNICAST_TVLV,
+ NULL, skb, tvlv_buff,
+ tvlv_buff_len);
if (ret != NET_RX_SUCCESS) {
ret = batadv_route_unicast_packet(skb, recv_if);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0f5c0679b55a..125f4628687c 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -48,7 +48,6 @@
#include "hard-interface.h"
#include "multicast.h"
#include "network-coding.h"
-#include "originator.h"
#include "send.h"
#include "translation-table.h"
@@ -196,8 +195,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
unsigned short vid;
u32 seqno;
int gw_mode;
- enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE;
- struct batadv_orig_node *mcast_single_orig = NULL;
+ enum batadv_forw_mode forw_mode = BATADV_FORW_BCAST;
int mcast_is_routable = 0;
int network_offset = ETH_HLEN;
__be16 proto;
@@ -301,14 +299,18 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
send:
if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
- &mcast_single_orig,
&mcast_is_routable);
- if (forw_mode == BATADV_FORW_NONE)
- goto dropped;
-
- if (forw_mode == BATADV_FORW_SINGLE ||
- forw_mode == BATADV_FORW_SOME)
+ switch (forw_mode) {
+ case BATADV_FORW_BCAST:
+ break;
+ case BATADV_FORW_UCASTS:
do_bcast = false;
+ break;
+ case BATADV_FORW_NONE:
+ fallthrough;
+ default:
+ goto dropped;
+ }
}
}
@@ -357,10 +359,7 @@ send:
if (ret)
goto dropped;
ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
- } else if (mcast_single_orig) {
- ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
- mcast_single_orig);
- } else if (forw_mode == BATADV_FORW_SOME) {
+ } else if (forw_mode == BATADV_FORW_UCASTS) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid,
mcast_is_routable);
} else {
@@ -386,7 +385,6 @@ dropped:
dropped_freed:
batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
- batadv_orig_node_put(mcast_single_orig);
batadv_hardif_put(primary_if);
return NETDEV_TX_OK;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 01d30c1e412c..36ca31252a73 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4168,11 +4168,11 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
}
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
- batadv_tt_tvlv_unicast_handler_v1,
+ batadv_tt_tvlv_unicast_handler_v1, NULL,
BATADV_TVLV_TT, 1, BATADV_NO_FLAGS);
batadv_tvlv_handler_register(bat_priv, NULL,
- batadv_roam_tvlv_unicast_handler_v1,
+ batadv_roam_tvlv_unicast_handler_v1, NULL,
BATADV_TVLV_ROAM, 1, BATADV_NO_FLAGS);
INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 7ec2e2343884..2a583215d439 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -352,10 +352,9 @@ end:
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
* @tvlv_handler: tvlv callback function handling the tvlv content
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @packet_type: indicates for which packet type the TVLV handler is called
* @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
+ * @skb: the skb the TVLV handler is called for
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
@@ -364,15 +363,20 @@ end:
*/
static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
struct batadv_tvlv_handler *tvlv_handler,
- bool ogm_source,
+ u8 packet_type,
struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
+ struct sk_buff *skb, void *tvlv_value,
+ u16 tvlv_value_len)
{
+ unsigned int tvlv_offset;
+ u8 *src, *dst;
+
if (!tvlv_handler)
return NET_RX_SUCCESS;
- if (ogm_source) {
+ switch (packet_type) {
+ case BATADV_IV_OGM:
+ case BATADV_OGM2:
if (!tvlv_handler->ogm_handler)
return NET_RX_SUCCESS;
@@ -383,19 +387,32 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
BATADV_NO_FLAGS,
tvlv_value, tvlv_value_len);
tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
- } else {
- if (!src)
- return NET_RX_SUCCESS;
-
- if (!dst)
+ break;
+ case BATADV_UNICAST_TVLV:
+ if (!skb)
return NET_RX_SUCCESS;
if (!tvlv_handler->unicast_handler)
return NET_RX_SUCCESS;
+ src = ((struct batadv_unicast_tvlv_packet *)skb->data)->src;
+ dst = ((struct batadv_unicast_tvlv_packet *)skb->data)->dst;
+
return tvlv_handler->unicast_handler(bat_priv, src,
dst, tvlv_value,
tvlv_value_len);
+ case BATADV_MCAST:
+ if (!skb)
+ return NET_RX_SUCCESS;
+
+ if (!tvlv_handler->mcast_handler)
+ return NET_RX_SUCCESS;
+
+ tvlv_offset = (unsigned char *)tvlv_value - skb->data;
+ skb_set_network_header(skb, tvlv_offset);
+ skb_set_transport_header(skb, tvlv_offset + tvlv_value_len);
+
+ return tvlv_handler->mcast_handler(bat_priv, skb);
}
return NET_RX_SUCCESS;
@@ -405,10 +422,9 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
* batadv_tvlv_containers_process() - parse the given tvlv buffer to call the
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @packet_type: indicates for which packet type the TVLV handler is called
* @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
+ * @skb: the skb the TVLV handler is called for
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
@@ -416,10 +432,10 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
* handler callbacks.
*/
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
+ u8 packet_type,
struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
+ struct sk_buff *skb, void *tvlv_value,
+ u16 tvlv_value_len)
{
struct batadv_tvlv_handler *tvlv_handler;
struct batadv_tvlv_hdr *tvlv_hdr;
@@ -441,20 +457,24 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
tvlv_hdr->version);
ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
- ogm_source, orig_node,
- src, dst, tvlv_value,
+ packet_type, orig_node, skb,
+ tvlv_value,
tvlv_value_cont_len);
batadv_tvlv_handler_put(tvlv_handler);
tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
tvlv_value_len -= tvlv_value_cont_len;
}
- if (!ogm_source)
+ if (packet_type != BATADV_IV_OGM &&
+ packet_type != BATADV_OGM2)
return ret;
rcu_read_lock();
hlist_for_each_entry_rcu(tvlv_handler,
&bat_priv->tvlv.handler_list, list) {
+ if (!tvlv_handler->ogm_handler)
+ continue;
+
if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
!(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
tvlv_handler->ogm_handler(bat_priv, orig_node,
@@ -490,7 +510,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
tvlv_value = batadv_ogm_packet + 1;
- batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
+ batadv_tvlv_containers_process(bat_priv, BATADV_IV_OGM, orig_node, NULL,
tvlv_value, tvlv_value_len);
}
@@ -504,6 +524,10 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
* @uptr: unicast tvlv handler callback function. This function receives the
* source & destination of the unicast packet as well as the tvlv content
* to process.
+ * @mptr: multicast packet tvlv handler callback function. This function
+ * receives the full skb to process, with the skb network header pointing
+ * to the current tvlv and the skb transport header pointing to the first
+ * byte after the current tvlv.
* @type: tvlv handler type to be registered
* @version: tvlv handler version to be registered
* @flags: flags to enable or disable TVLV API behavior
@@ -518,6 +542,8 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
u8 *src, u8 *dst,
void *tvlv_value,
u16 tvlv_value_len),
+ int (*mptr)(struct batadv_priv *bat_priv,
+ struct sk_buff *skb),
u8 type, u8 version, u8 flags)
{
struct batadv_tvlv_handler *tvlv_handler;
@@ -539,6 +565,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
tvlv_handler->ogm_handler = optr;
tvlv_handler->unicast_handler = uptr;
+ tvlv_handler->mcast_handler = mptr;
tvlv_handler->type = type;
tvlv_handler->version = version;
tvlv_handler->flags = flags;
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 4cf8af00fc11..e5697230d991 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/skbuff.h>
#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
@@ -34,14 +35,16 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
u8 *src, u8 *dst,
void *tvlv_value,
u16 tvlv_value_len),
+ int (*mptr)(struct batadv_priv *bat_priv,
+ struct sk_buff *skb),
u8 type, u8 version, u8 flags);
void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
u8 type, u8 version);
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
+ u8 packet_type,
struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_buff, u16 tvlv_buff_len);
+ struct sk_buff *skb, void *tvlv_buff,
+ u16 tvlv_buff_len);
void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src,
const u8 *dst, u8 type, u8 version,
void *tvlv_value, u16 tvlv_value_len);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 758cd797a063..ca9449ec9836 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -2335,6 +2335,12 @@ struct batadv_tvlv_handler {
u8 *src, u8 *dst,
void *tvlv_value, u16 tvlv_value_len);
+ /**
+ * @mcast_handler: handler callback which is given the tvlv payload to
+ * process on incoming mcast packet
+ */
+ int (*mcast_handler)(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
/** @type: tvlv type this handler feels responsible for */
u8 type;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 8d6fce9005bd..053ef8f25fae 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -35,6 +35,8 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/rfcomm.h>
+#include <trace/events/sock.h>
+
#define VERSION "1.11"
static bool disable_cfc;
@@ -186,6 +188,8 @@ static void rfcomm_l2state_change(struct sock *sk)
static void rfcomm_l2data_ready(struct sock *sk)
{
+ trace_sk_data_ready(sk);
+
BT_DBG("%p", sk);
rfcomm_schedule();
}
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 1ac4467928a9..ff4f89a2b02a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -154,6 +154,23 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size,
return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
+static int bpf_dummy_ops_check_member(const struct btf_type *t,
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
+{
+ u32 moff = __btf_member_bit_offset(t, member) / 8;
+
+ switch (moff) {
+ case offsetof(struct bpf_dummy_ops, test_sleepable):
+ break;
+ default:
+ if (prog->aux->sleepable)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
@@ -208,6 +225,7 @@ static void bpf_dummy_unreg(void *kdata)
struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
+ .check_member = bpf_dummy_ops_check_member,
.init_member = bpf_dummy_init_member,
.reg = bpf_dummy_reg,
.unreg = bpf_dummy_unreg,
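[Editor's note] The new check_member hook means a sleepable struct_ops program is only accepted for the test_sleepable member; attaching one to any other member now fails the load with -EINVAL. A hypothetical selftest-style sketch of the two cases:

SEC("struct_ops.s/test_sleepable")	/* sleepable: permitted by check_member */
int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
{
	return 0;
}

SEC("struct_ops.s/test_1")		/* sleepable on another member: -EINVAL */
int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
{
	return 0;
}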
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2723623429ac..8da0d73b368e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1300,6 +1300,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES)
return -EINVAL;
+ if (bpf_prog_is_dev_bound(prog->aux))
+ return -EINVAL;
+
if (do_live) {
if (!batch_size)
batch_size = NAPI_POLL_WEIGHT;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 00e5743647b0..9f22ebfdc518 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -849,11 +849,10 @@ static int br_mdb_add_group_sg(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- MCAST_INCLUDE, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port group");
+ MCAST_INCLUDE, cfg->rt_protocol, extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
+
rcu_assign_pointer(*pp, p);
if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
mod_timer(&p->timer,
@@ -1075,11 +1074,10 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- cfg->filter_mode, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port group");
+ cfg->filter_mode, cfg->rt_protocol,
+ extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
err = br_mdb_add_group_srcs(cfg, p, brmctx, extack);
if (err)
@@ -1101,8 +1099,7 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
return 0;
err_del_port_group:
- hlist_del_init(&p->mglist);
- kfree(p);
+ br_multicast_del_port_group(p);
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dea1ee1bd095..96d1fc78dd39 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -31,6 +31,7 @@
#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#endif
+#include <trace/events/bridge.h>
#include "br_private.h"
#include "br_private_mcast_eht.h"
@@ -234,6 +235,29 @@ out:
return pmctx;
}
+static struct net_bridge_mcast_port *
+br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx = NULL;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return NULL;
+
+ /* Take RCU to access the vlan. */
+ rcu_read_lock();
+
+ vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+
+ rcu_read_unlock();
+
+ return pmctx;
+}
+
/* when snooping we need to check if the contexts should be used
* in the following order:
* - if pmctx is non-NULL (port), check if it should be used
@@ -668,6 +692,101 @@ void br_multicast_del_group_src(struct net_bridge_group_src *src,
__br_multicast_del_group_src(src);
}
+static int
+br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
+ struct netlink_ext_ack *extack,
+ const char *what)
+{
+ u32 max = READ_ONCE(pmctx->mdb_max_entries);
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ if (max && n >= max) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u",
+ what, n, max);
+ return -E2BIG;
+ }
+
+ WRITE_ONCE(pmctx->mdb_n_entries, n + 1);
+ return 0;
+}
+
+static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
+{
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ WARN_ON_ONCE(n == 0);
+ WRITE_ONCE(pmctx->mdb_n_entries, n - 1);
+}
+
+static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
+ const struct br_ip *group,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast_port *pmctx;
+ int err;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ /* Always count on the port context. */
+ err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack,
+ "Port");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ return err;
+ }
+
+ /* Only count on the VLAN context if VID is given, and if snooping on
+ * that VLAN is enabled.
+ */
+ if (!group->vid)
+ return 0;
+
+ pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
+ if (!pmctx)
+ return 0;
+
+ err = br_multicast_port_ngroups_inc_one(pmctx, extack, "Port-VLAN");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ goto dec_one_out;
+ }
+
+ return 0;
+
+dec_one_out:
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+ return err;
+}
+
+static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (vid) {
+ pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
+ if (pmctx)
+ br_multicast_port_ngroups_dec_one(pmctx);
+ }
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+}
+
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_n_entries);
+}
+
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max)
+{
+ WRITE_ONCE(pmctx->mdb_max_entries, max);
+}
+
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_max_entries);
+}
+
static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
{
struct net_bridge_port_group *pg;
@@ -702,6 +821,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
} else {
br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
}
+ br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
queue_work(system_long_wq, &br->mcast_gc_work);
@@ -1165,6 +1285,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ trace_br_mdb_full(br->dev, group);
br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
@@ -1284,14 +1405,22 @@ struct net_bridge_port_group *br_multicast_new_port_group(
unsigned char flags,
const unsigned char *src,
u8 filter_mode,
- u8 rt_protocol)
+ u8 rt_protocol,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port_group *p;
+ int err;
- p = kzalloc(sizeof(*p), GFP_ATOMIC);
- if (unlikely(!p))
+ err = br_multicast_port_ngroups_inc(port, group, extack);
+ if (err)
return NULL;
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
+ goto dec_out;
+ }
+
p->key.addr = *group;
p->key.port = port;
p->flags = flags;
@@ -1305,8 +1434,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (!br_multicast_is_star_g(group) &&
rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
br_sg_port_rht_params)) {
- kfree(p);
- return NULL;
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group");
+ goto free_out;
}
rcu_assign_pointer(p->next, next);
@@ -1320,6 +1449,25 @@ struct net_bridge_port_group *br_multicast_new_port_group(
eth_broadcast_addr(p->eth_addr);
return p;
+
+free_out:
+ kfree(p);
+dec_out:
+ br_multicast_port_ngroups_dec(port, group->vid);
+ return NULL;
+}
+
+void br_multicast_del_port_group(struct net_bridge_port_group *p)
+{
+ struct net_bridge_port *port = p->key.port;
+ __u16 vid = p->key.addr.vid;
+
+ hlist_del_init(&p->mglist);
+ if (!br_multicast_is_star_g(&p->key.addr))
+ rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
+ br_sg_port_rht_params);
+ kfree(p);
+ br_multicast_port_ngroups_dec(port, vid);
}
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1387,7 +1535,7 @@ __br_multicast_add_group(struct net_bridge_mcast *brmctx,
}
p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
- filter_mode, RTPROT_KERNEL);
+ filter_mode, RTPROT_KERNEL, NULL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
goto out;
@@ -1933,6 +2081,25 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_ip4_multicast_add_router(brmctx, pmctx);
br_ip6_multicast_add_router(brmctx, pmctx);
}
+
+ if (br_multicast_port_ctx_is_vlan(pmctx)) {
+ struct net_bridge_port_group *pg;
+ u32 n = 0;
+
+ /* The mcast_n_groups counter might be wrong. First,
+ * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
+ * are flushed, thus mcast_n_groups after the toggle does not
+ * reflect the true values. And second, permanent entries added
+ * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
+ * either. Thus we have to refresh the counter.
+ */
+
+ hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
+ if (pg->key.addr.vid == pmctx->vlan->vid)
+ n++;
+ }
+ WRITE_ONCE(pmctx->mdb_n_entries, n);
+ }
}
void br_multicast_enable_port(struct net_bridge_port *port)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 9554abcfd5b4..638a4d5359db 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto csum_error;
- len = ntohs(iph->tot_len);
+ len = skb_ip_totlen(skb);
if (skb->len < len) {
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
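[Editor's note] skb_ip_totlen() is the helper introduced by the IPv4 BIG TCP series for exactly these call sites: a GSO TCP skb larger than 64 KiB carries tot_len == 0, so reading ntohs(iph->tot_len) directly would truncate the length. A sketch of its logic as recalled from that series, not part of this diff:

static inline u32 iph_totlen(const struct sk_buff *skb, const struct iphdr *iph)
{
	u32 len = ntohs(iph->tot_len);

	/* tot_len == 0 on a GSO TCP skb means "longer than 64KiB":
	 * fall back to the actual skb length.
	 */
	return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ?
	       len : skb->len - skb_network_offset(skb);
}

#define skb_ip_totlen(skb) iph_totlen(skb, ip_hdr(skb))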
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4316cc82ae17..9173e52b89e2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -202,6 +202,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_MAX_GROUPS */
#endif
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
@@ -298,7 +300,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
- p->multicast_eht_hosts_cnt))
+ p->multicast_eht_hosts_cnt) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&p->multicast_ctx)) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&p->multicast_ctx)))
return -EMSGSIZE;
#endif
@@ -858,6 +864,8 @@ static int br_afspec(struct net_bridge *br,
}
static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_UNSPEC] = { .strict_start_type =
+ IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + 1 },
[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
[IFLA_BRPORT_COST] = { .type = NLA_U32 },
[IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
@@ -881,6 +889,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MAB] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
+ [IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1015,6 +1025,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (tb[IFLA_BRPORT_MCAST_MAX_GROUPS]) {
+ u32 max_groups;
+
+ max_groups = nla_get_u32(tb[IFLA_BRPORT_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&p->multicast_ctx, max_groups);
+ }
#endif
if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 8914290c75d4..17abf092f7ca 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -188,6 +188,9 @@ initvars:
}
static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = {
+ [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = {
+ .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1
+ },
[IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 },
[IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 },
[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 },
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 15ef7fd508ee..cef5f6ea850c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
struct hlist_node ip6_rlist;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
+ u32 mdb_n_entries;
+ u32 mdb_max_entries;
#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
};
@@ -956,7 +958,9 @@ br_multicast_new_port_group(struct net_bridge_port *port,
const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
- u8 filter_mode, u8 rt_protocol);
+ u8 filter_mode, u8 rt_protocol,
+ struct netlink_ext_ack *extack);
+void br_multicast_del_port_group(struct net_bridge_port_group *p);
int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
@@ -974,6 +978,9 @@ void br_multicast_uninit_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest);
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx);
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max);
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx);
void br_mdb_init(void);
void br_mdb_uninit(void);
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1757,7 +1764,8 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v);
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p);
size_t br_vlan_opts_nl_size(void);
int br_vlan_process_options(const struct net_bridge *br,
const struct net_bridge_port *p,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 7eb6fd5bb917..de18e9c1d7a7 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -104,9 +104,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "bridge flag offload is not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "bridge flag offload is not supported");
return -EOPNOTSUPP;
}
@@ -115,9 +114,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "error setting offload flag on port");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "error setting offload flag on port");
return err;
}
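[Editor's note] NL_SET_ERR_MSG_WEAK_MOD() folds the deleted "only if no message is set yet" check into the macro itself, so a more specific extack message from a lower layer is never overwritten. Roughly (sketched, not part of this diff):

#define NL_SET_ERR_MSG_WEAK_MOD(extack, msg)			\
	do {							\
		if ((extack) && !(extack)->_msg)		\
			NL_SET_ERR_MSG_MOD((extack), (msg));	\
	} while (0)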
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bc75fa1e4666..8a3dbc09ba38 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1816,6 +1816,7 @@ out_err:
/* v_opts is used to dump the options which must be equal in the whole range */
static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
const struct net_bridge_vlan *v_opts,
+ const struct net_bridge_port *p,
u16 flags,
bool dump_stats)
{
@@ -1842,7 +1843,7 @@ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
goto out_err;
if (v_opts) {
- if (!br_vlan_opts_fill(skb, v_opts))
+ if (!br_vlan_opts_fill(skb, v_opts, p))
goto out_err;
if (dump_stats && !br_vlan_stats_fill(skb, v_opts))
@@ -1925,7 +1926,7 @@ void br_vlan_notify(const struct net_bridge *br,
goto out_kfree;
}
- if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false))
+ if (!br_vlan_fill_vids(skb, vid, vid_range, v, p, flags, false))
goto out_err;
nlmsg_end(skb, nlh);
@@ -2030,7 +2031,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
if (!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- vlan_flags, dump_stats)) {
+ p, vlan_flags, dump_stats)) {
err = -EMSGSIZE;
break;
}
@@ -2056,7 +2057,7 @@ update_end:
else if (!dump_global &&
!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- br_vlan_flags(range_start, pvid),
+ p, br_vlan_flags(range_start, pvid),
dump_stats))
err = -EMSGSIZE;
}
@@ -2131,6 +2132,8 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
static int br_vlan_rtm_process_one(struct net_device *dev,
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index a2724d03278c..e378c2f3a9e2 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -48,7 +48,8 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
curr_mc_rtr == range_mc_rtr;
}
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p)
{
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
!__vlan_tun_put(skb, v))
@@ -58,6 +59,12 @@ bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
br_vlan_multicast_router(v)))
return false;
+ if (p && !br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx) &&
+ (nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&v->port_mcast_ctx)) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&v->port_mcast_ctx))))
+ return false;
#endif
return true;
@@ -70,6 +77,8 @@ size_t br_vlan_opts_nl_size(void)
+ nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS */
#endif
+ 0;
}
@@ -212,6 +221,22 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
return err;
*changed = true;
}
+ if (tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]) {
+ u32 val;
+
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't set mcast_max_groups for non-port vlans");
+ return -EINVAL;
+ }
+ if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast snooping disabled on this VLAN");
+ return -EINVAL;
+ }
+
+ val = nla_get_u32(tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&v->port_mcast_ctx, val);
+ *changed = true;
+ }
#endif
return 0;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 5c5dd437f1c2..71056ee84773 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -212,7 +212,7 @@ static int nf_ct_br_ip_check(const struct sk_buff *skb)
iph->version != 4)
return -1;
- len = ntohs(iph->tot_len);
+ len = skb_ip_totlen(skb);
if (skb->len < nhoff + len ||
len < (iph->ihl * 4))
return -1;
@@ -256,7 +256,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
return NF_ACCEPT;
- len = ntohs(ip_hdr(skb)->tot_len);
+ len = skb_ip_totlen(skb);
if (pskb_trim_rcsum(skb, len))
return NF_ACCEPT;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 748be7253248..1f2c1d7b90e2 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -533,10 +533,6 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_namelen)
goto err;
- ret = -EINVAL;
- if (unlikely(msg->msg_iter.nr_segs == 0) ||
- unlikely(msg->msg_iter.iov->iov_base == NULL))
- goto err;
noblock = msg->msg_flags & MSG_DONTWAIT;
timeo = sock_sndtimeo(sk, noblock);
diff --git a/net/can/gw.c b/net/can/gw.c
index 23a3d89cad81..37528826935e 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1139,6 +1139,13 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
if (gwj->dst.dev->type != ARPHRD_CAN)
goto out;
+ /* is sending the skb back to the incoming interface intended? */
+ if (gwj->src.dev == gwj->dst.dev &&
+ !(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK)) {
+ err = -EINVAL;
+ goto out;
+ }
+
ASSERT_RTNL();
err = cgw_register_filter(net, gwj);
diff --git a/net/can/isotp.c b/net/can/isotp.c
index fc81d77724a1..9bc344851704 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1220,6 +1220,9 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+
/* sanitize tx CAN identifier */
if (tx_id & CAN_EFF_FLAG)
tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
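[Editor's note] From userspace, the new test only demands what well-formed callers already do: fill in the address family before bind(). A minimal sketch (interface name and CAN IDs arbitrary):

struct sockaddr_can addr = {
	.can_family  = AF_CAN,	/* a missing/wrong family now fails with EINVAL */
	.can_ifindex = if_nametoindex("can0"),
	.can_addr.tp = { .tx_id = 0x712, .rx_id = 0x77a },
};

if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
	perror("isotp bind");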
diff --git a/net/can/raw.c b/net/can/raw.c
index ba86782ba8bb..f64469b98260 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -523,6 +523,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
struct can_filter sfilter; /* single filter */
struct net_device *dev = NULL;
can_err_mask_t err_mask = 0;
+ int fd_frames;
int count = 0;
int err = 0;
@@ -664,17 +665,17 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
break;
case CAN_RAW_FD_FRAMES:
- if (optlen != sizeof(ro->fd_frames))
+ if (optlen != sizeof(fd_frames))
return -EINVAL;
- if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
+ if (copy_from_sockptr(&fd_frames, optval, optlen))
return -EFAULT;
/* Enabling CAN XL includes CAN FD */
- if (ro->xl_frames && !ro->fd_frames) {
- ro->fd_frames = ro->xl_frames;
+ if (ro->xl_frames && !fd_frames)
return -EINVAL;
- }
+
+ ro->fd_frames = fd_frames;
break;
case CAN_RAW_XL_FRAMES:
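
The CAN_RAW_FD_FRAMES hunk above moves to a copy-validate-commit sequence: the user value lands in a local first, so a rejected setsockopt no longer leaves ro->fd_frames half-updated. A minimal sketch of the pattern with hypothetical names (set_bool_opt, state, forbid_clear):

static int set_bool_opt(int *state, sockptr_t optval, unsigned int optlen,
			bool forbid_clear)
{
	int val;

	if (optlen != sizeof(val))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, optlen))
		return -EFAULT;
	if (forbid_clear && !val)	/* reject without touching *state */
		return -EINVAL;

	*state = val;			/* commit only after all checks pass */
	return 0;
}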
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1d06e114ba3f..cd7b0bf5369e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -17,6 +17,7 @@
#endif /* CONFIG_BLOCK */
#include <linux/dns_resolver.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
@@ -344,6 +345,9 @@ static void con_sock_state_closed(struct ceph_connection *con)
static void ceph_sock_data_ready(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
+
+ trace_sk_data_ready(sk);
+
if (atomic_read(&con->msgr->stopping)) {
return;
}
diff --git a/net/core/Makefile b/net/core/Makefile
index 5857cec87b83..10edd66a8a37 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
-obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
diff --git a/net/core/dev.c b/net/core/dev.c
index b76fb37b381e..bb42150a38ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(register_netdevice_notifier_net);
* @nb: notifier
*
* Unregister a notifier previously registered by
- * register_netdevice_notifier(). The notifier is unlinked into the
+ * register_netdevice_notifier_net(). The notifier is unlinked from the
* kernel structures and may then be reused. A negative errno code
* is returned on a failure.
*
@@ -3001,6 +3001,8 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
dev->tso_max_size = min(GSO_MAX_SIZE, size);
if (size < READ_ONCE(dev->gso_max_size))
netif_set_gso_max_size(dev, size);
+ if (size < READ_ONCE(dev->gso_ipv4_max_size))
+ netif_set_gso_ipv4_max_size(dev, size);
}
EXPORT_SYMBOL(netif_set_tso_max_size);
@@ -6616,17 +6618,16 @@ static int napi_threaded_poll(void *data)
static void skb_defer_free_flush(struct softnet_data *sd)
{
struct sk_buff *skb, *next;
- unsigned long flags;
/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
if (!READ_ONCE(sd->defer_list))
return;
- spin_lock_irqsave(&sd->defer_lock, flags);
+ spin_lock_irq(&sd->defer_lock);
skb = sd->defer_list;
sd->defer_list = NULL;
sd->defer_count = 0;
- spin_unlock_irqrestore(&sd->defer_lock, flags);
+ spin_unlock_irq(&sd->defer_lock);
while (skb != NULL) {
next = skb->next;
@@ -9224,8 +9225,12 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
return -EEXIST;
}
- if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
- NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
+ if (!offload && bpf_prog_is_offloaded(new_prog->aux)) {
+ NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported");
+ return -EINVAL;
+ }
+ if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) {
+ NL_SET_ERR_MSG(extack, "Program bound to different device");
return -EINVAL;
}
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
@@ -10611,6 +10616,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+ dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE;
dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
dev->tso_max_segs = TSO_MAX_SEGS;
dev->upper_level = 1;
@@ -10830,6 +10837,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_shutdown(dev);
dev_xdp_uninstall(dev);
+ bpf_dev_bound_netdev_unregister(dev);
netdev_offload_xstats_disable_all(dev);
diff --git a/net/core/dev.h b/net/core/dev.h
index 814ed5b7b960..a065b7571441 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -100,6 +100,8 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
{
/* dev->gso_max_size is read locklessly from sk_setup_caps() */
WRITE_ONCE(dev->gso_max_size, size);
+ if (size <= GSO_LEGACY_MAX_SIZE)
+ WRITE_ONCE(dev->gso_ipv4_max_size, size);
}
static inline void netif_set_gso_max_segs(struct net_device *dev,
@@ -114,6 +116,22 @@ static inline void netif_set_gro_max_size(struct net_device *dev,
{
/* This pairs with the READ_ONCE() in skb_gro_receive() */
WRITE_ONCE(dev->gro_max_size, size);
+ if (size <= GRO_LEGACY_MAX_SIZE)
+ WRITE_ONCE(dev->gro_ipv4_max_size, size);
+}
+
+static inline void netif_set_gso_ipv4_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_ipv4_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_ipv4_max_size, size);
+}
+
+static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* This pairs with the READ_ONCE() in skb_gro_receive() */
+ WRITE_ONCE(dev->gro_ipv4_max_size, size);
}
#endif
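
With the setters above, legacy writes at or below GSO_LEGACY_MAX_SIZE are mirrored into the IPv4 fields, while larger (BIG TCP) limits stay IPv6-only until the IPv4 limit is raised explicitly. Readers then pick the limit per address family; a simplified, hedged sketch of the selection that sk_setup_caps() and skb_gro_receive() perform:

static u32 dev_gso_limit(const struct net_device *dev, bool is_ipv6)
{
	/* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
	return is_ipv6 ? READ_ONCE(dev->gso_max_size)
		       : READ_ONCE(dev->gso_ipv4_max_size);
}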
diff --git a/net/core/dst.c b/net/core/dst.c
index 6d2dd03dafa8..31c08a3386d3 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -82,12 +82,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
if (ops->gc &&
!(flags & DST_NOCOUNT) &&
- dst_entries_get_fast(ops) > ops->gc_thresh) {
- if (ops->gc(ops)) {
- pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n");
- return NULL;
- }
- }
+ dst_entries_get_fast(ops) > ops->gc_thresh)
+ ops->gc(ops);
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
diff --git a/net/core/filter.c b/net/core/filter.c
index 43cc1fe58a2c..d8f9b53f3db6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
BPF_F_ADJ_ROOM_ENCAP_L2( \
- BPF_ADJ_ROOM_ENCAP_L2_MASK))
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK)
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
@@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
int ret;
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK |
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;
@@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -3608,6 +3621,22 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ if (!shrink)
+ return -EINVAL;
+
+ switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+ len_min = sizeof(struct iphdr);
+ break;
+ case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+ len_min = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
len_cur = skb->len - skb_network_offset(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
@@ -4128,9 +4157,13 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
* bpf_redirect_info to actually enqueue the frame into a map type-specific
* bulk queue structure.
*
- * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
- * which will flush all the different bulk queues, thus completing the
- * redirect.
+ * 3. Before exiting its NAPI poll loop, the driver will call
+ * xdp_do_flush(), which will flush all the different bulk queues,
+ * thus completing the redirect. Note that xdp_do_flush() must be
+ * called before napi_complete_done() in the driver, as the
+ * XDP_REDIRECT logic relies on being inside a single NAPI instance
+ * through to the xdp_do_flush() call for RCU protection of all
+ * in-kernel data structures.
*/
/*
* Pointers to the map entries will be kept around for this whole sequence of
@@ -4618,7 +4651,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER |
+ BPF_F_NO_TUNNEL_KEY)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -4656,6 +4690,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags &= ~TUNNEL_CSUM;
if (flags & BPF_F_SEQ_NUMBER)
info->key.tun_flags |= TUNNEL_SEQ;
+ if (flags & BPF_F_NO_TUNNEL_KEY)
+ info->key.tun_flags &= ~TUNNEL_KEY;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -5172,7 +5208,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
char *optval, int *optlen,
bool getopt)
{
- if (sk->sk_prot->setsockopt != tcp_setsockopt)
+ if (sk->sk_protocol != IPPROTO_TCP)
return -EINVAL;
switch (optname) {
@@ -6844,9 +6880,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
FIELD)); \
} while (0)
- if (insn > insn_buf)
- return insn - insn_buf;
-
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
@@ -8731,7 +8764,7 @@ static bool xdp_is_valid_access(int off, int size,
}
if (type == BPF_WRITE) {
- if (bpf_prog_is_dev_bound(prog->aux)) {
+ if (bpf_prog_is_offloaded(prog->aux)) {
switch (off) {
case offsetof(struct xdp_md, rx_queue_index):
return __is_valid_xdp_access(off, size);
@@ -10144,9 +10177,6 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
- if (insn > insn_buf)
- return insn - insn_buf;
-
switch (si->off) {
case offsetof(struct bpf_sock_ops, op):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
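
The BPF_F_ADJ_ROOM_DECAP_L3_* additions above let a tc program strip an outer L3 header and have skb->protocol fixed up when the tunnel crosses address families. A hedged caller sketch (the 4-in-6 scenario, section name, and includes are illustrative):

#include <linux/bpf.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int decap_4in6(struct __sk_buff *skb)
{
	/* Shrink away the outer IPv6 header; the DECAP flag tells the
	 * kernel the remaining outer L3 header is IPv4, so skb->protocol
	 * is rewritten from ETH_P_IPV6 to ETH_P_IP.
	 */
	if (bpf_skb_adjust_room(skb, -(__s32)sizeof(struct ipv6hdr),
				BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_DECAP_L3_IPV4))
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";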
diff --git a/net/core/gro.c b/net/core/gro.c
index 4bac7ea6e025..a606705a0859 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -171,16 +171,18 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (p->pp_recycle != skb->pp_recycle)
return -ETOOMANYREFS;
- /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
- gro_max_size = READ_ONCE(p->dev->gro_max_size);
+ /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
+ gro_max_size = p->protocol == htons(ETH_P_IPV6) ?
+ READ_ONCE(p->dev->gro_max_size) :
+ READ_ONCE(p->dev->gro_ipv4_max_size);
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
- if (p->protocol != htons(ETH_P_IPV6) ||
- skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
- ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+ if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP ||
+ (p->protocol == htons(ETH_P_IPV6) &&
+ skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) ||
p->encapsulation)
return -E2BIG;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4edd2176e238..6798f6d2423b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1674,11 +1674,21 @@ static void neigh_proxy_process(struct timer_list *t)
spin_unlock(&tbl->proxy_queue.lock);
}
+static unsigned long neigh_proxy_delay(struct neigh_parms *p)
+{
+ /* If proxy_delay is zero, do not call get_random_u32_below()
+ * as a bound of zero is undefined behavior.

+ */
+ unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
+
+ return proxy_delay ?
+ jiffies + get_random_u32_below(proxy_delay) : jiffies;
+}
+
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb)
{
- unsigned long sched_next = jiffies +
- get_random_u32_below(NEIGH_VAR(p, PROXY_DELAY));
+ unsigned long sched_next = neigh_proxy_delay(p);
if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
kfree_skb(skb);
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c40cd8dd75c7..805b7385dd8d 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full);
#endif
#if IS_ENABLED(CONFIG_PAGE_POOL)
@@ -61,3 +62,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9be762e1d042..a089b704b986 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -682,7 +682,7 @@ int netpoll_setup(struct netpoll *np)
}
if (!netif_running(ndev)) {
- unsigned long atmost, atleast;
+ unsigned long atmost;
np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
@@ -694,7 +694,6 @@ int netpoll_setup(struct netpoll *np)
}
rtnl_unlock();
- atleast = jiffies + HZ/10;
atmost = jiffies + carrier_timeout * HZ;
while (!netif_carrier_ok(ndev)) {
if (time_after(jiffies, atmost)) {
@@ -704,15 +703,6 @@ int netpoll_setup(struct netpoll *np)
msleep(1);
}
- /* If carrier appears to come up instantly, we don't
- * trust it and pause so that we don't pump all our
- * queued console messages into the bitbucket.
- */
-
- if (time_before(jiffies, atleast)) {
- np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
- msleep(4000);
- }
rtnl_lock();
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b203d8660e4..193c18799865 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -511,8 +511,8 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
int ret;
- /* BH protection not needed if current is serving softirq */
- if (in_serving_softirq())
+ /* BH protection not needed if current is in softirq context */
+ if (in_softirq())
ret = ptr_ring_produce(&pool->ring, page);
else
ret = ptr_ring_produce_bh(&pool->ring, page);
@@ -570,7 +570,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
page_pool_dma_sync_for_device(pool, page,
dma_sync_size);
- if (allow_direct && in_serving_softirq() &&
+ if (allow_direct && in_softirq() &&
page_pool_recycle_in_cache(page, pool))
return NULL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 64289bc98887..5d8eb57867a9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -58,7 +58,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 40
+#define RTNL_SLAVE_MAX_TYPE 42
struct rtnl_link {
rtnl_doit_func doit;
@@ -1074,6 +1074,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */
+ nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
+ nla_total_size(1) /* IFLA_OPERSTATE */
@@ -1807,6 +1809,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
+ nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
+ nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
#ifdef CONFIG_RPS
@@ -1968,6 +1972,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
[IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
[IFLA_ALLMULTI] = { .type = NLA_REJECT },
+ [IFLA_GSO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2883,6 +2889,29 @@ static int do_setlink(const struct sk_buff *skb,
}
}
+ if (tb[IFLA_GSO_IPV4_MAX_SIZE]) {
+ u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]);
+
+ if (max_size > dev->tso_max_size) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_ipv4_max_size ^ max_size) {
+ netif_set_gso_ipv4_max_size(dev, max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
+ if (tb[IFLA_GRO_IPV4_MAX_SIZE]) {
+ u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]);
+
+ if (dev->gro_ipv4_max_size ^ gro_max_size) {
+ netif_set_gro_ipv4_max_size(dev, gro_max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
@@ -3325,6 +3354,10 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
if (tb[IFLA_GRO_MAX_SIZE])
netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
+ if (tb[IFLA_GSO_IPV4_MAX_SIZE])
+ netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]));
+ if (tb[IFLA_GRO_IPV4_MAX_SIZE])
+ netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]));
return dev;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a31ff4d83ecc..70a6088e8326 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,7 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/textsearch.h>
#include "dev.h"
#include "sock_destructor.h"
@@ -88,6 +89,34 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
+
+/* skb_small_head_cache and related code are only supported
+ * for CONFIG_SLAB and CONFIG_SLUB.
+ * As soon as SLOB is removed from the kernel, we can clean this up.
+ */
+#if !defined(CONFIG_SLOB)
+# define HAVE_SKB_SMALL_HEAD_CACHE 1
+#endif
+
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+static struct kmem_cache *skb_small_head_cache __ro_after_init;
+
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+
+/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
+ * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique size,
+ * so heads from skb_small_head_cache can be told apart from system
+ * slab heads by looking at their size (skb_end_offset()).
+ */
+#define SKB_SMALL_HEAD_CACHE_SIZE \
+ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \
+ (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \
+ SKB_SMALL_HEAD_SIZE)
+
+#define SKB_SMALL_HEAD_HEADROOM \
+ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
+#endif /* HAVE_SKB_SMALL_HEAD_CACHE */
+
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -386,8 +415,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
/* build_skb() is wrapper over __build_skb(), that specifically
* takes care of skb->head and skb->pfmemalloc
- * This means that if @frag_size is not zero, then @data must be backed
- * by a page fragment, not kmalloc() or vmalloc()
*/
struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
@@ -406,7 +433,7 @@ EXPORT_SYMBOL(build_skb);
* build_skb_around - build a network buffer around provided skb
* @skb: sk_buff provide by caller, must be memset cleared
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data
*/
struct sk_buff *build_skb_around(struct sk_buff *skb,
void *data, unsigned int frag_size)
@@ -428,7 +455,7 @@ EXPORT_SYMBOL(build_skb_around);
/**
* __napi_build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data
*
* Version of __build_skb() that uses NAPI percpu caches to obtain
* skbuff_head instead of inplace allocation.
@@ -452,7 +479,7 @@ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
/**
* napi_build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data
*
* Version of __napi_build_skb() that takes care of skb->head_frag
* and skb->pfmemalloc when the data is a page or page fragment.
@@ -479,17 +506,37 @@ EXPORT_SYMBOL(napi_build_skb);
* may be used. Otherwise, the packet data may be discarded until enough
* memory is free
*/
-static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
+static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
bool *pfmemalloc)
{
- void *obj;
bool ret_pfmemalloc = false;
+ unsigned int obj_size;
+ void *obj;
+
+ obj_size = SKB_HEAD_ALIGN(*size);
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
+ !(flags & KMALLOC_NOT_NORMAL_BITS)) {
+ /* skb_small_head_cache has a non-power-of-two size,
+ * likely forcing SLUB to use order-3 pages.
+ * We deliberately attempt only a NOMEMALLOC allocation.
+ */
+ obj = kmem_cache_alloc_node(skb_small_head_cache,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+ node);
+ if (obj) {
+ *size = SKB_SMALL_HEAD_CACHE_SIZE;
+ goto out;
+ }
+ }
+#endif
+ *size = obj_size = kmalloc_size_roundup(obj_size);
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
*/
- obj = kmalloc_node_track_caller(size,
+ obj = kmalloc_node_track_caller(obj_size,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
if (obj || !(gfp_pfmemalloc_allowed(flags)))
@@ -497,7 +544,7 @@ static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
/* Try again but now we are using pfmemalloc reserves */
ret_pfmemalloc = true;
- obj = kmalloc_node_track_caller(size, flags, node);
+ obj = kmalloc_node_track_caller(obj_size, flags, node);
out:
if (pfmemalloc)
@@ -534,7 +581,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct kmem_cache *cache;
struct sk_buff *skb;
- unsigned int osize;
bool pfmemalloc;
u8 *data;
@@ -559,18 +605,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- osize = kmalloc_size_roundup(size);
- data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc);
+ data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
if (unlikely(!data))
goto nodata;
/* kmalloc_size_roundup() might give us more room than requested.
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- size = SKB_WITH_OVERHEAD(osize);
- prefetchw(data + size);
+ prefetchw(data + SKB_WITH_OVERHEAD(size));
/*
* Only clear those fields we need to clear, not those that we will
@@ -578,7 +620,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- __build_skb_around(skb, data, osize);
+ __build_skb_around(skb, data, size);
skb->pfmemalloc = pfmemalloc;
if (flags & SKB_ALLOC_FCLONE) {
@@ -633,8 +675,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
goto skb_success;
}
- len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- len = SKB_DATA_ALIGN(len);
+ len = SKB_HEAD_ALIGN(len);
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
@@ -733,8 +774,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
} else {
- len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- len = SKB_DATA_ALIGN(len);
+ len = SKB_HEAD_ALIGN(len);
data = page_frag_alloc(&nc->page, len, gfp_mask);
pfmemalloc = nc->page.pfmemalloc;
@@ -810,6 +850,16 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
return page_pool_return_skb_page(virt_to_page(data));
}
+static void skb_kfree_head(void *head, unsigned int end_offset)
+{
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ if (end_offset == SKB_SMALL_HEAD_HEADROOM)
+ kmem_cache_free(skb_small_head_cache, head);
+ else
+#endif
+ kfree(head);
+}
+
static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
@@ -819,7 +869,7 @@ static void skb_free_head(struct sk_buff *skb)
return;
skb_free_frag(head);
} else {
- kfree(head);
+ skb_kfree_head(head, skb_end_offset(skb));
}
}
@@ -932,6 +982,21 @@ void __kfree_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(__kfree_skb);
+static __always_inline
+bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+{
+ if (unlikely(!skb_unref(skb)))
+ return false;
+
+ DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+
+ if (reason == SKB_CONSUMED)
+ trace_consume_skb(skb);
+ else
+ trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ return true;
+}
+
/**
* kfree_skb_reason - free an sk_buff with special reason
* @skb: buffer to free
@@ -944,28 +1009,59 @@ EXPORT_SYMBOL(__kfree_skb);
void __fix_address
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
- if (unlikely(!skb_unref(skb)))
+ if (__kfree_skb_reason(skb, reason))
+ __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb_reason);
+
+#define KFREE_SKB_BULK_SIZE 16
+
+struct skb_free_array {
+ unsigned int skb_count;
+ void *skb_array[KFREE_SKB_BULK_SIZE];
+};
+
+static void kfree_skb_add_bulk(struct sk_buff *skb,
+ struct skb_free_array *sa,
+ enum skb_drop_reason reason)
+{
+ /* clones come from skbuff_fclone_cache, so free them individually */
+ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+ __kfree_skb(skb);
return;
+ }
- DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+ skb_release_all(skb, reason);
+ sa->skb_array[sa->skb_count++] = skb;
- if (reason == SKB_CONSUMED)
- trace_consume_skb(skb);
- else
- trace_kfree_skb(skb, __builtin_return_address(0), reason);
- __kfree_skb(skb);
+ if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
+ kmem_cache_free_bulk(skbuff_head_cache, KFREE_SKB_BULK_SIZE,
+ sa->skb_array);
+ sa->skb_count = 0;
+ }
}
-EXPORT_SYMBOL(kfree_skb_reason);
-void kfree_skb_list_reason(struct sk_buff *segs,
- enum skb_drop_reason reason)
+void __fix_address
+kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
{
+ struct skb_free_array sa;
+
+ sa.skb_count = 0;
+
while (segs) {
struct sk_buff *next = segs->next;
- kfree_skb_reason(segs, reason);
+ if (__kfree_skb_reason(segs, reason)) {
+ skb_poison_list(segs);
+ kfree_skb_add_bulk(segs, &sa, reason);
+ }
+
segs = next;
}
+
+ if (sa.skb_count)
+ kmem_cache_free_bulk(skbuff_head_cache, sa.skb_count,
+ sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
@@ -1893,10 +1989,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size = kmalloc_size_roundup(size);
- data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+ data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
goto nodata;
size = SKB_WITH_OVERHEAD(size);
@@ -1959,7 +2052,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
return 0;
nofrags:
- kfree(data);
+ skb_kfree_head(data, size);
nodata:
return -ENOMEM;
}
@@ -4596,6 +4689,19 @@ void __init skb_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ /* usercopy should only access the first SKB_SMALL_HEAD_HEADROOM bytes.
+ * struct skb_shared_info is located at the end of skb->head
+ * and must not be copied to/from user space.
+ */
+ skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
+ SKB_SMALL_HEAD_CACHE_SIZE,
+ 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC,
+ 0,
+ SKB_SMALL_HEAD_HEADROOM,
+ NULL);
+#endif
skb_extensions_init();
}
@@ -6244,10 +6350,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size = kmalloc_size_roundup(size);
- data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+ data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;
size = SKB_WITH_OVERHEAD(size);
@@ -6263,7 +6366,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
if (skb_cloned(skb)) {
/* drop the old head gracefully */
if (skb_orphan_frags(skb, gfp_mask)) {
- kfree(data);
+ skb_kfree_head(data, size);
return -ENOMEM;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -6363,10 +6466,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size = kmalloc_size_roundup(size);
- data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+ data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;
size = SKB_WITH_OVERHEAD(size);
@@ -6374,7 +6474,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
if (skb_orphan_frags(skb, gfp_mask)) {
- kfree(data);
+ skb_kfree_head(data, size);
return -ENOMEM;
}
shinfo = (struct skb_shared_info *)(data + size);
@@ -6410,7 +6510,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
if (skb_has_frag_list(skb))
kfree_skb_list(skb_shinfo(skb)->frag_list);
- kfree(data);
+ skb_kfree_head(data, size);
return -ENOMEM;
}
skb_release_data(skb, SKB_CONSUMED);
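
kfree_skb_list_reason() above queues up to KFREE_SKB_BULK_SIZE released heads and returns them to the slab with one kmem_cache_free_bulk() call. The batching idiom in isolation, as a hedged standalone sketch (free_batch and the helpers are placeholders, not kernel API):

#define BATCH_SIZE 16

struct free_batch {
	unsigned int count;
	void *items[BATCH_SIZE];
};

static void batch_add(struct kmem_cache *cache, struct free_batch *b, void *obj)
{
	b->items[b->count++] = obj;
	if (b->count == BATCH_SIZE) {	/* flush a full batch in one call */
		kmem_cache_free_bulk(cache, BATCH_SIZE, b->items);
		b->count = 0;
	}
}

static void batch_flush(struct kmem_cache *cache, struct free_batch *b)
{
	if (b->count)			/* release any leftover objects */
		kmem_cache_free_bulk(cache, b->count, b->items);
	b->count = 0;
}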
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 53d0251788aa..f81883759d38 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -8,6 +8,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <trace/events/sock.h>
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
@@ -1114,6 +1115,8 @@ static void sk_psock_strp_data_ready(struct sock *sk)
{
struct sk_psock *psock;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
psock = sk_psock(sk);
if (likely(psock)) {
@@ -1210,6 +1213,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ trace_sk_data_ready(sk);
+
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
sock->ops->read_skb(sk, sk_psock_verdict_recv);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f27c24016fe..afbb02984d5f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2375,17 +2375,22 @@ void sk_free_unlock_clone(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
-static void sk_trim_gso_size(struct sock *sk)
+static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
{
- if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
- return;
+ bool is_ipv6 = false;
+ u32 max_size;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6 &&
- sk_is_tcp(sk) &&
- !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
- return;
+ is_ipv6 = (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
- sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
+ /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
+ max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
+ READ_ONCE(dst->dev->gso_ipv4_max_size);
+ if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
+ max_size = GSO_LEGACY_MAX_SIZE;
+
+ return max_size - (MAX_TCP_HEADER + 1);
}
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
@@ -2405,10 +2410,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
- /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
- sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
- sk_trim_gso_size(sk);
- sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
+ sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
}
@@ -3293,6 +3295,8 @@ void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
@@ -3381,7 +3385,7 @@ void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
}
EXPORT_SYMBOL(sk_stop_timer_sync);
-void sock_init_data(struct socket *sock, struct sock *sk)
+void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
sk_init_common(sk);
sk->sk_send_head = NULL;
@@ -3401,11 +3405,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_type = sock->type;
RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
sock->sk = sk;
- sk->sk_uid = SOCK_INODE(sock)->i_uid;
} else {
RCU_INIT_POINTER(sk->sk_wq, NULL);
- sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
}
+ sk->sk_uid = uid;
rwlock_init(&sk->sk_callback_lock);
if (sk->sk_kern_sock)
@@ -3463,6 +3466,16 @@ void sock_init_data(struct socket *sock, struct sock *sk)
refcount_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
+EXPORT_SYMBOL(sock_init_data_uid);
+
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+ kuid_t uid = sock ?
+ SOCK_INODE(sock)->i_uid :
+ make_kuid(sock_net(sk)->user_ns, 0);
+
+ sock_init_data_uid(sock, sk, uid);
+}
EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
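
sock_init_data_uid() above lets a caller that has no backing inode pin the socket owner explicitly, with sock_init_data() keeping its old behavior as a thin wrapper. A hedged sketch of a driver-side caller (struct example_file is hypothetical; current_fsuid() is a typical owner source):

static void example_sock_init(struct example_file *tfile)
{
	/* attribute the socket to the task opening the device rather
	 * than deriving the uid from a (possibly absent) inode
	 */
	sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
}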
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5b1ce656baa1..e7b98162c632 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -643,11 +643,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
for (tmp = tbl; tmp->procname; tmp++)
tmp->data += (char *)net - (char *)&init_net;
-
- /* Don't export any sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- tbl[0].procname = NULL;
- }
}
net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 844c9d99dc0e..a5a7ecf6391c 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -4,6 +4,7 @@
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
*/
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -709,3 +710,66 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
return nxdpf;
}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+/**
+ * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp.
+ * @ctx: XDP context pointer.
+ * @timestamp: Return value pointer.
+ *
+ * Returns 0 on success or ``-errno`` on error.
+ */
+int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
+ * @ctx: XDP context pointer.
+ * @hash: Return value pointer.
+ *
+ * Returns 0 on success or ``-errno`` on error.
+ */
+int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+{
+ return -EOPNOTSUPP;
+}
+
+__diag_pop();
+
+BTF_SET8_START(xdp_metadata_kfunc_ids)
+#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+BTF_SET8_END(xdp_metadata_kfunc_ids)
+
+static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xdp_metadata_kfunc_ids,
+};
+
+BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
+#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
+u32 bpf_xdp_metadata_kfunc_id(int id)
+{
+ /* xdp_metadata_kfunc_ids is sorted by BTF id, so it can't be indexed
+ * by the kfunc enum; use the unsorted list instead
+ */
+ return xdp_metadata_kfunc_ids_unsorted[id];
+}
+
+bool bpf_dev_bound_kfunc_id(u32 btf_id)
+{
+ return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
+}
+
+static int __init xdp_metadata_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
+}
+late_initcall(xdp_metadata_init);
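
The two stubs above are the fallback implementations; drivers supply their own via metadata ops, and XDP programs call them as kfuncs. A hedged consumer sketch (the __ksym externs mirror the prototypes above; -EOPNOTSUPP means the driver lacks the hook):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
					 __u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
				    __u32 *hash) __ksym;

SEC("xdp")
int read_rx_metadata(struct xdp_md *ctx)
{
	__u64 ts = 0;
	__u32 hash = 0;

	if (bpf_xdp_metadata_rx_timestamp(ctx, &ts))
		ts = 0;		/* no HW timestamp for this frame */
	if (bpf_xdp_metadata_rx_hash(ctx, &hash))
		hash = 0;	/* driver does not expose an RX hash */

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";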
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index f9949e051f49..c0c438128575 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -178,6 +178,7 @@ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
};
static LIST_HEAD(dcb_app_list);
+static LIST_HEAD(dcb_rewr_list);
static DEFINE_SPINLOCK(dcb_lock);
static enum ieee_attrs_app dcbnl_app_attr_type_get(u8 selector)
@@ -1099,11 +1100,46 @@ out:
return err;
}
+/* Set or delete APP table or rewrite table entries. The APP struct is validated
+ * and the appropriate callback function is called.
+ */
+static int dcbnl_app_table_setdel(struct nlattr *attr,
+ struct net_device *netdev,
+ int (*setdel)(struct net_device *dev,
+ struct dcb_app *app))
+{
+ struct dcb_app *app_data;
+ enum ieee_attrs_app type;
+ struct nlattr *attr_itr;
+ int rem, err;
+
+ nla_for_each_nested(attr_itr, attr, rem) {
+ type = nla_type(attr_itr);
+
+ if (!dcbnl_app_attr_type_validate(type))
+ continue;
+
+ if (nla_len(attr_itr) < sizeof(struct dcb_app))
+ return -ERANGE;
+
+ app_data = nla_data(attr_itr);
+
+ if (!dcbnl_app_selector_validate(type, app_data->selector))
+ return -EINVAL;
+
+ err = setdel(netdev, app_data);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */
static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
{
const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
- struct nlattr *ieee, *app;
+ struct nlattr *ieee, *app, *rewr;
struct dcb_app_type *itr;
int dcbx;
int err;
@@ -1206,6 +1242,27 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
spin_unlock_bh(&dcb_lock);
nla_nest_end(skb, app);
+ rewr = nla_nest_start(skb, DCB_ATTR_DCB_REWR_TABLE);
+ if (!rewr)
+ return -EMSGSIZE;
+
+ spin_lock_bh(&dcb_lock);
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->ifindex == netdev->ifindex) {
+ enum ieee_attrs_app type =
+ dcbnl_app_attr_type_get(itr->app.selector);
+ err = nla_put(skb, type, sizeof(itr->app), &itr->app);
+ if (err) {
+ spin_unlock_bh(&dcb_lock);
+ nla_nest_cancel(skb, rewr);
+ return -EMSGSIZE;
+ }
+ }
+ }
+
+ spin_unlock_bh(&dcb_lock);
+ nla_nest_end(skb, rewr);
+
if (ops->dcbnl_getapptrust) {
err = dcbnl_getapptrust(netdev, skb);
if (err)
@@ -1567,37 +1624,20 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
goto err;
}
- if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
- enum ieee_attrs_app type = nla_type(attr);
- struct dcb_app *app_data;
-
- if (!dcbnl_app_attr_type_validate(type))
- continue;
-
- if (nla_len(attr) < sizeof(struct dcb_app)) {
- err = -ERANGE;
- goto err;
- }
-
- app_data = nla_data(attr);
-
- if (!dcbnl_app_selector_validate(type,
- app_data->selector)) {
- err = -EINVAL;
- goto err;
- }
+ if (ieee[DCB_ATTR_DCB_REWR_TABLE]) {
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_DCB_REWR_TABLE],
+ netdev,
+ ops->dcbnl_setrewr ?: dcb_setrewr);
+ if (err)
+ goto err;
+ }
- if (ops->ieee_setapp)
- err = ops->ieee_setapp(netdev, app_data);
- else
- err = dcb_ieee_setapp(netdev, app_data);
- if (err)
- goto err;
- }
+ if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_IEEE_APP_TABLE],
+ netdev, ops->ieee_setapp ?:
+ dcb_ieee_setapp);
+ if (err)
+ goto err;
}
if (ieee[DCB_ATTR_DCB_APP_TRUST_TABLE]) {
@@ -1684,31 +1724,19 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh,
return err;
if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
- enum ieee_attrs_app type = nla_type(attr);
- struct dcb_app *app_data;
-
- if (!dcbnl_app_attr_type_validate(type))
- continue;
-
- app_data = nla_data(attr);
-
- if (!dcbnl_app_selector_validate(type,
- app_data->selector)) {
- err = -EINVAL;
- goto err;
- }
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_IEEE_APP_TABLE],
+ netdev, ops->ieee_delapp ?:
+ dcb_ieee_delapp);
+ if (err)
+ goto err;
+ }
- if (ops->ieee_delapp)
- err = ops->ieee_delapp(netdev, app_data);
- else
- err = dcb_ieee_delapp(netdev, app_data);
- if (err)
- goto err;
- }
+ if (ieee[DCB_ATTR_DCB_REWR_TABLE]) {
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_DCB_REWR_TABLE],
+ netdev,
+ ops->dcbnl_delrewr ?: dcb_delrewr);
+ if (err)
+ goto err;
}
err:
@@ -1939,6 +1967,22 @@ out:
return ret;
}
+static struct dcb_app_type *dcb_rewr_lookup(const struct dcb_app *app,
+ int ifindex, int proto)
+{
+ struct dcb_app_type *itr;
+
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->app.selector == app->selector &&
+ itr->app.priority == app->priority &&
+ itr->ifindex == ifindex &&
+ ((proto == -1) || itr->app.protocol == proto))
+ return itr;
+ }
+
+ return NULL;
+}
+
static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app,
int ifindex, int prio)
{
@@ -1955,7 +1999,8 @@ static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app,
return NULL;
}
-static int dcb_app_add(const struct dcb_app *app, int ifindex)
+static int dcb_app_add(struct list_head *list, const struct dcb_app *app,
+ int ifindex)
{
struct dcb_app_type *entry;
@@ -1965,7 +2010,7 @@ static int dcb_app_add(const struct dcb_app *app, int ifindex)
memcpy(&entry->app, app, sizeof(*app));
entry->ifindex = ifindex;
- list_add(&entry->list, &dcb_app_list);
+ list_add(&entry->list, list);
return 0;
}
@@ -2028,7 +2073,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
}
/* App type does not exist add new application type */
if (new->priority)
- err = dcb_app_add(new, dev->ifindex);
+ err = dcb_app_add(&dcb_app_list, new, dev->ifindex);
out:
spin_unlock_bh(&dcb_lock);
if (!err)
@@ -2061,6 +2106,63 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
}
EXPORT_SYMBOL(dcb_ieee_getapp_mask);
+/* Get protocol value from rewrite entry. */
+u16 dcb_getrewr(struct net_device *dev, struct dcb_app *app)
+{
+ struct dcb_app_type *itr;
+ u16 proto = 0;
+
+ spin_lock_bh(&dcb_lock);
+ itr = dcb_rewr_lookup(app, dev->ifindex, -1);
+ if (itr)
+ proto = itr->app.protocol;
+ spin_unlock_bh(&dcb_lock);
+
+ return proto;
+}
+EXPORT_SYMBOL(dcb_getrewr);
+
+/* Add rewrite entry to the rewrite list. */
+int dcb_setrewr(struct net_device *dev, struct dcb_app *new)
+{
+ int err;
+
+ spin_lock_bh(&dcb_lock);
+ /* Search for existing match and abort if found. */
+ if (dcb_rewr_lookup(new, dev->ifindex, new->protocol)) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ err = dcb_app_add(&dcb_rewr_list, new, dev->ifindex);
+out:
+ spin_unlock_bh(&dcb_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(dcb_setrewr);
+
+/* Delete rewrite entry from the rewrite list. */
+int dcb_delrewr(struct net_device *dev, struct dcb_app *del)
+{
+ struct dcb_app_type *itr;
+ int err = -ENOENT;
+
+ spin_lock_bh(&dcb_lock);
+ /* Search for existing match and remove it. */
+ itr = dcb_rewr_lookup(del, dev->ifindex, del->protocol);
+ if (itr) {
+ list_del(&itr->list);
+ kfree(itr);
+ err = 0;
+ }
+
+ spin_unlock_bh(&dcb_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(dcb_delrewr);
+
/**
* dcb_ieee_setapp - add IEEE dcb application data to app list
* @dev: network interface
@@ -2088,7 +2190,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
goto out;
}
- err = dcb_app_add(new, dev->ifindex);
+ err = dcb_app_add(&dcb_app_list, new, dev->ifindex);
out:
spin_unlock_bh(&dcb_lock);
if (!err)
@@ -2130,6 +2232,58 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
}
EXPORT_SYMBOL(dcb_ieee_delapp);
+/* dcb_getrewr_prio_pcp_mask_map - For a given device, find mapping from
+ * priorities to the PCP and DEI values assigned to that priority.
+ */
+void dcb_getrewr_prio_pcp_mask_map(const struct net_device *dev,
+ struct dcb_rewr_prio_pcp_map *p_map)
+{
+ int ifindex = dev->ifindex;
+ struct dcb_app_type *itr;
+ u8 prio;
+
+ memset(p_map->map, 0, sizeof(p_map->map));
+
+ spin_lock_bh(&dcb_lock);
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->ifindex == ifindex &&
+ itr->app.selector == DCB_APP_SEL_PCP &&
+ itr->app.protocol < 16 &&
+ itr->app.priority < IEEE_8021QAZ_MAX_TCS) {
+ prio = itr->app.priority;
+ p_map->map[prio] |= 1 << itr->app.protocol;
+ }
+ }
+ spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_getrewr_prio_pcp_mask_map);
+
+/* dcb_getrewr_prio_dscp_mask_map - For a given device, find mapping from
+ * priorities to the DSCP values assigned to that priority.
+ */
+void dcb_getrewr_prio_dscp_mask_map(const struct net_device *dev,
+ struct dcb_ieee_app_prio_map *p_map)
+{
+ int ifindex = dev->ifindex;
+ struct dcb_app_type *itr;
+ u8 prio;
+
+ memset(p_map->map, 0, sizeof(p_map->map));
+
+ spin_lock_bh(&dcb_lock);
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->ifindex == ifindex &&
+ itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP &&
+ itr->app.protocol < 64 &&
+ itr->app.priority < IEEE_8021QAZ_MAX_TCS) {
+ prio = itr->app.priority;
+ p_map->map[prio] |= 1ULL << itr->app.protocol;
+ }
+ }
+ spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_getrewr_prio_dscp_mask_map);
+
/*
* dcb_ieee_getapp_prio_dscp_mask_map - For a given device, find mapping from
* priorities to the DSCP values assigned to that priority. Initialize p_map
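
Throughout the setdel refactor above, the callback is chosen with the GNU C `?:` extension: the driver hook when non-NULL, otherwise the software-table helper. The idiom in isolation (dispatch_setrewr is a hypothetical wrapper):

static int dispatch_setrewr(const struct dcbnl_rtnl_ops *ops,
			    struct net_device *netdev, struct dcb_app *app)
{
	/* a ?: b evaluates a once and yields it when non-NULL, else b */
	int (*setrewr)(struct net_device *, struct dcb_app *) =
		ops->dcbnl_setrewr ?: dcb_setrewr;

	return setrewr(netdev, app);
}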
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
new file mode 100644
index 000000000000..daad4521c61e
--- /dev/null
+++ b/net/devlink/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := leftover.o core.o netlink.o dev.o
diff --git a/net/devlink/core.c b/net/devlink/core.c
new file mode 100644
index 000000000000..a4f47dafb864
--- /dev/null
+++ b/net/devlink/core.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+
+#include "devl_internal.h"
+
+DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+
+void *devlink_priv(struct devlink *devlink)
+{
+ return &devlink->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_priv);
+
+struct devlink *priv_to_devlink(void *priv)
+{
+ return container_of(priv, struct devlink, priv);
+}
+EXPORT_SYMBOL_GPL(priv_to_devlink);
+
+struct device *devlink_to_dev(const struct devlink *devlink)
+{
+ return devlink->dev;
+}
+EXPORT_SYMBOL_GPL(devlink_to_dev);
+
+struct net *devlink_net(const struct devlink *devlink)
+{
+ return read_pnet(&devlink->_net);
+}
+EXPORT_SYMBOL_GPL(devlink_net);
+
+void devl_assert_locked(struct devlink *devlink)
+{
+ lockdep_assert_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_assert_locked);
+
+#ifdef CONFIG_LOCKDEP
+/* For use in conjunction with LOCKDEP only, e.g. rcu_dereference_protected() */
+bool devl_lock_is_held(struct devlink *devlink)
+{
+ return lockdep_is_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock_is_held);
+#endif
+
+void devl_lock(struct devlink *devlink)
+{
+ mutex_lock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock);
+
+int devl_trylock(struct devlink *devlink)
+{
+ return mutex_trylock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_trylock);
+
+void devl_unlock(struct devlink *devlink)
+{
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_unlock);
+
+/**
+ * devlink_try_get() - try to obtain a reference on a devlink instance
+ * @devlink: instance to reference
+ *
+ * Obtain a reference on a devlink instance. A reference on a devlink instance
+ * only implies that it's safe to take the instance lock. It does not imply
+ * that the instance is registered, use devl_is_registered() after taking
+ * the instance lock to check registration status.
+ */
+struct devlink *__must_check devlink_try_get(struct devlink *devlink)
+{
+ if (refcount_inc_not_zero(&devlink->refcount))
+ return devlink;
+ return NULL;
+}
+
+static void devlink_release(struct work_struct *work)
+{
+ struct devlink *devlink;
+
+ devlink = container_of(to_rcu_work(work), struct devlink, rwork);
+
+ mutex_destroy(&devlink->lock);
+ lockdep_unregister_key(&devlink->lock_key);
+ kfree(devlink);
+}
+
+void devlink_put(struct devlink *devlink)
+{
+ if (refcount_dec_and_test(&devlink->refcount))
+ queue_rcu_work(system_wq, &devlink->rwork);
+}
+
+struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
+{
+ struct devlink *devlink = NULL;
+
+ rcu_read_lock();
+retry:
+ devlink = xa_find(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED);
+ if (!devlink)
+ goto unlock;
+
+ if (!devlink_try_get(devlink))
+ goto next;
+ if (!net_eq(devlink_net(devlink), net)) {
+ devlink_put(devlink);
+ goto next;
+ }
+unlock:
+ rcu_read_unlock();
+ return devlink;
+
+next:
+ (*indexp)++;
+ goto retry;
+}
+
+/**
+ * devl_register - Register devlink instance
+ * @devlink: devlink
+ */
+int devl_register(struct devlink *devlink)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ devl_assert_locked(devlink);
+
+ xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+ devlink_notify_register(devlink);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_register);
+
+void devlink_register(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_register(devlink);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_register);
+
+/**
+ * devl_unregister - Unregister devlink instance
+ * @devlink: devlink
+ */
+void devl_unregister(struct devlink *devlink)
+{
+ ASSERT_DEVLINK_REGISTERED(devlink);
+ devl_assert_locked(devlink);
+
+ devlink_notify_unregister(devlink);
+ xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(devl_unregister);
+
+void devlink_unregister(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_unregister(devlink);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_unregister);
+
+/**
+ * devlink_alloc_ns - Allocate new devlink instance resources
+ * in a specific namespace
+ *
+ * @ops: ops
+ * @priv_size: size of user private data
+ * @net: net namespace
+ * @dev: parent device
+ *
+ * Allocate new devlink instance resources, including devlink index
+ * and name.
+ */
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ size_t priv_size, struct net *net,
+ struct device *dev)
+{
+ struct devlink *devlink;
+ static u32 last_id;
+ int ret;
+
+ WARN_ON(!ops || !dev);
+ if (!devlink_reload_actions_valid(ops))
+ return NULL;
+
+ devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+ if (!devlink)
+ return NULL;
+
+ ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
+ &last_id, GFP_KERNEL);
+ if (ret < 0)
+ goto err_xa_alloc;
+
+ devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event;
+ ret = register_netdevice_notifier(&devlink->netdevice_nb);
+ if (ret)
+ goto err_register_netdevice_notifier;
+
+ devlink->dev = dev;
+ devlink->ops = ops;
+ xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
+ xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
+ write_pnet(&devlink->_net, net);
+ INIT_LIST_HEAD(&devlink->rate_list);
+ INIT_LIST_HEAD(&devlink->linecard_list);
+ INIT_LIST_HEAD(&devlink->sb_list);
+ INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
+ INIT_LIST_HEAD(&devlink->resource_list);
+ INIT_LIST_HEAD(&devlink->param_list);
+ INIT_LIST_HEAD(&devlink->region_list);
+ INIT_LIST_HEAD(&devlink->reporter_list);
+ INIT_LIST_HEAD(&devlink->trap_list);
+ INIT_LIST_HEAD(&devlink->trap_group_list);
+ INIT_LIST_HEAD(&devlink->trap_policer_list);
+ INIT_RCU_WORK(&devlink->rwork, devlink_release);
+ lockdep_register_key(&devlink->lock_key);
+ mutex_init(&devlink->lock);
+ lockdep_set_class(&devlink->lock, &devlink->lock_key);
+ refcount_set(&devlink->refcount, 1);
+
+ return devlink;
+
+err_register_netdevice_notifier:
+ xa_erase(&devlinks, devlink->index);
+err_xa_alloc:
+ kfree(devlink);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(devlink_alloc_ns);
+
+/**
+ * devlink_free - Free devlink instance resources
+ *
+ * @devlink: devlink
+ */
+void devlink_free(struct devlink *devlink)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(!list_empty(&devlink->trap_policer_list));
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
+ WARN_ON(!list_empty(&devlink->reporter_list));
+ WARN_ON(!list_empty(&devlink->region_list));
+ WARN_ON(!list_empty(&devlink->param_list));
+ WARN_ON(!list_empty(&devlink->resource_list));
+ WARN_ON(!list_empty(&devlink->dpipe_table_list));
+ WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
+ WARN_ON(!xa_empty(&devlink->ports));
+
+ xa_destroy(&devlink->snapshot_ids);
+ xa_destroy(&devlink->ports);
+
+ WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
+
+ xa_erase(&devlinks, devlink->index);
+
+ devlink_put(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_free);
+
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+ struct devlink *devlink;
+ u32 actions_performed;
+ unsigned long index;
+ int err;
+
+ /* In case the network namespace is being destroyed, reload
+ * all devlink instances from this namespace into init_net.
+ */
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
+ devl_lock(devlink);
+ err = 0;
+ if (devl_is_registered(devlink))
+ err = devlink_reload(devlink, &init_net,
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_LIMIT_UNSPEC,
+ &actions_performed, NULL);
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ if (err && err != -EOPNOTSUPP)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+ }
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+ .pre_exit = devlink_pernet_pre_exit,
+};
+
+static int __init devlink_init(void)
+{
+ int err;
+
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out;
+ err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+ WARN_ON(err);
+ return err;
+}
+
+subsys_initcall(devlink_init);
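
The alloc/register/unregister/free split above implies the usual driver lifecycle. A hedged probe/remove sketch (example_ops and struct example_priv are placeholders; devlink_alloc() is the init_net convenience wrapper around devlink_alloc_ns()):

static int example_probe(struct device *dev)
{
	struct example_priv *priv;
	struct devlink *devlink;

	devlink = devlink_alloc(&example_ops, sizeof(*priv), dev);
	if (!devlink)
		return -ENOMEM;

	priv = devlink_priv(devlink);	/* private area follows the struct */
	priv->devlink = devlink;

	devlink_register(devlink);	/* visible to user space from here on */
	return 0;
}

static void example_remove(struct example_priv *priv)
{
	devlink_unregister(priv->devlink);	/* must precede devlink_free() */
	devlink_free(priv->devlink);
}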
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
new file mode 100644
index 000000000000..78d824eda5ec
--- /dev/null
+++ b/net/devlink/dev.c
@@ -0,0 +1,1343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include "devl_internal.h"
+
+struct devlink_info_req {
+ struct sk_buff *msg;
+ void (*version_cb)(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv);
+ void *version_cb_priv;
+};
+
+struct devlink_reload_combination {
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+};
+
+static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
+ {
+ /* can't reinitialize driver with no down time */
+ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
+ },
+};
+
+static bool
+devlink_reload_combination_is_invalid(enum devlink_reload_action action,
+ enum devlink_reload_limit limit)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
+ if (devlink_reload_invalid_combinations[i].action == action &&
+ devlink_reload_invalid_combinations[i].limit == limit)
+ return true;
+ return false;
+}
+
+static bool
+devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
+{
+ return test_bit(action, &devlink->ops->reload_actions);
+}
+
+static bool
+devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
+{
+ return test_bit(limit, &devlink->ops->reload_limits);
+}
+
+static int devlink_reload_stat_put(struct sk_buff *msg,
+ enum devlink_reload_limit limit, u32 value)
+{
+ struct nlattr *reload_stats_entry;
+
+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+ if (!reload_stats_entry)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(msg, reload_stats_entry);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_entry);
+ return -EMSGSIZE;
+}
+
+static int
+devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
+{
+ struct nlattr *reload_stats_attr, *act_info, *act_stats;
+ int i, j, stat_idx;
+ u32 value;
+
+ if (!is_remote)
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+ else
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
+
+ if (!reload_stats_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if ((!is_remote &&
+ !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC)
+ continue;
+ act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+ if (!act_info)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+ goto action_info_nest_cancel;
+ act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+ if (!act_stats)
+ goto action_info_nest_cancel;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ /* Remote stats are shown even if not locally supported.
+ * Stats of actions with an unspecified limit are shown
+ * even though drivers don't need to register the
+ * unspecified limit.
+ */
+ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j)) ||
+ devlink_reload_combination_is_invalid(i, j))
+ continue;
+
+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+ if (!is_remote)
+ value = devlink->stats.reload_stats[stat_idx];
+ else
+ value = devlink->stats.remote_reload_stats[stat_idx];
+ if (devlink_reload_stat_put(msg, j, value))
+ goto action_stats_nest_cancel;
+ }
+ nla_nest_end(msg, act_stats);
+ nla_nest_end(msg, act_info);
+ }
+ nla_nest_end(msg, reload_stats_attr);
+ return 0;
+
+action_stats_nest_cancel:
+ nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+ nla_nest_cancel(msg, act_info);
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_attr);
+ return -EMSGSIZE;
+}
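+
+/* Layout note (illustrative): the reload stats live in a flat array with
+ * one row per limit and one column per action, matching the stat_idx
+ * computation above. E.g. the counter for the no-reset firmware
+ * activation sits at:
+ *
+ *     stat_idx = DEVLINK_RELOAD_LIMIT_NO_RESET *
+ *                __DEVLINK_RELOAD_ACTION_MAX +
+ *                DEVLINK_RELOAD_ACTION_FW_ACTIVATE;
+ */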
+
+static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct nlattr *dev_stats;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+ goto nla_put_failure;
+
+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+ if (!dev_stats)
+ goto nla_put_failure;
+
+ if (devlink_reload_stats_put(msg, devlink, false))
+ goto dev_stats_nest_cancel;
+ if (devlink_reload_stats_put(msg, devlink, true))
+ goto dev_stats_nest_cancel;
+
+ nla_nest_end(msg, dev_stats);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+dev_stats_nest_cancel:
+ nla_nest_cancel(msg, dev_stats);
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+const struct devlink_cmd devl_cmd_get = {
+ .dump_one = devlink_nl_cmd_get_dump_one,
+};
+
+static void devlink_reload_failed_set(struct devlink *devlink,
+ bool reload_failed)
+{
+ if (devlink->reload_failed == reload_failed)
+ return;
+ devlink->reload_failed = reload_failed;
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+ return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+ enum devlink_reload_limit limit, u32 actions_performed)
+{
+ unsigned long actions = actions_performed;
+ int stat_idx;
+ int action;
+
+ for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+ reload_stats[stat_idx]++;
+ }
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
+ actions_performed);
+}
+
+/**
+ * devlink_remote_reload_actions_performed - Update devlink on reload actions
+ * performed which are not a direct result of devlink reload call.
+ *
+ * This should be called by a driver after performing reload actions that were not
+ * a direct result of a devlink reload call, for example when fw_activate was performed
+ * as a result of a devlink reload triggering fw_activate on another host.
+ * The motivation for this function is to keep track of the reload actions performed on
+ * this device, whether or not they were initiated by a direct devlink reload call.
+ *
+ * @devlink: devlink
+ * @limit: reload limit
+ * @actions_performed: bitmask of actions performed
+ */
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ if (WARN_ON(!actions_performed ||
+ actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
+ limit > DEVLINK_RELOAD_LIMIT_MAX))
+ return;
+
+ __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
+ actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
+
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *curr_net,
+ struct net *dest_net)
+{
+ /* Userspace needs to be notified about devlink objects
+ * removed from the original and entering the new network namespace.
+ * The rest of the devlink objects are re-created during the
+ * reload process, so the notifications are generated separately.
+ */
+ devlink_notify_unregister(devlink);
+ move_netdevice_notifier_net(curr_net, dest_net,
+ &devlink->netdevice_nb);
+ write_pnet(&devlink->_net, dest_net);
+ devlink_notify_register(devlink);
+}
+
+int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack)
+{
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ struct net *curr_net;
+ int err;
+
+ memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats));
+
+ err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
+ if (err)
+ return err;
+
+ curr_net = devlink_net(devlink);
+ if (dest_net && !net_eq(dest_net, curr_net))
+ devlink_reload_netns_change(devlink, curr_net, dest_net);
+
+ err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ if (err)
+ return err;
+
+ WARN_ON(!(*actions_performed & BIT(action)));
+ /* Catch drivers that update the remote reload stats from within a devlink reload */
+ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats)));
+ devlink_reload_stats_update(devlink, limit, *actions_performed);
+ return 0;
+}
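+
+/* Driver-side sketch (hypothetical "my_*" names, for illustration):
+ * reload_down() quiesces the device and reload_up() re-creates it,
+ * reporting what was actually done so the WARN_ON() above can verify
+ * the requested action bit:
+ *
+ *     static int my_reload_up(struct devlink *devlink,
+ *                             enum devlink_reload_action action,
+ *                             enum devlink_reload_limit limit,
+ *                             u32 *actions_performed,
+ *                             struct netlink_ext_ack *extack)
+ *     {
+ *             *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
+ *             return my_reinit(devlink_priv(devlink));
+ *     }
+ */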
+
+static int
+devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
+ enum devlink_command cmd, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
+ actions_performed))
+ goto nla_put_failure;
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+ struct net *dest_net = NULL;
+ u32 actions_performed;
+ int err;
+
+ err = devlink_resources_validate(devlink, NULL, info);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+ action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+ else
+ action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+
+ if (!devlink_reload_action_is_supported(devlink, action)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested reload action is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+
+ limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
+ if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
+ struct nla_bitfield32 limits;
+ u32 limits_selected;
+
+ limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
+ limits_selected = limits.value & limits.selector;
+ if (!limits_selected) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
+ return -EINVAL;
+ }
+ for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
+ if (limits_selected & BIT(limit))
+ break;
+ /* The UAPI allows multi-selection, but it is currently unused */
+ if (limits_selected != BIT(limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Multiselection of limit is not supported");
+ return -EOPNOTSUPP;
+ }
+ if (!devlink_reload_limit_is_supported(devlink, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_reload_combination_is_invalid(action, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is invalid for this action");
+ return -EINVAL;
+ }
+ }
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ }
+
+ err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
+
+ if (dest_net)
+ put_net(dest_net);
+
+ if (err)
+ return err;
+ /* For backward compatibility, generate a reply only if the user passed attributes */
+ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
+ return 0;
+
+ return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
+ DEVLINK_CMD_RELOAD, info);
+}
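+
+/* Example user-space triggers for the command above (iproute2 devlink
+ * syntax, shown for illustration; the device name is hypothetical):
+ *
+ *     devlink dev reload pci/0000:01:00.0
+ *     devlink dev reload pci/0000:01:00.0 action fw_activate limit no_reset
+ *     devlink dev reload pci/0000:01:00.0 netns 4242
+ *
+ * As noted above, a reply carrying the performed actions is generated
+ * only when the action or limit attribute is present in the request.
+ */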
+
+bool devlink_reload_actions_valid(const struct devlink_ops *ops)
+{
+ const struct devlink_reload_combination *comb;
+ int i;
+
+ if (!devlink_reload_supported(ops)) {
+ if (WARN_ON(ops->reload_actions))
+ return false;
+ return true;
+ }
+
+ if (WARN_ON(!ops->reload_actions ||
+ ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
+ return false;
+
+ if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
+ ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
+ comb = &devlink_reload_invalid_combinations[i];
+ if (ops->reload_actions == BIT(comb->action) &&
+ ops->reload_limits == BIT(comb->limit))
+ return false;
+ }
+ return true;
+}
+
+static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ void *hdr;
+ int err = 0;
+ u16 mode;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (ops->eswitch_mode_get) {
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_inline_mode_get) {
+ err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+ inline_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
+ info->snd_portid, info->snd_seq, 0);
+
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ int err = 0;
+ u16 mode;
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+ if (!ops->eswitch_mode_set)
+ return -EOPNOTSUPP;
+ mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = devlink_rate_nodes_check(devlink, mode, info->extack);
+ if (err)
+ return err;
+ err = ops->eswitch_mode_set(devlink, mode, info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+ if (!ops->eswitch_inline_mode_set)
+ return -EOPNOTSUPP;
+ inline_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
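+
+/* Example user-space usage of the two eswitch ops above (iproute2 devlink
+ * syntax, for illustration; the device name is hypothetical):
+ *
+ *     devlink dev eswitch show pci/0000:01:00.0
+ *     devlink dev eswitch set pci/0000:01:00.0 mode switchdev
+ *
+ * Each attribute is forwarded to the corresponding driver callback only
+ * when the driver implements it; otherwise -EOPNOTSUPP is returned.
+ */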
+
+int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
+static int devlink_info_version_put(struct devlink_info_req *req, int attr,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ struct nlattr *nest;
+ int err;
+
+ if (req->version_cb)
+ req->version_cb(version_name, version_type,
+ req->version_cb_priv);
+
+ if (!req->msg)
+ return 0;
+
+ nest = nla_nest_start_noflag(req->msg, attr);
+ if (!nest)
+ return -EMSGSIZE;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
+ version_name);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
+ version_value);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(req->msg, nest);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(req->msg, nest);
+ return err;
+}
+
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
+
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
+
+int devlink_info_version_running_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
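+
+/* Sketch of a driver info_get() built on the helpers above (hypothetical
+ * "my_*" names and version values, for illustration only):
+ *
+ *     static int my_info_get(struct devlink *devlink,
+ *                            struct devlink_info_req *req,
+ *                            struct netlink_ext_ack *extack)
+ *     {
+ *             int err;
+ *
+ *             err = devlink_info_serial_number_put(req, "ABC0123456789");
+ *             if (err)
+ *                     return err;
+ *             return devlink_info_version_running_put(req,
+ *                             DEVLINK_INFO_VERSION_GENERIC_FW, "1.2.3");
+ *     }
+ */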
+
+static int devlink_nl_driver_info_get(struct device_driver *drv,
+ struct devlink_info_req *req)
+{
+ if (!drv)
+ return 0;
+
+ if (drv->name[0])
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
+ drv->name);
+
+ return 0;
+}
+
+static int
+devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags, struct netlink_ext_ack *extack)
+{
+ struct device *dev = devlink_to_dev(devlink);
+ struct devlink_info_req req = {};
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ req.msg = msg;
+ if (devlink->ops->info_get) {
+ err = devlink->ops->info_get(devlink, &req, extack);
+ if (err)
+ goto err_cancel_msg;
+ }
+
+ err = devlink_nl_driver_info_get(dev->driver, &req);
+ if (err)
+ goto err_cancel_msg;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ int err;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+
+const struct devlink_cmd devl_cmd_info_get = {
+ .dump_one = devlink_nl_cmd_info_get_dump_one,
+};
+
+static int devlink_nl_flash_update_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
+ goto out;
+
+ if (params->status_msg &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
+ params->status_msg))
+ goto nla_put_failure;
+ if (params->component &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
+ params->component))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ params->done, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ params->total, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void __devlink_flash_update_notify(struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
+ if (err)
+ goto out_free_msg;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+}
+
+static void devlink_flash_update_begin_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE,
+ &params);
+}
+
+static void devlink_flash_update_end_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_END,
+ &params);
+}
+
+void devlink_flash_update_status_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long done,
+ unsigned long total)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .done = done,
+ .total = total,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long timeout)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .timeout = timeout,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
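+
+/* Sketch of a driver flash loop driving the status notifications above
+ * (my_write_chunk() is a hypothetical helper, for illustration only):
+ *
+ *     for (offset = 0; offset < fw->size; offset += chunk) {
+ *             err = my_write_chunk(dev, fw->data + offset, chunk);
+ *             if (err)
+ *                     return err;
+ *             devlink_flash_update_status_notify(devlink, "Flashing",
+ *                                                NULL, offset, fw->size);
+ *     }
+ */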
+
+struct devlink_flash_component_lookup_ctx {
+ const char *lookup_name;
+ bool lookup_name_found;
+};
+
+static void
+devlink_flash_component_lookup_cb(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv)
+{
+ struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
+
+ if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
+ lookup_ctx->lookup_name_found)
+ return;
+
+ lookup_ctx->lookup_name_found =
+ !strcmp(lookup_ctx->lookup_name, version_name);
+}
+
+static int devlink_flash_component_get(struct devlink *devlink,
+ struct nlattr *nla_component,
+ const char **p_component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_flash_component_lookup_ctx lookup_ctx = {};
+ struct devlink_info_req req = {};
+ const char *component;
+ int ret;
+
+ if (!nla_component)
+ return 0;
+
+ component = nla_data(nla_component);
+
+ if (!devlink->ops->info_get) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ lookup_ctx.lookup_name = component;
+ req.version_cb = devlink_flash_component_lookup_cb;
+ req.version_cb_priv = &lookup_ctx;
+
+ ret = devlink->ops->info_get(devlink, &req, NULL);
+ if (ret)
+ return ret;
+
+ if (!lookup_ctx.lookup_name_found) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "selected component is not supported by this device");
+ return -EINVAL;
+ }
+ *p_component = component;
+ return 0;
+}
+
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *nla_overwrite_mask, *nla_file_name;
+ struct devlink_flash_update_params params = {};
+ struct devlink *devlink = info->user_ptr[0];
+ const char *file_name;
+ u32 supported_params;
+ int ret;
+
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
+ return -EINVAL;
+
+ ret = devlink_flash_component_get(devlink,
+ info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
+ &params.component, info->extack);
+ if (ret)
+ return ret;
+
+ supported_params = devlink->ops->supported_flash_update_params;
+
+ nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
+ if (nla_overwrite_mask) {
+ struct nla_bitfield32 sections;
+
+ if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
+ "overwrite settings are not supported by this device");
+ return -EOPNOTSUPP;
+ }
+ sections = nla_get_bitfield32(nla_overwrite_mask);
+ params.overwrite_mask = sections.value & sections.selector;
+ }
+
+ nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
+ file_name = nla_data(nla_file_name);
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name,
+ "failed to locate the requested firmware file");
+ return ret;
+ }
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, info->extack);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+
+ return ret;
+}
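+
+/* Example user-space trigger for the command above (iproute2 devlink
+ * syntax, for illustration; the device and file names are hypothetical):
+ *
+ *     devlink dev flash pci/0000:01:00.0 file fw-1.2.3.bin
+ *     devlink dev flash pci/0000:01:00.0 file fw-1.2.3.bin component fw.mgmt
+ *
+ * The firmware file is looked up via request_firmware(), i.e. relative to
+ * the usual firmware search paths such as /lib/firmware.
+ */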
+
+static void __devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ struct devlink_info_req req = {};
+ const struct nlattr *nlattr;
+ struct sk_buff *msg;
+ int rem, err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ req.msg = msg;
+ err = devlink->ops->info_get(devlink, &req, NULL);
+ if (err)
+ goto free_msg;
+
+ nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+ const struct nlattr *kv;
+ int rem_kv;
+
+ if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
+ continue;
+
+ nla_for_each_nested(kv, nlattr, rem_kv) {
+ if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
+ continue;
+
+ strlcat(buf, nla_data(kv), len);
+ strlcat(buf, " ", len);
+ }
+ }
+free_msg:
+ nlmsg_free(msg);
+}
+
+void devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ if (!devlink->ops->info_get)
+ return;
+
+ devl_lock(devlink);
+ if (devl_is_registered(devlink))
+ __devlink_compat_running_version(devlink, buf, len);
+ devl_unlock(devlink);
+}
+
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
+{
+ struct devlink_flash_update_params params = {};
+ int ret;
+
+ devl_lock(devlink);
+ if (!devl_is_registered(devlink)) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (!devlink->ops->flash_update) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret)
+ goto out_unlock;
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, NULL);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+out_unlock:
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+static int
+devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
+ u32 portid, u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *selftests;
+ void *hdr;
+ int err;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+ DEVLINK_CMD_SELFTESTS_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto err_cancel_msg;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ if (devlink->ops->selftest_check(devlink, i, extack)) {
+ err = nla_put_flag(msg, i);
+ if (err)
+ goto err_cancel_msg;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
+ info->snd_seq, 0, info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ if (!devlink->ops->selftest_check)
+ return 0;
+
+ return devlink_nl_selftests_fill(msg, devlink,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+}
+
+const struct devlink_cmd devl_cmd_selftests_get = {
+ .dump_one = devlink_nl_cmd_selftests_get_dump_one,
+};
+
+static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
+ enum devlink_selftest_status test_status)
+{
+ struct nlattr *result_attr;
+
+ result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
+ if (!result_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
+ test_status))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, result_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, result_attr);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
+};
+
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *attrs, *selftests;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
+ return -EINVAL;
+
+ attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
+
+ err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
+ devlink_selftest_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto genlmsg_cancel;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ enum devlink_selftest_status test_status;
+
+ if (nla_get_flag(tb[i])) {
+ if (!devlink->ops->selftest_check(devlink, i,
+ info->extack)) {
+ if (devlink_selftest_result_put(msg, i,
+ DEVLINK_SELFTEST_STATUS_SKIP))
+ goto selftests_nest_cancel;
+ continue;
+ }
+
+ test_status = devlink->ops->selftest_run(devlink, i,
+ info->extack);
+ if (devlink_selftest_result_put(msg, i, test_status))
+ goto selftests_nest_cancel;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+selftests_nest_cancel:
+ nla_nest_cancel(msg, selftests);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return err;
+}
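+
+/* Example user-space usage of the selftest commands above (iproute2
+ * devlink syntax, for illustration; the device name is hypothetical):
+ *
+ *     devlink dev selftests show pci/0000:01:00.0
+ *     devlink dev selftests run pci/0000:01:00.0 id flash
+ */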
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
new file mode 100644
index 000000000000..941174e157d4
--- /dev/null
+++ b/net/devlink/devl_internal.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+#include <net/net_namespace.h>
+
+#define DEVLINK_REGISTERED XA_MARK_1
+
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
+struct devlink {
+ u32 index;
+ struct xarray ports;
+ struct list_head rate_list;
+ struct list_head sb_list;
+ struct list_head dpipe_table_list;
+ struct list_head resource_list;
+ struct list_head param_list;
+ struct list_head region_list;
+ struct list_head reporter_list;
+ struct devlink_dpipe_headers *dpipe_headers;
+ struct list_head trap_list;
+ struct list_head trap_group_list;
+ struct list_head trap_policer_list;
+ struct list_head linecard_list;
+ const struct devlink_ops *ops;
+ struct xarray snapshot_ids;
+ struct devlink_dev_stats stats;
+ struct device *dev;
+ possible_net_t _net;
+ /* Serializes access to devlink instance specific objects such as
+ * port, sb, dpipe, resource, params, region, traps and more.
+ */
+ struct mutex lock;
+ struct lock_class_key lock_key;
+ u8 reload_failed:1;
+ refcount_t refcount;
+ struct rcu_work rwork;
+ struct notifier_block netdevice_nb;
+ char priv[] __aligned(NETDEV_ALIGN);
+};
+
+extern struct xarray devlinks;
+extern struct genl_family devlink_nl_family;
+
+/* devlink instances are open to access from user space after the
+ * devlink_register() call. Such a logical barrier allows us to have certain
+ * expectations related to locking.
+ *
+ * Before *_register() - we are in the initialization stage and no parallel
+ * access to the devlink instance is possible. All drivers perform that phase
+ * while implicitly holding the device_lock.
+ *
+ * After *_register() - users and the driver can access the devlink instance
+ * at the same time.
+ */
+#define ASSERT_DEVLINK_REGISTERED(d) \
+ WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
+ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+
+/* Iterate over the devlink pointers to which it was possible to take a
+ * reference. devlink_put() needs to be called for each iterated devlink
+ * pointer in the loop body in order to release the reference.
+ */
+#define devlinks_xa_for_each_registered_get(net, index, devlink) \
+ for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++)
+
+struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp);
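+
+/* Typical use of the iterator above (illustrative; mirrors the pernet
+ * pre_exit handler in core.c):
+ *
+ *     devlinks_xa_for_each_registered_get(net, index, devlink) {
+ *             devl_lock(devlink);
+ *             ...
+ *             devl_unlock(devlink);
+ *             devlink_put(devlink);
+ *     }
+ */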
+
+static inline bool devl_is_registered(struct devlink *devlink)
+{
+ devl_assert_locked(devlink);
+ return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+}
+
+/* Netlink */
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
+#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
+#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
+
+enum devlink_multicast_groups {
+ DEVLINK_MCGRP_CONFIG,
+};
+
+/* state held across netlink dumps */
+struct devlink_nl_dump_state {
+ unsigned long instance;
+ int idx;
+ union {
+ /* DEVLINK_CMD_REGION_READ */
+ struct {
+ u64 start_offset;
+ };
+ /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET */
+ struct {
+ u64 dump_ts;
+ };
+ };
+};
+
+struct devlink_cmd {
+ int (*dump_one)(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb);
+};
+
+extern const struct genl_small_ops devlink_nl_ops[56];
+
+struct devlink *
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
+
+void devlink_notify_unregister(struct devlink *devlink);
+void devlink_notify_register(struct devlink *devlink);
+
+int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb);
+
+static inline struct devlink_nl_dump_state *
+devlink_dump_state(struct netlink_callback *cb)
+{
+ NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state);
+
+ return (struct devlink_nl_dump_state *)cb->ctx;
+}
+
+static inline int
+devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
+ return -EMSGSIZE;
+ return 0;
+}
+
+/* Commands */
+extern const struct devlink_cmd devl_cmd_get;
+extern const struct devlink_cmd devl_cmd_port_get;
+extern const struct devlink_cmd devl_cmd_sb_get;
+extern const struct devlink_cmd devl_cmd_sb_pool_get;
+extern const struct devlink_cmd devl_cmd_sb_port_pool_get;
+extern const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get;
+extern const struct devlink_cmd devl_cmd_param_get;
+extern const struct devlink_cmd devl_cmd_region_get;
+extern const struct devlink_cmd devl_cmd_info_get;
+extern const struct devlink_cmd devl_cmd_health_reporter_get;
+extern const struct devlink_cmd devl_cmd_trap_get;
+extern const struct devlink_cmd devl_cmd_trap_group_get;
+extern const struct devlink_cmd devl_cmd_trap_policer_get;
+extern const struct devlink_cmd devl_cmd_rate_get;
+extern const struct devlink_cmd devl_cmd_linecard_get;
+extern const struct devlink_cmd devl_cmd_selftests_get;
+
+/* Notify */
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+
+/* Ports */
+int devlink_port_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr);
+
+struct devlink_port *
+devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
+
+/* Reload */
+bool devlink_reload_actions_valid(const struct devlink_ops *ops);
+int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack);
+
+static inline bool devlink_reload_supported(const struct devlink_ops *ops)
+{
+ return ops->reload_down && ops->reload_up;
+}
+
+/* Resources */
+struct devlink_resource;
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info);
+
+/* Line cards */
+struct devlink_linecard;
+
+struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
+
+/* Rates */
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
+struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
+struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink,
+ struct genl_info *info);
+/* Devlink nl cmds */
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/core/devlink.c b/net/devlink/leftover.c
index 909a10e4b0dd..f05ab093d231 100644
--- a/net/core/devlink.c
+++ b/net/devlink/leftover.c
@@ -31,58 +31,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
-#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
- (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
-
-struct devlink_dev_stats {
- u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
- u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
-};
-
-struct devlink {
- u32 index;
- struct xarray ports;
- struct list_head rate_list;
- struct list_head sb_list;
- struct list_head dpipe_table_list;
- struct list_head resource_list;
- struct list_head param_list;
- struct list_head region_list;
- struct list_head reporter_list;
- struct mutex reporters_lock; /* protects reporter_list */
- struct devlink_dpipe_headers *dpipe_headers;
- struct list_head trap_list;
- struct list_head trap_group_list;
- struct list_head trap_policer_list;
- struct list_head linecard_list;
- struct mutex linecards_lock; /* protects linecard_list */
- const struct devlink_ops *ops;
- u64 features;
- struct xarray snapshot_ids;
- struct devlink_dev_stats stats;
- struct device *dev;
- possible_net_t _net;
- /* Serializes access to devlink instance specific objects such as
- * port, sb, dpipe, resource, params, region, traps and more.
- */
- struct mutex lock;
- struct lock_class_key lock_key;
- u8 reload_failed:1;
- refcount_t refcount;
- struct completion comp;
- struct rcu_head rcu;
- struct notifier_block netdevice_nb;
- char priv[] __aligned(NETDEV_ALIGN);
-};
-
-struct devlink_linecard_ops;
-struct devlink_linecard_type;
+#include "devl_internal.h"
struct devlink_linecard {
struct list_head list;
struct devlink *devlink;
unsigned int index;
- refcount_t refcount;
const struct devlink_linecard_ops *ops;
void *priv;
enum devlink_linecard_state state;
@@ -122,24 +76,6 @@ struct devlink_resource {
void *occ_get_priv;
};
-void *devlink_priv(struct devlink *devlink)
-{
- return &devlink->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_priv);
-
-struct devlink *priv_to_devlink(void *priv)
-{
- return container_of(priv, struct devlink, priv);
-}
-EXPORT_SYMBOL_GPL(priv_to_devlink);
-
-struct device *devlink_to_dev(const struct devlink *devlink)
-{
- return devlink->dev;
-}
-EXPORT_SYMBOL_GPL(devlink_to_dev);
-
static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
{
.name = "destination mac",
@@ -207,176 +143,6 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
-static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
- [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
-};
-
-static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
-#define DEVLINK_REGISTERED XA_MARK_1
-#define DEVLINK_UNREGISTERING XA_MARK_2
-
-/* devlink instances are open to the access from the user space after
- * devlink_register() call. Such logical barrier allows us to have certain
- * expectations related to locking.
- *
- * Before *_register() - we are in initialization stage and no parallel
- * access possible to the devlink instance. All drivers perform that phase
- * by implicitly holding device_lock.
- *
- * After *_register() - users and driver can access devlink instance at
- * the same time.
- */
-#define ASSERT_DEVLINK_REGISTERED(d) \
- WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
-#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
- WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
-
-struct net *devlink_net(const struct devlink *devlink)
-{
- return read_pnet(&devlink->_net);
-}
-EXPORT_SYMBOL_GPL(devlink_net);
-
-static void __devlink_put_rcu(struct rcu_head *head)
-{
- struct devlink *devlink = container_of(head, struct devlink, rcu);
-
- complete(&devlink->comp);
-}
-
-void devlink_put(struct devlink *devlink)
-{
- if (refcount_dec_and_test(&devlink->refcount))
- /* Make sure unregister operation that may await the completion
- * is unblocked only after all users are after the end of
- * RCU grace period.
- */
- call_rcu(&devlink->rcu, __devlink_put_rcu);
-}
-
-struct devlink *__must_check devlink_try_get(struct devlink *devlink)
-{
- if (refcount_inc_not_zero(&devlink->refcount))
- return devlink;
- return NULL;
-}
-
-void devl_assert_locked(struct devlink *devlink)
-{
- lockdep_assert_held(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_assert_locked);
-
-#ifdef CONFIG_LOCKDEP
-/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */
-bool devl_lock_is_held(struct devlink *devlink)
-{
- return lockdep_is_held(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_lock_is_held);
-#endif
-
-void devl_lock(struct devlink *devlink)
-{
- mutex_lock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_lock);
-
-int devl_trylock(struct devlink *devlink)
-{
- return mutex_trylock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_trylock);
-
-void devl_unlock(struct devlink *devlink)
-{
- mutex_unlock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_unlock);
-
-static struct devlink *
-devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter,
- void * (*xa_find_fn)(struct xarray *, unsigned long *,
- unsigned long, xa_mark_t))
-{
- struct devlink *devlink;
-
- rcu_read_lock();
-retry:
- devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED);
- if (!devlink)
- goto unlock;
-
- /* In case devlink_unregister() was already called and "unregistering"
- * mark was set, do not allow to get a devlink reference here.
- * This prevents live-lock of devlink_unregister() wait for completion.
- */
- if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING))
- goto retry;
-
- /* For a possible retry, the xa_find_after() should be always used */
- xa_find_fn = xa_find_after;
- if (!devlink_try_get(devlink))
- goto retry;
- if (!net_eq(devlink_net(devlink), net)) {
- devlink_put(devlink);
- goto retry;
- }
-unlock:
- rcu_read_unlock();
- return devlink;
-}
-
-static struct devlink *devlinks_xa_find_get_first(struct net *net,
- unsigned long *indexp,
- xa_mark_t filter)
-{
- return devlinks_xa_find_get(net, indexp, filter, xa_find);
-}
-
-static struct devlink *devlinks_xa_find_get_next(struct net *net,
- unsigned long *indexp,
- xa_mark_t filter)
-{
- return devlinks_xa_find_get(net, indexp, filter, xa_find_after);
-}
-
-/* Iterate over devlink pointers which were possible to get reference to.
- * devlink_put() needs to be called for each iterated devlink pointer
- * in loop body in order to release the reference.
- */
-#define devlinks_xa_for_each_get(net, index, devlink, filter) \
- for (index = 0, \
- devlink = devlinks_xa_find_get_first(net, &index, filter); \
- devlink; devlink = devlinks_xa_find_get_next(net, &index, filter))
-
-#define devlinks_xa_for_each_registered_get(net, index, devlink) \
- devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED)
-
-static struct devlink *devlink_get_from_attrs(struct net *net,
- struct nlattr **attrs)
-{
- struct devlink *devlink;
- unsigned long index;
- char *busname;
- char *devname;
-
- if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
- return ERR_PTR(-EINVAL);
-
- busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
- devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
-
- devlinks_xa_for_each_registered_get(net, index, devlink) {
- if (strcmp(devlink->dev->bus->name, busname) == 0 &&
- strcmp(dev_name(devlink->dev), devname) == 0)
- return devlink;
- devlink_put(devlink);
- }
-
- return ERR_PTR(-ENODEV);
-}
-
#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
WARN_ON_ONCE(!(devlink_port)->registered)
#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
@@ -405,8 +171,8 @@ static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
return ERR_PTR(-EINVAL);
}
-static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
- struct genl_info *info)
+struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
{
return devlink_port_get_from_attrs(devlink, info->attrs);
}
@@ -466,13 +232,13 @@ devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
return devlink_rate_node_get_by_name(devlink, rate_node_name);
}
-static struct devlink_rate *
+struct devlink_rate *
devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
{
return devlink_rate_node_get_from_attrs(devlink, info->attrs);
}
-static struct devlink_rate *
+struct devlink_rate *
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
{
struct nlattr **attrs = info->attrs;
@@ -511,11 +277,7 @@ devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
struct devlink_linecard *linecard;
- mutex_lock(&devlink->linecards_lock);
linecard = devlink_linecard_get_by_index(devlink, linecard_index);
- if (linecard)
- refcount_inc(&linecard->refcount);
- mutex_unlock(&devlink->linecards_lock);
if (!linecard)
return ERR_PTR(-ENODEV);
return linecard;
@@ -523,20 +285,12 @@ devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
return ERR_PTR(-EINVAL);
}
-static struct devlink_linecard *
+struct devlink_linecard *
devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
{
return devlink_linecard_get_from_attrs(devlink, info->attrs);
}
-static void devlink_linecard_put(struct devlink_linecard *linecard)
-{
- if (refcount_dec_and_test(&linecard->refcount)) {
- mutex_destroy(&linecard->state_lock);
- kfree(linecard);
- }
-}
-
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -838,104 +592,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
-#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
-#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
-
-static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink_linecard *linecard;
- struct devlink_port *devlink_port;
- struct devlink *devlink;
- int err;
-
- devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
- if (IS_ERR(devlink))
- return PTR_ERR(devlink);
- devl_lock(devlink);
- info->user_ptr[0] = devlink;
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
- devlink_port = devlink_port_get_from_info(devlink, info);
- if (IS_ERR(devlink_port)) {
- err = PTR_ERR(devlink_port);
- goto unlock;
- }
- info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
- devlink_port = devlink_port_get_from_info(devlink, info);
- if (!IS_ERR(devlink_port))
- info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
- struct devlink_rate *devlink_rate;
-
- devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate)) {
- err = PTR_ERR(devlink_rate);
- goto unlock;
- }
- info->user_ptr[1] = devlink_rate;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_from_info(devlink, info);
- if (IS_ERR(rate_node)) {
- err = PTR_ERR(rate_node);
- goto unlock;
- }
- info->user_ptr[1] = rate_node;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = devlink_linecard_get_from_info(devlink, info);
- if (IS_ERR(linecard)) {
- err = PTR_ERR(linecard);
- goto unlock;
- }
- info->user_ptr[1] = linecard;
- }
- return 0;
-
-unlock:
- devl_unlock(devlink);
- devlink_put(devlink);
- return err;
-}
-
-static void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink_linecard *linecard;
- struct devlink *devlink;
-
- devlink = info->user_ptr[0];
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = info->user_ptr[1];
- devlink_linecard_put(linecard);
- }
- devl_unlock(devlink);
- devlink_put(devlink);
-}
-
-static struct genl_family devlink_nl_family;
-
-enum devlink_multicast_groups {
- DEVLINK_MCGRP_CONFIG,
-};
-
-static const struct genl_multicast_group devlink_nl_mcgrps[] = {
- [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
-};
-
-static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
- return -EMSGSIZE;
- if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
- return -EMSGSIZE;
- return 0;
-}
-
static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
{
struct nlattr *nested_attr;
@@ -972,185 +628,6 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+ nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
}
-struct devlink_reload_combination {
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
-};
-
-static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
- {
- /* can't reinitialize driver with no down time */
- .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
- },
-};
-
-static bool
-devlink_reload_combination_is_invalid(enum devlink_reload_action action,
- enum devlink_reload_limit limit)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
- if (devlink_reload_invalid_combinations[i].action == action &&
- devlink_reload_invalid_combinations[i].limit == limit)
- return true;
- return false;
-}
-
-static bool
-devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
-{
- return test_bit(action, &devlink->ops->reload_actions);
-}
-
-static bool
-devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
-{
- return test_bit(limit, &devlink->ops->reload_limits);
-}
-
-static int devlink_reload_stat_put(struct sk_buff *msg,
- enum devlink_reload_limit limit, u32 value)
-{
- struct nlattr *reload_stats_entry;
-
- reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
- if (!reload_stats_entry)
- return -EMSGSIZE;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
- nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
- goto nla_put_failure;
- nla_nest_end(msg, reload_stats_entry);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_entry);
- return -EMSGSIZE;
-}
-
-static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
-{
- struct nlattr *reload_stats_attr, *act_info, *act_stats;
- int i, j, stat_idx;
- u32 value;
-
- if (!is_remote)
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
- else
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
-
- if (!reload_stats_attr)
- return -EMSGSIZE;
-
- for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
- if ((!is_remote &&
- !devlink_reload_action_is_supported(devlink, i)) ||
- i == DEVLINK_RELOAD_ACTION_UNSPEC)
- continue;
- act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
- if (!act_info)
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
- goto action_info_nest_cancel;
- act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
- if (!act_stats)
- goto action_info_nest_cancel;
-
- for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
- /* Remote stats are shown even if not locally supported.
- * Stats of actions with an unspecified limit are shown
- * even though drivers don't need to register the
- * unspecified limit.
- */
- if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
- !devlink_reload_limit_is_supported(devlink, j)) ||
- devlink_reload_combination_is_invalid(i, j))
- continue;
-
- stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
- if (!is_remote)
- value = devlink->stats.reload_stats[stat_idx];
- else
- value = devlink->stats.remote_reload_stats[stat_idx];
- if (devlink_reload_stat_put(msg, j, value))
- goto action_stats_nest_cancel;
- }
- nla_nest_end(msg, act_stats);
- nla_nest_end(msg, act_info);
- }
- nla_nest_end(msg, reload_stats_attr);
- return 0;
-
-action_stats_nest_cancel:
- nla_nest_cancel(msg, act_stats);
-action_info_nest_cancel:
- nla_nest_cancel(msg, act_info);
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_attr);
- return -EMSGSIZE;
-}
-
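
The flat index used in the loop above is worth spelling out: the reload counters form a single array addressed as a [limit][action] matrix, and the same computation reappears in __devlink_reload_stats_update() further down in this diff. A minimal sketch of the layout (reload_stat_read() is a hypothetical accessor, not part of the patch):

	/*
	 * Both reload_stats[] and remote_reload_stats[] are flat arrays of
	 * DEVLINK_RELOAD_STATS_ARRAY_SIZE u32 counters, addressed row by row:
	 *
	 *	stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
	 *
	 * so each (limit, action) pair owns exactly one counter.
	 */
	static u32 reload_stat_read(struct devlink *devlink,
				    enum devlink_reload_limit limit,
				    enum devlink_reload_action action)
	{
		int stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;

		return devlink->stats.reload_stats[stat_idx];
	}
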
-static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- struct nlattr *dev_stats;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
- goto nla_put_failure;
-
- dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
- if (!dev_stats)
- goto nla_put_failure;
-
- if (devlink_reload_stats_put(msg, devlink, false))
- goto dev_stats_nest_cancel;
- if (devlink_reload_stats_put(msg, devlink, true))
- goto dev_stats_nest_cancel;
-
- nla_nest_end(msg, dev_stats);
- genlmsg_end(msg, hdr);
- return 0;
-
-dev_stats_nest_cancel:
- nla_nest_cancel(msg, dev_stats);
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
static int devlink_nl_port_attrs_put(struct sk_buff *msg,
struct devlink_port *devlink_port)
{
@@ -1537,47 +1014,40 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_rate *devlink_rate;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
- u32 id = NETLINK_CB(cb->skb).portid;
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, NULL);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ if (idx < state->idx) {
idx++;
+ continue;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, NULL);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_rate_get = {
+ .dump_one = devlink_nl_cmd_rate_get_dump_one,
+};
+
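
Each converted dumpit now only walks a single instance; the instance iteration itself moves into the new net/devlink/netlink.c. A sketch of what the shared loop plausibly looks like; devlink_nl_instance_iter_dump() and the devlink_nl_dump_state layout shown here are assumptions inferred from the callbacks in this patch, not code copied from it:

	/* Assumed per-dump resume state behind devlink_dump_state(cb). */
	struct devlink_nl_dump_state {
		unsigned long instance;	/* devlink instance to resume from */
		int idx;		/* object index within that instance */
		union {
			u64 start_offset;	/* DEVLINK_CMD_REGION_READ */
		};
	};

	/* Assumed shape of the shared dumpit: iterate the registered
	 * instances under the instance lock and delegate the per-instance
	 * work to cmd->dump_one(). The devlink_cmd would be looked up from
	 * a table indexed by the genetlink command.
	 */
	static int devlink_nl_instance_iter_dump(struct sk_buff *msg,
						 struct netlink_callback *cb,
						 const struct devlink_cmd *cmd)
	{
		struct devlink_nl_dump_state *state = devlink_dump_state(cb);
		struct devlink *devlink;
		int err = 0;

		devlinks_xa_for_each_registered_get(sock_net(msg->sk),
						    state->instance, devlink) {
			devl_lock(devlink);
			err = cmd->dump_one(msg, devlink, cb);
			devl_unlock(devlink);
			devlink_put(devlink);
			if (err)
				break;
			state->idx = 0;	/* next instance dumps from scratch */
		}
		/* -EMSGSIZE only means the skb is full; resume next call */
		if (err != -EMSGSIZE)
			return err;
		return msg->len;
	}
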
static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -1612,58 +1082,6 @@ devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
return false;
}
-static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start) {
- idx++;
- devlink_put(devlink);
- continue;
- }
-
- devl_lock(devlink);
- err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
- devl_unlock(devlink);
- devlink_put(devlink);
-
- if (err)
- goto out;
- idx++;
- }
-out:
- cb->args[0] = idx;
- return msg->len;
-}
-
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -1686,43 +1104,40 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_port *devlink_port;
- unsigned long index, port_index;
- int start = cb->args[0];
+ unsigned long port_index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->extack);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_port_fill(msg, devlink_port,
+ DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, cb->extack);
+ if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_port_get = {
+ .dump_one = devlink_nl_cmd_port_get_dump_one,
+};
+
static int devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type port_type)
@@ -2465,46 +1880,42 @@ static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_linecard *linecard;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- mutex_lock(&devlink->linecards_lock);
- list_for_each_entry(linecard, &devlink->linecard_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- mutex_unlock(&devlink->linecards_lock);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ state->idx = idx;
+ break;
}
- mutex_unlock(&devlink->linecards_lock);
- devlink_put(devlink);
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_linecard_get = {
+ .dump_one = devlink_nl_cmd_linecard_get_dump_one,
+};
+
static struct devlink_linecard_type *
devlink_linecard_type_lookup(struct devlink_linecard *linecard,
const char *type)
@@ -2727,43 +2138,39 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
- DEVLINK_CMD_SB_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_get = {
+ .dump_one = devlink_nl_cmd_sb_get_dump_one,
+};
+
static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
u16 pool_index, enum devlink_command cmd,
@@ -2869,46 +2276,39 @@ static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
return 0;
}
-static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
int err = 0;
+ int idx = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (!devlink->ops->sb_pool_get)
- goto retry;
+ if (!devlink->ops->sb_pool_get)
+ return 0;
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
- devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
-retry:
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_pool_get = {
+ .dump_one = devlink_nl_cmd_sb_pool_get_dump_one,
+};
+
static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type,
@@ -3084,46 +2484,39 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
return 0;
}
-static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (!devlink->ops->sb_port_pool_get)
- goto retry;
-
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_port_pool_get_dumpit(msg, start, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ if (!devlink->ops->sb_port_pool_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
-retry:
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_port_pool_get = {
+ .dump_one = devlink_nl_cmd_sb_port_pool_get_dump_one,
+};
+
static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 threshold,
@@ -3327,47 +2720,38 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
}
static int
-devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (!devlink->ops->sb_tc_pool_bind_get)
- goto retry;
+ if (!devlink->ops->sb_tc_pool_bind_get)
+ return 0;
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
- devlink,
- devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
-retry:
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get = {
+ .dump_one = devlink_nl_cmd_sb_tc_pool_bind_get_dump_one,
+};
+
static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
@@ -3455,85 +2839,8 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- void *hdr;
- int err = 0;
- u16 mode;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto nla_put_failure;
-
- if (ops->eswitch_mode_get) {
- err = ops->eswitch_mode_get(devlink, &mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_inline_mode_get) {
- err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
- inline_mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_encap_mode_get) {
- err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
- if (err)
- goto nla_put_failure;
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
- info->snd_portid, info->snd_seq, 0);
-
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
- struct netlink_ext_ack *extack)
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
@@ -3545,52 +2852,6 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
return 0;
}
-static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- int err = 0;
- u16 mode;
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
- if (!ops->eswitch_mode_set)
- return -EOPNOTSUPP;
- mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
- err = devlink_rate_nodes_check(devlink, mode, info->extack);
- if (err)
- return err;
- err = ops->eswitch_mode_set(devlink, mode, info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
- if (!ops->eswitch_inline_mode_set)
- return -EOPNOTSUPP;
- inline_mode = nla_get_u8(
- info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
- err = ops->eswitch_inline_mode_set(devlink, inline_mode,
- info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
- if (!ops->eswitch_encap_mode_set)
- return -EOPNOTSUPP;
- encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
- err = ops->eswitch_encap_mode_set(devlink, encap_mode,
- info->extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
int devlink_dpipe_match_put(struct sk_buff *skb,
struct devlink_dpipe_match *match)
{
@@ -4561,10 +3822,9 @@ static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
}
-static int
-devlink_resources_validate(struct devlink *devlink,
- struct devlink_resource *resource,
- struct genl_info *info)
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
{
struct list_head *resource_list;
int err = 0;
@@ -4584,743 +3844,6 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
-static struct net *devlink_netns_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
- struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
- struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
- struct net *net;
-
- if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
- return ERR_PTR(-EINVAL);
- }
-
- if (netns_pid_attr) {
- net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
- } else if (netns_fd_attr) {
- net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
- } else if (netns_id_attr) {
- net = get_net_ns_by_id(sock_net(skb->sk),
- nla_get_u32(netns_id_attr));
- if (!net)
- net = ERR_PTR(-EINVAL);
- } else {
- WARN_ON(1);
- net = ERR_PTR(-EINVAL);
- }
- if (IS_ERR(net)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
- return ERR_PTR(-EINVAL);
- }
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
- put_net(net);
- return ERR_PTR(-EPERM);
- }
- return net;
-}
-
-static void devlink_param_notify(struct devlink *devlink,
- unsigned int port_index,
- struct devlink_param_item *param_item,
- enum devlink_command cmd);
-
-static void devlink_ns_change_notify(struct devlink *devlink,
- struct net *dest_net, struct net *curr_net,
- bool new)
-{
- struct devlink_param_item *param_item;
- enum devlink_command cmd;
-
- /* Userspace needs to be notified about devlink objects
- * removed from the original network namespace and entering
- * the new one. The rest of the devlink objects are re-created
- * during the reload process, so those notifications are
- * generated separately.
- */
-
- if (!dest_net || net_eq(dest_net, curr_net))
- return;
-
- if (new)
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-
- cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL;
- list_for_each_entry(param_item, &devlink->param_list, list)
- devlink_param_notify(devlink, 0, param_item, cmd);
-
- if (!new)
- devlink_notify(devlink, DEVLINK_CMD_DEL);
-}
-
-static bool devlink_reload_supported(const struct devlink_ops *ops)
-{
- return ops->reload_down && ops->reload_up;
-}
-
-static void devlink_reload_failed_set(struct devlink *devlink,
- bool reload_failed)
-{
- if (devlink->reload_failed == reload_failed)
- return;
- devlink->reload_failed = reload_failed;
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-bool devlink_is_reload_failed(const struct devlink *devlink)
-{
- return devlink->reload_failed;
-}
-EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
-
-static void
-__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
- enum devlink_reload_limit limit, u32 actions_performed)
-{
- unsigned long actions = actions_performed;
- int stat_idx;
- int action;
-
- for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
- stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
- reload_stats[stat_idx]++;
- }
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-static void
-devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
- actions_performed);
-}
-
-/**
- * devlink_remote_reload_actions_performed - Update devlink on reload actions
- * performed which are not a direct result of devlink reload call.
- *
- * This should be called by a driver after performing reload actions that were not
- * the result of a devlink reload call, for example when fw_activate was performed
- * because a devlink reload triggered fw_activate on another host.
- * The motivation for this function is to keep the reload action statistics of this
- * device accurate whether the actions resulted from a direct devlink reload call or not.
- *
- * @devlink: devlink
- * @limit: reload limit
- * @actions_performed: bitmask of actions performed
- */
-void devlink_remote_reload_actions_performed(struct devlink *devlink,
- enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- if (WARN_ON(!actions_performed ||
- actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
- limit > DEVLINK_RELOAD_LIMIT_MAX))
- return;
-
- __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
- actions_performed);
-}
-EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
-
-static int devlink_reload(struct devlink *devlink, struct net *dest_net,
- enum devlink_reload_action action, enum devlink_reload_limit limit,
- u32 *actions_performed, struct netlink_ext_ack *extack)
-{
- u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
- struct net *curr_net;
- int err;
-
- memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats));
-
- curr_net = devlink_net(devlink);
- devlink_ns_change_notify(devlink, dest_net, curr_net, false);
- err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
- if (err)
- return err;
-
- if (dest_net && !net_eq(dest_net, curr_net)) {
- move_netdevice_notifier_net(curr_net, dest_net,
- &devlink->netdevice_nb);
- write_pnet(&devlink->_net, dest_net);
- }
-
- err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
- devlink_reload_failed_set(devlink, !!err);
- if (err)
- return err;
-
- devlink_ns_change_notify(devlink, dest_net, curr_net, true);
- WARN_ON(!(*actions_performed & BIT(action)));
- /* Catch drivers that update the remote reload stats from within devlink reload */
- WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats)));
- devlink_reload_stats_update(devlink, limit, *actions_performed);
- return 0;
-}
-
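
devlink_reload() encodes the driver contract: reload_down() must quiesce the device while honouring the requested limit, the instance may then be moved to another network namespace, and reload_up() must report the actions it actually performed. A hypothetical driver skeleton, with foo_* names invented for illustration (the ops signatures match the call sites above):

	static int foo_reload_down(struct devlink *devlink, bool netns_change,
				   enum devlink_reload_action action,
				   enum devlink_reload_limit limit,
				   struct netlink_ext_ack *extack)
	{
		/* tear down driver state; must honour the requested limit */
		return 0;
	}

	static int foo_reload_up(struct devlink *devlink,
				 enum devlink_reload_action action,
				 enum devlink_reload_limit limit,
				 u32 *actions_performed,
				 struct netlink_ext_ack *extack)
	{
		/* re-init; the requested action must be set in the bitmask,
		 * which devlink_reload() WARNs on otherwise
		 */
		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
		return 0;
	}

	static const struct devlink_ops foo_devlink_ops = {
		.reload_actions	= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
		.reload_down	= foo_reload_down,
		.reload_up	= foo_reload_up,
	};

The driver must also announce the capability, e.g. via devlink_set_features(devlink, DEVLINK_F_RELOAD), or the doit handler above returns -EOPNOTSUPP.
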
-static int
-devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
- enum devlink_command cmd, struct genl_info *info)
-{
- struct sk_buff *msg;
- void *hdr;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
- actions_performed))
- goto nla_put_failure;
- genlmsg_end(msg, hdr);
-
- return genlmsg_reply(msg, info);
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
- struct net *dest_net = NULL;
- u32 actions_performed;
- int err;
-
- if (!(devlink->features & DEVLINK_F_RELOAD))
- return -EOPNOTSUPP;
-
- err = devlink_resources_validate(devlink, NULL, info);
- if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
- action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
- else
- action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
-
- if (!devlink_reload_action_is_supported(devlink, action)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested reload action is not supported by the driver");
- return -EOPNOTSUPP;
- }
-
- limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
- if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
- struct nla_bitfield32 limits;
- u32 limits_selected;
-
- limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
- limits_selected = limits.value & limits.selector;
- if (!limits_selected) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
- return -EINVAL;
- }
- for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
- if (limits_selected & BIT(limit))
- break;
- /* UAPI enables multiselection, but currently it is not used */
- if (limits_selected != BIT(limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Multiselection of limit is not supported");
- return -EOPNOTSUPP;
- }
- if (!devlink_reload_limit_is_supported(devlink, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is not supported by the driver");
- return -EOPNOTSUPP;
- }
- if (devlink_reload_combination_is_invalid(action, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is invalid for this action");
- return -EINVAL;
- }
- }
- if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
- info->attrs[DEVLINK_ATTR_NETNS_FD] ||
- info->attrs[DEVLINK_ATTR_NETNS_ID]) {
- dest_net = devlink_netns_get(skb, info);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
- }
-
- err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
-
- if (dest_net)
- put_net(dest_net);
-
- if (err)
- return err;
- /* For backward compatibility, generate a reply only if the user passed attributes */
- if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
- return 0;
-
- return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
- DEVLINK_CMD_RELOAD, info);
-}
-
-static int devlink_nl_flash_update_fill(struct sk_buff *msg,
- struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- void *hdr;
-
- hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
- goto out;
-
- if (params->status_msg &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
- params->status_msg))
- goto nla_put_failure;
- if (params->component &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
- params->component))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- params->done, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- params->total, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
- params->timeout, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
-out:
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void __devlink_flash_update_notify(struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
- if (err)
- goto out_free_msg;
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
- return;
-
-out_free_msg:
- nlmsg_free(msg);
-}
-
-static void devlink_flash_update_begin_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE,
- &params);
-}
-
-static void devlink_flash_update_end_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_END,
- &params);
-}
-
-void devlink_flash_update_status_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long done,
- unsigned long total)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .done = done,
- .total = total,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
-
-void devlink_flash_update_timeout_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long timeout)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .timeout = timeout,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
-
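
The status and timeout helpers above are the driver-facing half of flash progress reporting; the begin/end notifications are sent by the core around ops->flash_update(), so a driver only streams progress. A hypothetical flash_update() sketch, where the foo_* names and the 4 KiB chunk size are illustrative:

	static int foo_flash_update(struct devlink *devlink,
				    struct devlink_flash_update_params *params,
				    struct netlink_ext_ack *extack)
	{
		const struct firmware *fw = params->fw;
		size_t done, chunk = 4096;

		for (done = 0; done < fw->size; done += chunk) {
			/* ... write min(chunk, fw->size - done) bytes ... */
			devlink_flash_update_status_notify(devlink, "Flashing",
							   params->component,
							   done, fw->size);
		}
		/* a slow device-side activation can advertise its timeout */
		devlink_flash_update_timeout_notify(devlink, "Activating",
						    params->component, 60);
		return 0;
	}
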
-struct devlink_info_req {
- struct sk_buff *msg;
- void (*version_cb)(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv);
- void *version_cb_priv;
-};
-
-struct devlink_flash_component_lookup_ctx {
- const char *lookup_name;
- bool lookup_name_found;
-};
-
-static void
-devlink_flash_component_lookup_cb(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv)
-{
- struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
-
- if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
- lookup_ctx->lookup_name_found)
- return;
-
- lookup_ctx->lookup_name_found =
- !strcmp(lookup_ctx->lookup_name, version_name);
-}
-
-static int devlink_flash_component_get(struct devlink *devlink,
- struct nlattr *nla_component,
- const char **p_component,
- struct netlink_ext_ack *extack)
-{
- struct devlink_flash_component_lookup_ctx lookup_ctx = {};
- struct devlink_info_req req = {};
- const char *component;
- int ret;
-
- if (!nla_component)
- return 0;
-
- component = nla_data(nla_component);
-
- if (!devlink->ops->info_get) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "component update is not supported by this device");
- return -EOPNOTSUPP;
- }
-
- lookup_ctx.lookup_name = component;
- req.version_cb = devlink_flash_component_lookup_cb;
- req.version_cb_priv = &lookup_ctx;
-
- ret = devlink->ops->info_get(devlink, &req, NULL);
- if (ret)
- return ret;
-
- if (!lookup_ctx.lookup_name_found) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "selected component is not supported by this device");
- return -EINVAL;
- }
- *p_component = component;
- return 0;
-}
-
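
devlink_flash_component_get() resolves the requested component by replaying the driver's info_get() with a NULL message and only version_cb set; anything the driver reports with DEVLINK_INFO_VERSION_TYPE_COMPONENT becomes a valid flash target. A hypothetical info_get() showing that, with the "fw.bundle" component name purely illustrative:

	static int foo_info_get(struct devlink *devlink,
				struct devlink_info_req *req,
				struct netlink_ext_ack *extack)
	{
		/* reported as a component, so it can be named in
		 * DEVLINK_ATTR_FLASH_UPDATE_COMPONENT
		 */
		return devlink_info_version_running_put_ext(req, "fw.bundle",
					"1.2.3",
					DEVLINK_INFO_VERSION_TYPE_COMPONENT);
	}
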
-static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *nla_overwrite_mask, *nla_file_name;
- struct devlink_flash_update_params params = {};
- struct devlink *devlink = info->user_ptr[0];
- const char *file_name;
- u32 supported_params;
- int ret;
-
- if (!devlink->ops->flash_update)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
- return -EINVAL;
-
- ret = devlink_flash_component_get(devlink,
- info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
- &params.component, info->extack);
- if (ret)
- return ret;
-
- supported_params = devlink->ops->supported_flash_update_params;
-
- nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
- if (nla_overwrite_mask) {
- struct nla_bitfield32 sections;
-
- if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
- "overwrite settings are not supported by this device");
- return -EOPNOTSUPP;
- }
- sections = nla_get_bitfield32(nla_overwrite_mask);
- params.overwrite_mask = sections.value & sections.selector;
- }
-
- nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
- file_name = nla_data(nla_file_name);
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file");
- return ret;
- }
-
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, info->extack);
- devlink_flash_update_end_notify(devlink);
-
- release_firmware(params.fw);
-
- return ret;
-}
-
-static int
-devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
- u32 portid, u32 seq, int flags,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *selftests;
- void *hdr;
- int err;
- int i;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
- DEVLINK_CMD_SELFTESTS_GET);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto err_cancel_msg;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- if (devlink->ops->selftest_check(devlink, i, extack)) {
- err = nla_put_flag(msg, i);
- if (err)
- goto err_cancel_msg;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- if (!devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
- info->snd_seq, 0, info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start || !devlink->ops->selftest_check)
- goto inc;
-
- devl_lock(devlink);
- err = devlink_nl_selftests_fill(msg, devlink,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
- devl_unlock(devlink);
- if (err) {
- devlink_put(devlink);
- break;
- }
-inc:
- idx++;
- devlink_put(devlink);
- }
-
- if (err != -EMSGSIZE)
- return err;
-
- cb->args[0] = idx;
- return msg->len;
-}
-
-static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
- enum devlink_selftest_status test_status)
-{
- struct nlattr *result_attr;
-
- result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
- if (!result_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
- nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
- test_status))
- goto nla_put_failure;
-
- nla_nest_end(skb, result_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, result_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_selftests_run(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
- struct devlink *devlink = info->user_ptr[0];
- struct nlattr *attrs, *selftests;
- struct sk_buff *msg;
- void *hdr;
- int err;
- int i;
-
- if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
- return -EINVAL;
-
- attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
-
- err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
- devlink_selftest_nl_policy, info->extack);
- if (err < 0)
- return err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = -EMSGSIZE;
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto genlmsg_cancel;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- enum devlink_selftest_status test_status;
-
- if (nla_get_flag(tb[i])) {
- if (!devlink->ops->selftest_check(devlink, i,
- info->extack)) {
- if (devlink_selftest_result_put(msg, i,
- DEVLINK_SELFTEST_STATUS_SKIP))
- goto selftests_nest_cancel;
- continue;
- }
-
- test_status = devlink->ops->selftest_run(devlink, i,
- info->extack);
- if (devlink_selftest_result_put(msg, i, test_status))
- goto selftests_nest_cancel;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return genlmsg_reply(msg, info);
-
-selftests_nest_cancel:
- nla_nest_cancel(msg, selftests);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return err;
-}
-
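
The run handler above relies on a selftest_check()/selftest_run() pair in the driver ops: check() gates which test IDs are advertised and attempted, run() returns a per-test status. A hypothetical pair, where the flash ID is one of the real enum devlink_attr_selftest_id values:

	static bool foo_selftest_check(struct devlink *devlink, unsigned int id,
				       struct netlink_ext_ack *extack)
	{
		return id == DEVLINK_ATTR_SELFTEST_ID_FLASH;
	}

	static enum devlink_selftest_status
	foo_selftest_run(struct devlink *devlink, unsigned int id,
			 struct netlink_ext_ack *extack)
	{
		if (id != DEVLINK_ATTR_SELFTEST_ID_FLASH)
			return DEVLINK_SELFTEST_STATUS_SKIP;
		/* exercise the flash path here */
		return DEVLINK_SELFTEST_STATUS_PASS;
	}
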
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -5654,7 +4177,13 @@ static void devlink_param_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
cmd != DEVLINK_CMD_PORT_PARAM_DEL);
- ASSERT_DEVLINK_REGISTERED(devlink);
+
+ /* devlink_notify_register() / devlink_notify_unregister()
+ * will replay the notifications if the params are added/removed
+ * outside of the lifetime of the instance.
+ */
+ if (!devl_is_registered(devlink))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -5670,48 +4199,41 @@ static void devlink_param_notify(struct devlink *devlink,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_param_item *param_item;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_param_fill(msg, devlink, 0, param_item,
- DEVLINK_CMD_PARAM_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_param_get = {
+ .dump_one = devlink_nl_cmd_param_get_dump_one,
+};
+
static int
devlink_param_type_get_from_info(struct genl_info *info,
enum devlink_param_type *param_type)
@@ -6375,21 +4897,20 @@ out:
return err;
}
-static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb,
- struct devlink *devlink,
- int *idx,
- int start)
+static int
+devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_region *region;
struct devlink_port *port;
unsigned long port_index;
- int err = 0;
+ int idx = 0;
+ int err;
- devl_lock(devlink);
list_for_each_entry(region, &devlink->region_list, list) {
- if (*idx < start) {
- (*idx)++;
+ if (idx < state->idx) {
+ idx++;
continue;
}
err = devlink_nl_region_fill(msg, devlink,
@@ -6397,43 +4918,28 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, region);
- if (err)
- goto out;
- (*idx)++;
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
}
xa_for_each(&devlink->ports, port_index, port) {
- err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, idx,
- start);
- if (err)
- goto out;
+ err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
+ state->idx);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
}
-out:
- devl_unlock(devlink);
- return err;
+ return 0;
}
-static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
- &idx, start);
- devlink_put(devlink);
- if (err)
- goto out;
- }
-out:
- cb->args[0] = idx;
- return msg->len;
-}
+const struct devlink_cmd devl_cmd_region_get = {
+ .dump_one = devlink_nl_cmd_region_get_dump_one,
+};
static int devlink_nl_cmd_region_del(struct sk_buff *skb,
struct genl_info *info)
@@ -6716,6 +5222,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
u64 ret_offset, start_offset, end_offset = U64_MAX;
struct nlattr **attrs = info->attrs;
@@ -6729,14 +5236,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
void *hdr;
int err;
- start_offset = *((u64 *)&cb->args[0]);
+ start_offset = state->start_offset;
- devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
- devl_lock(devlink);
-
if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
NL_SET_ERR_MSG(cb->extack, "No region name provided");
err = -EINVAL;
@@ -6868,7 +5373,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto nla_put_failure;
}
- *((u64 *)&cb->args[0]) = ret_offset;
+ state->start_offset = ret_offset;
nla_nest_end(skb, chunks_attr);
genlmsg_end(skb, hdr);
@@ -6884,223 +5389,6 @@ out_unlock:
return err;
}
-int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
-
-int devlink_info_board_serial_number_put(struct devlink_info_req *req,
- const char *bsn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
- bsn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
-
-static int devlink_info_version_put(struct devlink_info_req *req, int attr,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- struct nlattr *nest;
- int err;
-
- if (req->version_cb)
- req->version_cb(version_name, version_type,
- req->version_cb_priv);
-
- if (!req->msg)
- return 0;
-
- nest = nla_nest_start_noflag(req->msg, attr);
- if (!nest)
- return -EMSGSIZE;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
- version_name);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
- version_value);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(req->msg, nest);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(req->msg, nest);
- return err;
-}
-
-int devlink_info_version_fixed_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
-
-int devlink_info_version_stored_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
-
-int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
-
-int devlink_info_version_running_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
-
-int devlink_info_version_running_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
-
-static int devlink_nl_driver_info_get(struct device_driver *drv,
- struct devlink_info_req *req)
-{
- if (!drv)
- return 0;
-
- if (drv->name[0])
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
- drv->name);
-
- return 0;
-}
-
-static int
-devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags, struct netlink_ext_ack *extack)
-{
- struct device *dev = devlink_to_dev(devlink);
- struct devlink_info_req req = {};
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- req.msg = msg;
- if (devlink->ops->info_get) {
- err = devlink->ops->info_get(devlink, &req, extack);
- if (err)
- goto err_cancel_msg;
- }
-
- err = devlink_nl_driver_info_get(dev->driver, &req);
- if (err)
- goto err_cancel_msg;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start)
- goto inc;
-
- devl_lock(devlink);
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
- devl_unlock(devlink);
- if (err == -EOPNOTSUPP)
- err = 0;
- else if (err) {
- devlink_put(devlink);
- break;
- }
-inc:
- idx++;
- devlink_put(devlink);
- }
-
- if (err != -EMSGSIZE)
- return err;
-
- cb->args[0] = idx;
- return msg->len;
-}
-
struct devlink_fmsg_item {
struct list_head list;
int attrtype;
@@ -7564,8 +5852,8 @@ devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
{
struct devlink_fmsg_item *item;
struct nlattr *fmsg_nlattr;
+ int err = 0;
int i = 0;
- int err;
fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG);
if (!fmsg_nlattr)
@@ -7666,7 +5954,8 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
struct netlink_callback *cb,
enum devlink_command cmd)
{
- int index = cb->args[0];
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ int index = state->idx;
int tmp_index = index;
void *hdr;
int err;
@@ -7682,7 +5971,7 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
if ((err && err != -EMSGSIZE) || tmp_index == index)
goto nla_put_failure;
- cb->args[0] = index;
+ state->idx = index;
genlmsg_end(skb, hdr);
return skb->len;
@@ -7708,7 +5997,6 @@ struct devlink_health_reporter {
u64 error_count;
u64 recovery_count;
u64 last_recovery_ts;
- refcount_t refcount;
};
void *
@@ -7720,12 +6008,10 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
static struct devlink_health_reporter *
__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
- struct mutex *list_lock,
const char *reporter_name)
{
struct devlink_health_reporter *reporter;
- lockdep_assert_held(list_lock);
list_for_each_entry(reporter, reporter_list, list)
if (!strcmp(reporter->ops->name, reporter_name))
return reporter;
@@ -7737,7 +6023,6 @@ devlink_health_reporter_find_by_name(struct devlink *devlink,
const char *reporter_name)
{
return __devlink_health_reporter_find_by_name(&devlink->reporter_list,
- &devlink->reporters_lock,
reporter_name);
}
@@ -7746,7 +6031,6 @@ devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
const char *reporter_name)
{
return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list,
- &devlink_port->reporters_lock,
reporter_name);
}
@@ -7771,13 +6055,12 @@ __devlink_health_reporter_create(struct devlink *devlink,
reporter->auto_recover = !!ops->recover;
reporter->auto_dump = !!ops->dump;
mutex_init(&reporter->dump_lock);
- refcount_set(&reporter->refcount, 1);
return reporter;
}
/**
- * devlink_port_health_reporter_create - create devlink health reporter for
- * specified port instance
+ * devl_port_health_reporter_create - create devlink health reporter for
+ * specified port instance
*
* @port: devlink_port which should contain the new reporter
* @ops: ops
@@ -7785,34 +6068,47 @@ __devlink_health_reporter_create(struct devlink *devlink,
* @priv: priv
*/
struct devlink_health_reporter *
-devlink_port_health_reporter_create(struct devlink_port *port,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+devl_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
{
struct devlink_health_reporter *reporter;
- mutex_lock(&port->reporters_lock);
+ devl_assert_locked(port->devlink);
+
if (__devlink_health_reporter_find_by_name(&port->reporter_list,
- &port->reporters_lock, ops->name)) {
- reporter = ERR_PTR(-EEXIST);
- goto unlock;
- }
+ ops->name))
+ return ERR_PTR(-EEXIST);
reporter = __devlink_health_reporter_create(port->devlink, ops,
graceful_period, priv);
if (IS_ERR(reporter))
- goto unlock;
+ return reporter;
reporter->devlink_port = port;
list_add_tail(&reporter->list, &port->reporter_list);
-unlock:
- mutex_unlock(&port->reporters_lock);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devl_port_health_reporter_create);
+
+struct devlink_health_reporter *
+devlink_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+ struct devlink *devlink = port->devlink;
+
+ devl_lock(devlink);
+ reporter = devl_port_health_reporter_create(port, ops,
+ graceful_period, priv);
+ devl_unlock(devlink);
return reporter;
}
EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
/**
- * devlink_health_reporter_create - create devlink health reporter
+ * devl_health_reporter_create - create devlink health reporter
*
* @devlink: devlink
* @ops: ops
@@ -7820,26 +6116,38 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
* @priv: priv
*/
struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
+devl_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
{
struct devlink_health_reporter *reporter;
- mutex_lock(&devlink->reporters_lock);
- if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
- reporter = ERR_PTR(-EEXIST);
- goto unlock;
- }
+ devl_assert_locked(devlink);
+
+ if (devlink_health_reporter_find_by_name(devlink, ops->name))
+ return ERR_PTR(-EEXIST);
reporter = __devlink_health_reporter_create(devlink, ops,
graceful_period, priv);
if (IS_ERR(reporter))
- goto unlock;
+ return reporter;
list_add_tail(&reporter->list, &devlink->reporter_list);
-unlock:
- mutex_unlock(&devlink->reporters_lock);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devl_health_reporter_create);
+
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ devl_lock(devlink);
+ reporter = devl_health_reporter_create(devlink, ops,
+ graceful_period, priv);
+ devl_unlock(devlink);
return reporter;
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
@@ -7853,51 +6161,31 @@ devlink_health_reporter_free(struct devlink_health_reporter *reporter)
kfree(reporter);
}
-static void
-devlink_health_reporter_put(struct devlink_health_reporter *reporter)
-{
- if (refcount_dec_and_test(&reporter->refcount))
- devlink_health_reporter_free(reporter);
-}
-
-static void
-__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
- list_del(&reporter->list);
- devlink_health_reporter_put(reporter);
-}
-
/**
- * devlink_health_reporter_destroy - destroy devlink health reporter
+ * devl_health_reporter_destroy - destroy devlink health reporter
*
* @reporter: devlink health reporter to destroy
*/
void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+devl_health_reporter_destroy(struct devlink_health_reporter *reporter)
{
- struct mutex *lock = &reporter->devlink->reporters_lock;
+ devl_assert_locked(reporter->devlink);
- mutex_lock(lock);
- __devlink_health_reporter_destroy(reporter);
- mutex_unlock(lock);
+ list_del(&reporter->list);
+ devlink_health_reporter_free(reporter);
}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+EXPORT_SYMBOL_GPL(devl_health_reporter_destroy);
-/**
- * devlink_port_health_reporter_destroy - destroy devlink port health reporter
- *
- * @reporter: devlink health reporter to destroy
- */
void
-devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter)
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
{
- struct mutex *lock = &reporter->devlink_port->reporters_lock;
+ struct devlink *devlink = reporter->devlink;
- mutex_lock(lock);
- __devlink_health_reporter_destroy(reporter);
- mutex_unlock(lock);
+ devl_lock(devlink);
+ devl_health_reporter_destroy(reporter);
+ devl_unlock(devlink);
}
-EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
static int
devlink_nl_health_reporter_fill(struct sk_buff *msg,
@@ -8128,7 +6416,6 @@ static struct devlink_health_reporter *
devlink_health_reporter_get_from_attrs(struct devlink *devlink,
struct nlattr **attrs)
{
- struct devlink_health_reporter *reporter;
struct devlink_port *devlink_port;
char *reporter_name;
@@ -8137,21 +6424,12 @@ devlink_health_reporter_get_from_attrs(struct devlink *devlink,
reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
devlink_port = devlink_port_get_from_attrs(devlink, attrs);
- if (IS_ERR(devlink_port)) {
- mutex_lock(&devlink->reporters_lock);
- reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
- if (reporter)
- refcount_inc(&reporter->refcount);
- mutex_unlock(&devlink->reporters_lock);
- } else {
- mutex_lock(&devlink_port->reporters_lock);
- reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name);
- if (reporter)
- refcount_inc(&reporter->refcount);
- mutex_unlock(&devlink_port->reporters_lock);
- }
-
- return reporter;
+ if (IS_ERR(devlink_port))
+ return devlink_health_reporter_find_by_name(devlink,
+ reporter_name);
+ else
+ return devlink_port_health_reporter_find_by_name(devlink_port,
+ reporter_name);
}
static struct devlink_health_reporter *
@@ -8169,9 +6447,10 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink))
return NULL;
+ devl_unlock(devlink);
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
devlink_put(devlink);
@@ -8209,10 +6488,8 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
return -EINVAL;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- err = -ENOMEM;
- goto out;
- }
+ if (!msg)
+ return -ENOMEM;
err = devlink_nl_health_reporter_fill(msg, reporter,
DEVLINK_CMD_HEALTH_REPORTER_GET,
@@ -8220,89 +6497,72 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
0);
if (err) {
nlmsg_free(msg);
- goto out;
+ return err;
}
- err = genlmsg_reply(msg, info);
-out:
- devlink_health_reporter_put(reporter);
- return err;
+ return genlmsg_reply(msg, info);
}
static int
-devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_health_reporter *reporter;
- unsigned long index, port_index;
struct devlink_port *port;
- struct devlink *devlink;
- int start = cb->args[0];
+ unsigned long port_index;
int idx = 0;
int err;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- mutex_lock(&devlink->reporters_lock);
- list_for_each_entry(reporter, &devlink->reporter_list,
- list) {
- if (idx < start) {
+ list_for_each_entry(reporter, &devlink->reporter_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
+ }
+ xa_for_each(&devlink->ports, port_index, port) {
+ list_for_each_entry(reporter, &port->reporter_list, list) {
+ if (idx < state->idx) {
idx++;
continue;
}
- err = devlink_nl_health_reporter_fill(
- msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ err = devlink_nl_health_reporter_fill(msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->reporters_lock);
- devlink_put(devlink);
- goto out;
+ state->idx = idx;
+ return err;
}
idx++;
}
- mutex_unlock(&devlink->reporters_lock);
- devlink_put(devlink);
- }
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- xa_for_each(&devlink->ports, port_index, port) {
- mutex_lock(&port->reporters_lock);
- list_for_each_entry(reporter, &port->reporter_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_health_reporter_fill(
- msg, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
- if (err) {
- mutex_unlock(&port->reporters_lock);
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
- idx++;
- }
- mutex_unlock(&port->reporters_lock);
- }
- devl_unlock(devlink);
- devlink_put(devlink);
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return 0;
}
+const struct devlink_cmd devl_cmd_health_reporter_get = {
+ .dump_one = devlink_nl_cmd_health_reporter_get_dump_one,
+};
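The devlink_cmd entry above is what plugs this per-instance walk into the shared iterator added in net/devlink/netlink.c further down. A sketch of the contract a dump_one callback follows, mirroring the converted code above; the foo_list, foo_item and foo_fill names are assumptions for illustration:

	static int foo_dump_one(struct sk_buff *msg, struct devlink *devlink,
				struct netlink_callback *cb)
	{
		struct devlink_nl_dump_state *state = devlink_dump_state(cb);
		struct foo_item *item;
		int idx = 0;
		int err = 0;

		/* the iterator already holds the instance lock */
		list_for_each_entry(item, &devlink->foo_list, list) {
			if (idx < state->idx) {	/* skip entries sent earlier */
				idx++;
				continue;
			}
			err = foo_fill(msg, devlink, item);	/* nla_put()s one object */
			if (err) {			/* typically -EMSGSIZE */
				state->idx = idx;	/* resume point for the next round */
				break;
			}
			idx++;
		}
		return err;
	}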
+
static int
devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
- int err;
reporter = devlink_health_reporter_get_from_info(devlink, info);
if (!reporter)
@@ -8310,15 +6570,12 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
if (!reporter->ops->recover &&
(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+ return -EOPNOTSUPP;
+
if (!reporter->ops->dump &&
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
+ return -EOPNOTSUPP;
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
reporter->graceful_period =
@@ -8332,11 +6589,7 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
reporter->auto_dump =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
- devlink_health_reporter_put(reporter);
return 0;
-out:
- devlink_health_reporter_put(reporter);
- return err;
}
static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
@@ -8344,16 +6597,12 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
- int err;
reporter = devlink_health_reporter_get_from_info(devlink, info);
if (!reporter)
return -EINVAL;
- err = devlink_health_reporter_recover(reporter, NULL, info->extack);
-
- devlink_health_reporter_put(reporter);
- return err;
+ return devlink_health_reporter_recover(reporter, NULL, info->extack);
}
static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
@@ -8368,16 +6617,12 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- if (!reporter->ops->diagnose) {
- devlink_health_reporter_put(reporter);
+ if (!reporter->ops->diagnose)
return -EOPNOTSUPP;
- }
fmsg = devlink_fmsg_alloc();
- if (!fmsg) {
- devlink_health_reporter_put(reporter);
+ if (!fmsg)
return -ENOMEM;
- }
err = devlink_fmsg_obj_nest_start(fmsg);
if (err)
@@ -8396,7 +6641,6 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
out:
devlink_fmsg_free(fmsg);
- devlink_health_reporter_put(reporter);
return err;
}
@@ -8404,26 +6648,25 @@ static int
devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_health_reporter *reporter;
- u64 start = cb->args[0];
int err;
reporter = devlink_health_reporter_get_from_cb(cb);
if (!reporter)
return -EINVAL;
- if (!reporter->ops->dump) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
mutex_lock(&reporter->dump_lock);
- if (!start) {
+ if (!state->idx) {
err = devlink_health_do_dump(reporter, NULL, cb->extack);
if (err)
goto unlock;
- cb->args[1] = reporter->dump_ts;
+ state->dump_ts = reporter->dump_ts;
}
- if (!reporter->dump_fmsg || cb->args[1] != reporter->dump_ts) {
+ if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) {
NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry");
err = -EAGAIN;
goto unlock;
@@ -8433,8 +6676,6 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
unlock:
mutex_unlock(&reporter->dump_lock);
-out:
- devlink_health_reporter_put(reporter);
return err;
}
@@ -8449,15 +6690,12 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- if (!reporter->ops->dump) {
- devlink_health_reporter_put(reporter);
+ if (!reporter->ops->dump)
return -EOPNOTSUPP;
- }
mutex_lock(&reporter->dump_lock);
devlink_health_dump_clear(reporter);
mutex_unlock(&reporter->dump_lock);
- devlink_health_reporter_put(reporter);
return 0;
}
@@ -8466,21 +6704,15 @@ static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
- int err;
reporter = devlink_health_reporter_get_from_info(devlink, info);
if (!reporter)
return -EINVAL;
- if (!reporter->ops->test) {
- devlink_health_reporter_put(reporter);
+ if (!reporter->ops->test)
return -EOPNOTSUPP;
- }
-
- err = reporter->ops->test(reporter, info->extack);
- devlink_health_reporter_put(reporter);
- return err;
+ return reporter->ops->test(reporter, info->extack);
}
struct devlink_stats {
@@ -8814,43 +7046,39 @@ err_trap_fill:
return err;
}
-static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_item *trap_item;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_fill(msg, devlink, trap_item,
- DEVLINK_CMD_TRAP_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_trap_get = {
+ .dump_one = devlink_nl_cmd_trap_get_dump_one,
+};
+
static int __devlink_trap_action_set(struct devlink *devlink,
struct devlink_trap_item *trap_item,
enum devlink_trap_action trap_action,
@@ -9029,46 +7257,41 @@ err_trap_group_fill:
return err;
}
-static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_group_item *group_item;
- u32 portid = NETLINK_CB(cb->skb).portid;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(group_item, &devlink->trap_group_list,
- list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_group_fill(msg, devlink,
- group_item, cmd,
- portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_trap_group_get = {
+ .dump_one = devlink_nl_cmd_trap_group_get_dump_one,
+};
+
static int
__devlink_trap_group_action_set(struct devlink *devlink,
struct devlink_trap_group_item *group_item,
@@ -9144,6 +7367,7 @@ static int devlink_trap_group_set(struct devlink *devlink,
struct netlink_ext_ack *extack = info->extack;
const struct devlink_trap_policer *policer;
struct nlattr **attrs = info->attrs;
+ u32 policer_id;
int err;
if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
@@ -9152,17 +7376,11 @@ static int devlink_trap_group_set(struct devlink *devlink,
if (!devlink->ops->trap_group_set)
return -EOPNOTSUPP;
- policer_item = group_item->policer_item;
- if (attrs[DEVLINK_ATTR_TRAP_POLICER_ID]) {
- u32 policer_id;
-
- policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
- policer_item = devlink_trap_policer_item_lookup(devlink,
- policer_id);
- if (policer_id && !policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
+ policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+ if (policer_id && !policer_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ return -ENOENT;
}
policer = policer_item ? policer_item->policer : NULL;
@@ -9333,46 +7551,40 @@ err_trap_policer_fill:
return err;
}
-static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- enum devlink_command cmd = DEVLINK_CMD_TRAP_POLICER_NEW;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_policer_item *policer_item;
- u32 portid = NETLINK_CB(cb->skb).portid;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(policer_item, &devlink->trap_policer_list,
- list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_policer_fill(msg, devlink,
- policer_item, cmd,
- portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_trap_policer_get = {
+ .dump_one = devlink_nl_cmd_trap_policer_get_dump_one,
+};
+
static int
devlink_trap_policer_set(struct devlink *devlink,
struct devlink_trap_policer_item *policer_item,
@@ -9445,88 +7657,19 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
return devlink_trap_policer_set(devlink, policer_item, info);
}
-static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
- [DEVLINK_ATTR_UNSPEC] = { .strict_start_type =
- DEVLINK_ATTR_TRAP_POLICER_ID },
- [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO,
- DEVLINK_PORT_TYPE_IB),
- [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY,
- DEVLINK_ESWITCH_MODE_SWITCHDEV),
- [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
- [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
- [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
- [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
- [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
- [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
- [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
- [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
- NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
- [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
- [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
- [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
- [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
- [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
- [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
- [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_ACTION_MAX),
- [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
- [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 },
- [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
- [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
- [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
- [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
- [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
- [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
- [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
- [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
- [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
-};
-
-static const struct genl_small_ops devlink_nl_ops[] = {
+const struct genl_small_ops devlink_nl_ops[56] = {
{
.cmd = DEVLINK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_get_doit,
- .dumpit = devlink_nl_cmd_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_PORT_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_get_doit,
- .dumpit = devlink_nl_cmd_port_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9540,7 +7683,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_RATE_GET,
.doit = devlink_nl_cmd_rate_get_doit,
- .dumpit = devlink_nl_cmd_rate_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
/* can be retrieved by unprivileged users */
},
@@ -9588,7 +7731,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_LINECARD_GET,
.doit = devlink_nl_cmd_linecard_get_doit,
- .dumpit = devlink_nl_cmd_linecard_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
/* can be retrieved by unprivileged users */
},
@@ -9602,14 +7745,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
- .dumpit = devlink_nl_cmd_sb_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_SB_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_get_doit,
- .dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9622,7 +7765,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_get_doit,
- .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9637,7 +7780,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
- .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9718,7 +7861,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_PARAM_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_get_doit,
- .dumpit = devlink_nl_cmd_param_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9746,7 +7889,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_REGION_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_get_doit,
- .dumpit = devlink_nl_cmd_region_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.flags = GENL_ADMIN_PERM,
},
{
@@ -9772,14 +7915,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_INFO_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_info_get_doit,
- .dumpit = devlink_nl_cmd_info_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
- .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9834,7 +7977,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_TRAP_GET,
.doit = devlink_nl_cmd_trap_get_doit,
- .dumpit = devlink_nl_cmd_trap_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9845,7 +7988,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
.doit = devlink_nl_cmd_trap_group_get_doit,
- .dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9856,7 +7999,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_TRAP_POLICER_GET,
.doit = devlink_nl_cmd_trap_policer_get_doit,
- .dumpit = devlink_nl_cmd_trap_policer_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9867,7 +8010,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_SELFTESTS_GET,
.doit = devlink_nl_cmd_selftests_get_doit,
- .dumpit = devlink_nl_cmd_selftests_get_dumpit
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9875,148 +8018,9 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_selftests_run,
.flags = GENL_ADMIN_PERM,
},
+ /* -- No new ops here! Use split ops going forward! -- */
};
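For context, "split ops" refers to the newer generic netlink registration style in which the DO and DUMP flavors of a command are declared as separate entries rather than sharing one genl_small_ops slot. A hedged sketch of what such an entry pair might look like; the command and doit handler are hypothetical, and the genl_split_ops fields are assumed from the genetlink API of this kernel generation rather than taken from this patch:

	static const struct genl_split_ops foo_split_ops[] = {
		{
			.cmd = DEVLINK_CMD_FOO_GET,	/* hypothetical */
			.doit = devlink_nl_cmd_foo_get_doit,
			.flags = GENL_CMD_CAP_DO,
		},
		{
			.cmd = DEVLINK_CMD_FOO_GET,
			.dumpit = devlink_nl_instance_iter_dumpit,
			.flags = GENL_CMD_CAP_DUMP,
		},
	};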
-static struct genl_family devlink_nl_family __ro_after_init = {
- .name = DEVLINK_GENL_NAME,
- .version = DEVLINK_GENL_VERSION,
- .maxattr = DEVLINK_ATTR_MAX,
- .policy = devlink_nl_policy,
- .netnsok = true,
- .parallel_ops = true,
- .pre_doit = devlink_nl_pre_doit,
- .post_doit = devlink_nl_post_doit,
- .module = THIS_MODULE,
- .small_ops = devlink_nl_ops,
- .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
- .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
- .mcgrps = devlink_nl_mcgrps,
- .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
-};
-
-static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
-{
- const struct devlink_reload_combination *comb;
- int i;
-
- if (!devlink_reload_supported(ops)) {
- if (WARN_ON(ops->reload_actions))
- return false;
- return true;
- }
-
- if (WARN_ON(!ops->reload_actions ||
- ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
- return false;
-
- if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
- ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
- return false;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
- comb = &devlink_reload_invalid_combinations[i];
- if (ops->reload_actions == BIT(comb->action) &&
- ops->reload_limits == BIT(comb->limit))
- return false;
- }
- return true;
-}
-
-/**
- * devlink_set_features - Set devlink supported features
- *
- * @devlink: devlink
- * @features: devlink support features
- *
- * This interface allows us to set reload ops separatelly from
- * the devlink_alloc.
- */
-void devlink_set_features(struct devlink *devlink, u64 features)
-{
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- WARN_ON(features & DEVLINK_F_RELOAD &&
- !devlink_reload_supported(devlink->ops));
- devlink->features = features;
-}
-EXPORT_SYMBOL_GPL(devlink_set_features);
-
-static int devlink_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr);
-
-/**
- * devlink_alloc_ns - Allocate new devlink instance resources
- * in specific namespace
- *
- * @ops: ops
- * @priv_size: size of user private data
- * @net: net namespace
- * @dev: parent device
- *
- * Allocate new devlink instance resources, including devlink index
- * and name.
- */
-struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
- size_t priv_size, struct net *net,
- struct device *dev)
-{
- struct devlink *devlink;
- static u32 last_id;
- int ret;
-
- WARN_ON(!ops || !dev);
- if (!devlink_reload_actions_valid(ops))
- return NULL;
-
- devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
- if (!devlink)
- return NULL;
-
- ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
- &last_id, GFP_KERNEL);
- if (ret < 0)
- goto err_xa_alloc;
-
- devlink->netdevice_nb.notifier_call = devlink_netdevice_event;
- ret = register_netdevice_notifier(&devlink->netdevice_nb);
- if (ret)
- goto err_register_netdevice_notifier;
-
- devlink->dev = dev;
- devlink->ops = ops;
- xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
- xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
- write_pnet(&devlink->_net, net);
- INIT_LIST_HEAD(&devlink->rate_list);
- INIT_LIST_HEAD(&devlink->linecard_list);
- INIT_LIST_HEAD(&devlink->sb_list);
- INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
- INIT_LIST_HEAD(&devlink->resource_list);
- INIT_LIST_HEAD(&devlink->param_list);
- INIT_LIST_HEAD(&devlink->region_list);
- INIT_LIST_HEAD(&devlink->reporter_list);
- INIT_LIST_HEAD(&devlink->trap_list);
- INIT_LIST_HEAD(&devlink->trap_group_list);
- INIT_LIST_HEAD(&devlink->trap_policer_list);
- lockdep_register_key(&devlink->lock_key);
- mutex_init(&devlink->lock);
- lockdep_set_class(&devlink->lock, &devlink->lock_key);
- mutex_init(&devlink->reporters_lock);
- mutex_init(&devlink->linecards_lock);
- refcount_set(&devlink->refcount, 1);
- init_completion(&devlink->comp);
-
- return devlink;
-
-err_register_netdevice_notifier:
- xa_erase(&devlinks, devlink->index);
-err_xa_alloc:
- kfree(devlink);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(devlink_alloc_ns);
-
static void
devlink_trap_policer_notify(struct devlink *devlink,
const struct devlink_trap_policer_item *policer_item,
@@ -10029,7 +8033,7 @@ static void devlink_trap_notify(struct devlink *devlink,
const struct devlink_trap_item *trap_item,
enum devlink_command cmd);
-static void devlink_notify_register(struct devlink *devlink)
+void devlink_notify_register(struct devlink *devlink)
{
struct devlink_trap_policer_item *policer_item;
struct devlink_trap_group_item *group_item;
@@ -10070,7 +8074,7 @@ static void devlink_notify_register(struct devlink *devlink)
DEVLINK_CMD_PARAM_NEW);
}
-static void devlink_notify_unregister(struct devlink *devlink)
+void devlink_notify_unregister(struct devlink *devlink)
{
struct devlink_trap_policer_item *policer_item;
struct devlink_trap_group_item *group_item;
@@ -10107,78 +8111,6 @@ static void devlink_notify_unregister(struct devlink *devlink)
devlink_notify(devlink, DEVLINK_CMD_DEL);
}
-/**
- * devlink_register - Register devlink instance
- *
- * @devlink: devlink
- */
-void devlink_register(struct devlink *devlink)
-{
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
- /* Make sure that we are in .probe() routine */
-
- xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- devlink_notify_register(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_register);
-
-/**
- * devlink_unregister - Unregister devlink instance
- *
- * @devlink: devlink
- */
-void devlink_unregister(struct devlink *devlink)
-{
- ASSERT_DEVLINK_REGISTERED(devlink);
- /* Make sure that we are in .remove() routine */
-
- xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
- devlink_put(devlink);
- wait_for_completion(&devlink->comp);
-
- devlink_notify_unregister(devlink);
- xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
-}
-EXPORT_SYMBOL_GPL(devlink_unregister);
-
-/**
- * devlink_free - Free devlink instance resources
- *
- * @devlink: devlink
- */
-void devlink_free(struct devlink *devlink)
-{
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- mutex_destroy(&devlink->linecards_lock);
- mutex_destroy(&devlink->reporters_lock);
- mutex_destroy(&devlink->lock);
- lockdep_unregister_key(&devlink->lock_key);
- WARN_ON(!list_empty(&devlink->trap_policer_list));
- WARN_ON(!list_empty(&devlink->trap_group_list));
- WARN_ON(!list_empty(&devlink->trap_list));
- WARN_ON(!list_empty(&devlink->reporter_list));
- WARN_ON(!list_empty(&devlink->region_list));
- WARN_ON(!list_empty(&devlink->param_list));
- WARN_ON(!list_empty(&devlink->resource_list));
- WARN_ON(!list_empty(&devlink->dpipe_table_list));
- WARN_ON(!list_empty(&devlink->sb_list));
- WARN_ON(!list_empty(&devlink->rate_list));
- WARN_ON(!list_empty(&devlink->linecard_list));
- WARN_ON(!xa_empty(&devlink->ports));
-
- xa_destroy(&devlink->snapshot_ids);
- xa_destroy(&devlink->ports);
-
- WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
-
- xa_erase(&devlinks, devlink->index);
-
- kfree(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_free);
-
static void devlink_port_type_warn(struct work_struct *work)
{
WARN(true, "Type was not set for devlink port.");
@@ -10278,12 +8210,9 @@ int devl_port_register(struct devlink *devlink,
devlink_port->index = port_index;
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
- mutex_init(&devlink_port->reporters_lock);
err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
- if (err) {
- mutex_destroy(&devlink_port->reporters_lock);
+ if (err)
return err;
- }
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
@@ -10334,7 +8263,6 @@ void devl_port_unregister(struct devlink_port *devlink_port)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
xa_erase(&devlink_port->devlink->ports, devlink_port->index);
WARN_ON(!list_empty(&devlink_port->reporter_list));
- mutex_destroy(&devlink_port->reporters_lock);
devlink_port->registered = false;
}
EXPORT_SYMBOL_GPL(devl_port_unregister);
@@ -10479,8 +8407,8 @@ void devlink_port_type_clear(struct devlink_port *devlink_port)
}
EXPORT_SYMBOL_GPL(devlink_port_type_clear);
-static int devlink_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+int devlink_port_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
struct devlink_port *devlink_port = netdev->devlink_port;
@@ -10914,7 +8842,7 @@ static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
}
/**
- * devlink_linecard_create - Create devlink linecard
+ * devl_linecard_create - Create devlink linecard
*
* @devlink: devlink
* @linecard_index: driver-specific numerical identifier of the linecard
@@ -10927,8 +8855,8 @@ static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
* Return: Line card structure or an ERR_PTR() encoded error code.
*/
struct devlink_linecard *
-devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
- const struct devlink_linecard_ops *ops, void *priv)
+devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
{
struct devlink_linecard *linecard;
int err;
@@ -10937,17 +8865,12 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
!ops->types_count || !ops->types_get))
return ERR_PTR(-EINVAL);
- mutex_lock(&devlink->linecards_lock);
- if (devlink_linecard_index_exists(devlink, linecard_index)) {
- mutex_unlock(&devlink->linecards_lock);
+ if (devlink_linecard_index_exists(devlink, linecard_index))
return ERR_PTR(-EEXIST);
- }
linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
- if (!linecard) {
- mutex_unlock(&devlink->linecards_lock);
+ if (!linecard)
return ERR_PTR(-ENOMEM);
- }
linecard->devlink = devlink;
linecard->index = linecard_index;
@@ -10960,35 +8883,29 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
if (err) {
mutex_destroy(&linecard->state_lock);
kfree(linecard);
- mutex_unlock(&devlink->linecards_lock);
return ERR_PTR(err);
}
list_add_tail(&linecard->list, &devlink->linecard_list);
- refcount_set(&linecard->refcount, 1);
- mutex_unlock(&devlink->linecards_lock);
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
return linecard;
}
-EXPORT_SYMBOL_GPL(devlink_linecard_create);
+EXPORT_SYMBOL_GPL(devl_linecard_create);
/**
- * devlink_linecard_destroy - Destroy devlink linecard
+ * devl_linecard_destroy - Destroy devlink linecard
*
* @linecard: devlink linecard
*/
-void devlink_linecard_destroy(struct devlink_linecard *linecard)
+void devl_linecard_destroy(struct devlink_linecard *linecard)
{
- struct devlink *devlink = linecard->devlink;
-
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
- mutex_lock(&devlink->linecards_lock);
list_del(&linecard->list);
devlink_linecard_types_fini(linecard);
- mutex_unlock(&devlink->linecards_lock);
- devlink_linecard_put(linecard);
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
}
-EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
+EXPORT_SYMBOL_GPL(devl_linecard_destroy);
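With linecards_lock and the per-linecard refcount gone, creation and destruction both rely on the caller holding the instance lock. A minimal sketch under that assumption; the ops table and helper are illustrative, and per the WARN_ON earlier a real ops table must provide provision, unprovision, types_count and types_get:

	static const struct devlink_linecard_ops foo_linecard_ops = {
		/* .provision, .unprovision, .types_count, .types_get ... */
	};

	/* Called with the devlink instance lock held. */
	static struct devlink_linecard *
	foo_linecard_add(struct devlink *devlink, unsigned int index, void *priv)
	{
		return devl_linecard_create(devlink, index,
					    &foo_linecard_ops, priv);
	}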
/**
* devlink_linecard_provision_set - Set provisioning on linecard
@@ -11591,8 +9508,46 @@ static int devlink_param_verify(const struct devlink_param *param)
return devlink_param_driver_verify(param);
}
+static int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ WARN_ON(devlink_param_verify(param));
+ WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
+
+ if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+ WARN_ON(param->get || param->set);
+ else
+ WARN_ON(!param->get || !param->set);
+
+ param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+ if (!param_item)
+ return -ENOMEM;
+
+ param_item->param = param;
+
+ list_add_tail(&param_item->list, &devlink->param_list);
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+ return 0;
+}
+
+static void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ param_item =
+ devlink_param_find_by_name(&devlink->param_list, param->name);
+ if (WARN_ON(!param_item))
+ return;
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
+ list_del(&param_item->list);
+ kfree(param_item);
+}
+
/**
- * devlink_params_register - register configuration parameters
+ * devl_params_register - register configuration parameters
*
* @devlink: devlink
* @params: configuration parameters array
@@ -11600,14 +9555,14 @@ static int devlink_param_verify(const struct devlink_param *param)
*
* Register the configuration parameters supported by the driver.
*/
-int devlink_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
+int devl_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
{
const struct devlink_param *param = params;
int i, err;
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ lockdep_assert_held(&devlink->lock);
for (i = 0; i < params_count; i++, param++) {
err = devlink_param_register(devlink, param);
@@ -11624,86 +9579,54 @@ rollback:
devlink_param_unregister(devlink, param);
return err;
}
+EXPORT_SYMBOL_GPL(devl_params_register);
+
+int devlink_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_params_register(devlink, params, params_count);
+ devl_unlock(devlink);
+ return err;
+}
EXPORT_SYMBOL_GPL(devlink_params_register);
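A sketch of a driver registering its parameters from a path that already holds the instance lock, assuming the generic DEVLINK_PARAM_GENERIC() helper; the parameter choice is illustrative (note the NULL get/set, which the WARN_ON in devlink_param_register() requires for driverinit-only parameters):

	static const struct devlink_param foo_params[] = {
		DEVLINK_PARAM_GENERIC(ENABLE_ROCE,
				      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
				      NULL, NULL, NULL),
	};

	/* Called with the devlink instance lock held. */
	static int foo_params_init(struct devlink *devlink)
	{
		return devl_params_register(devlink, foo_params,
					    ARRAY_SIZE(foo_params));
	}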
/**
- * devlink_params_unregister - unregister configuration parameters
+ * devl_params_unregister - unregister configuration parameters
* @devlink: devlink
* @params: configuration parameters to unregister
* @params_count: number of parameters provided
*/
-void devlink_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
+void devl_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
{
const struct devlink_param *param = params;
int i;
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ lockdep_assert_held(&devlink->lock);
for (i = 0; i < params_count; i++, param++)
devlink_param_unregister(devlink, param);
}
-EXPORT_SYMBOL_GPL(devlink_params_unregister);
-
-/**
- * devlink_param_register - register one configuration parameter
- *
- * @devlink: devlink
- * @param: one configuration parameter
- *
- * Register the configuration parameter supported by the driver.
- * Return: returns 0 on successful registration or error code otherwise.
- */
-int devlink_param_register(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
-
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- WARN_ON(devlink_param_verify(param));
- WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
-
- if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
- WARN_ON(param->get || param->set);
- else
- WARN_ON(!param->get || !param->set);
-
- param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
- if (!param_item)
- return -ENOMEM;
-
- param_item->param = param;
-
- list_add_tail(&param_item->list, &devlink->param_list);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_param_register);
+EXPORT_SYMBOL_GPL(devl_params_unregister);
-/**
- * devlink_param_unregister - unregister one configuration parameter
- * @devlink: devlink
- * @param: configuration parameter to unregister
- */
-void devlink_param_unregister(struct devlink *devlink,
- const struct devlink_param *param)
+void devlink_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
{
- struct devlink_param_item *param_item;
-
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- param_item =
- devlink_param_find_by_name(&devlink->param_list, param->name);
- WARN_ON(!param_item);
- list_del(&param_item->list);
- kfree(param_item);
+ devl_lock(devlink);
+ devl_params_unregister(devlink, params, params_count);
+ devl_unlock(devlink);
}
-EXPORT_SYMBOL_GPL(devlink_param_unregister);
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
/**
- * devlink_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
+ * devl_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
*
* @devlink: devlink
* @param_id: parameter ID
@@ -11712,21 +9635,25 @@ EXPORT_SYMBOL_GPL(devlink_param_unregister);
* This function should be used by the driver to get driverinit
* configuration for initialization after reload command.
*/
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
+int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *init_val)
{
struct devlink_param_item *param_item;
- if (!devlink_reload_supported(devlink->ops))
+ lockdep_assert_held(&devlink->lock);
+
+ if (WARN_ON(!devlink_reload_supported(devlink->ops)))
return -EOPNOTSUPP;
param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
if (!param_item)
return -EINVAL;
- if (!param_item->driverinit_value_valid ||
- !devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
+ if (!param_item->driverinit_value_valid)
+ return -EOPNOTSUPP;
+
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
return -EOPNOTSUPP;
if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
@@ -11736,12 +9663,12 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
return 0;
}
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
/**
- * devlink_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
+ * devl_param_driverinit_value_set - set value of configuration
+ * parameter for driverinit
+ * configuration mode
*
* @devlink: devlink
* @param_id: parameter ID
@@ -11750,34 +9677,33 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
* This function should be used by the driver to set driverinit
* configuration mode default value.
*/
-int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
- union devlink_param_value init_val)
+void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+ union devlink_param_value init_val)
{
struct devlink_param_item *param_item;
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
- if (!param_item)
- return -EINVAL;
+ if (WARN_ON(!param_item))
+ return;
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
- return -EOPNOTSUPP;
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return;
if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
strcpy(param_item->driverinit_value.vstr, init_val.vstr);
else
param_item->driverinit_value = init_val;
param_item->driverinit_value_valid = true;
- return 0;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
}
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
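Continuing the registration sketch from earlier: once a driverinit parameter is registered, the driver seeds its default through the new void variant, which WARNs on misuse instead of returning an error and now also emits a PARAM_NEW notification. The parameter id matches the illustrative example above:

	/* Called with the instance lock held, after devl_params_register(). */
	static void foo_params_set_defaults(struct devlink *devlink)
	{
		union devlink_param_value value;

		value.vbool = true;
		devl_param_driverinit_value_set(devlink,
						DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
						value);
	}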
/**
- * devlink_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
+ * devl_param_value_changed - notify devlink on a parameter's value
+ * change. Should be called by the driver
+ * right after the change.
*
* @devlink: devlink
* @param_id: parameter ID
@@ -11786,7 +9712,7 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
* change, excluding driverinit configuration mode.
* For driverinit configuration mode driver should use the function
*/
-void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+void devl_param_value_changed(struct devlink *devlink, u32 param_id)
{
struct devlink_param_item *param_item;
@@ -11795,7 +9721,7 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
}
-EXPORT_SYMBOL_GPL(devlink_param_value_changed);
+EXPORT_SYMBOL_GPL(devl_param_value_changed);
/**
* devl_region_create - create a new address region
@@ -12881,76 +10807,6 @@ devl_trap_policers_unregister(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
-static void __devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- struct devlink_info_req req = {};
- const struct nlattr *nlattr;
- struct sk_buff *msg;
- int rem, err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- req.msg = msg;
- err = devlink->ops->info_get(devlink, &req, NULL);
- if (err)
- goto free_msg;
-
- nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
- const struct nlattr *kv;
- int rem_kv;
-
- if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
- continue;
-
- nla_for_each_nested(kv, nlattr, rem_kv) {
- if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
- continue;
-
- strlcat(buf, nla_data(kv), len);
- strlcat(buf, " ", len);
- }
- }
-free_msg:
- nlmsg_free(msg);
-}
-
-void devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- if (!devlink->ops->info_get)
- return;
-
- devl_lock(devlink);
- __devlink_compat_running_version(devlink, buf, len);
- devl_unlock(devlink);
-}
-
-int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
-{
- struct devlink_flash_update_params params = {};
- int ret;
-
- if (!devlink->ops->flash_update)
- return -EOPNOTSUPP;
-
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret)
- return ret;
-
- devl_lock(devlink);
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, NULL);
- devlink_flash_update_end_notify(devlink);
- devl_unlock(devlink);
-
- release_firmware(params.fw);
-
- return ret;
-}
-
int devlink_compat_phys_port_name_get(struct net_device *dev,
char *name, size_t len)
{
@@ -12986,47 +10842,3 @@ int devlink_compat_switch_id_get(struct net_device *dev,
return 0;
}
-
-static void __net_exit devlink_pernet_pre_exit(struct net *net)
-{
- struct devlink *devlink;
- u32 actions_performed;
- unsigned long index;
- int err;
-
- /* In case network namespace is getting destroyed, reload
- * all devlink instances from this namespace into init_net.
- */
- devlinks_xa_for_each_registered_get(net, index, devlink) {
- WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
- mutex_lock(&devlink->lock);
- err = devlink_reload(devlink, &init_net,
- DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_LIMIT_UNSPEC,
- &actions_performed, NULL);
- mutex_unlock(&devlink->lock);
- if (err && err != -EOPNOTSUPP)
- pr_warn("Failed to reload devlink instance into init_net\n");
- devlink_put(devlink);
- }
-}
-
-static struct pernet_operations devlink_pernet_ops __net_initdata = {
- .pre_exit = devlink_pernet_pre_exit,
-};
-
-static int __init devlink_init(void)
-{
- int err;
-
- err = genl_register_family(&devlink_nl_family);
- if (err)
- goto out;
- err = register_pernet_subsys(&devlink_pernet_ops);
-
-out:
- WARN_ON(err);
- return err;
-}
-
-subsys_initcall(devlink_init);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
new file mode 100644
index 000000000000..7a332eb70f70
--- /dev/null
+++ b/net/devlink/netlink.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+
+#include "devl_internal.h"
+
+static const struct genl_multicast_group devlink_nl_mcgrps[] = {
+ [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
+};
+
+static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_UNSPEC] = { .strict_start_type =
+ DEVLINK_ATTR_TRAP_POLICER_ID },
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO,
+ DEVLINK_PORT_TYPE_IB),
+ [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY,
+ DEVLINK_ESWITCH_MODE_SWITCHDEV),
+ [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
+ [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
+ [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
+ NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
+ [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_ACTION_MAX),
+ [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
+ [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
+ [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
+};
+
+struct devlink *
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
+{
+ struct devlink *devlink;
+ unsigned long index;
+ char *busname;
+ char *devname;
+
+ if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
+ return ERR_PTR(-EINVAL);
+
+ busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
+ devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
+
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
+ devl_lock(devlink);
+ if (devl_is_registered(devlink) &&
+ strcmp(devlink->dev->bus->name, busname) == 0 &&
+ strcmp(dev_name(devlink->dev), devname) == 0)
+ return devlink;
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ }
+
+ return ERR_PTR(-ENODEV);
+}
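On success the instance comes back both referenced and locked, so every caller must undo both, as devlink_nl_post_doit() below and the reporter lookup in leftover.c above do. A minimal caller sketch (foo_handle is illustrative):

	static int foo_handle(struct net *net, struct nlattr **attrs)
	{
		struct devlink *devlink;

		devlink = devlink_get_from_attrs_lock(net, attrs);
		if (IS_ERR(devlink))
			return PTR_ERR(devlink);
		/* ... operate on the locked, referenced instance ... */
		devl_unlock(devlink);
		devlink_put(devlink);
		return 0;
	}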
+
+static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_linecard *linecard;
+ struct devlink_port *devlink_port;
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+
+ info->user_ptr[0] = devlink;
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (IS_ERR(devlink_port)) {
+ err = PTR_ERR(devlink_port);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (!IS_ERR(devlink_port))
+ info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_rate;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
+ struct devlink_rate *rate_node;
+
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node)) {
+ err = PTR_ERR(rate_node);
+ goto unlock;
+ }
+ info->user_ptr[1] = rate_node;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard)) {
+ err = PTR_ERR(linecard);
+ goto unlock;
+ }
+ info->user_ptr[1] = linecard;
+ }
+ return 0;
+
+unlock:
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return err;
+}
+
+static void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink;
+
+ devlink = info->user_ptr[0];
+ devl_unlock(devlink);
+ devlink_put(devlink);
+}
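
[note] The pre/post pair brackets every doit handler: pre_doit leaves the instance locked and referenced in user_ptr[0], post_doit releases both unconditionally, and any sub-object lookup failure in pre_doit must release them itself before returning (the unlock: label above). A toy model of that contract (not kernel code):

#include <stdio.h>

struct ctx { void *user_ptr[2]; };
struct dl { int locked, refs, has_port; };

/* Model of devlink_nl_pre_doit(): take ref + lock, stash the instance;
 * on a sub-object lookup failure, release both before returning.
 */
static int pre_doit(struct ctx *c, struct dl *d, int need_port)
{
	d->refs++;
	d->locked = 1;
	c->user_ptr[0] = d;
	if (need_port && !d->has_port) {
		d->locked = 0;
		d->refs--;
		return -1;	/* an errno in the real code */
	}
	return 0;
}

static void post_doit(struct ctx *c)
{
	struct dl *d = c->user_ptr[0];

	d->locked = 0;	/* devl_unlock() */
	d->refs--;	/* devlink_put() */
}

int main(void)
{
	struct dl d = { .has_port = 1 };
	struct ctx c;

	if (pre_doit(&c, &d, 1) == 0)
		post_doit(&c);
	printf("balanced: locked=%d refs=%d\n", d.locked, d.refs);
	return 0;
}
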
+
+static const struct devlink_cmd *devl_cmds[] = {
+ [DEVLINK_CMD_GET] = &devl_cmd_get,
+ [DEVLINK_CMD_PORT_GET] = &devl_cmd_port_get,
+ [DEVLINK_CMD_SB_GET] = &devl_cmd_sb_get,
+ [DEVLINK_CMD_SB_POOL_GET] = &devl_cmd_sb_pool_get,
+ [DEVLINK_CMD_SB_PORT_POOL_GET] = &devl_cmd_sb_port_pool_get,
+ [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = &devl_cmd_sb_tc_pool_bind_get,
+ [DEVLINK_CMD_PARAM_GET] = &devl_cmd_param_get,
+ [DEVLINK_CMD_REGION_GET] = &devl_cmd_region_get,
+ [DEVLINK_CMD_INFO_GET] = &devl_cmd_info_get,
+ [DEVLINK_CMD_HEALTH_REPORTER_GET] = &devl_cmd_health_reporter_get,
+ [DEVLINK_CMD_TRAP_GET] = &devl_cmd_trap_get,
+ [DEVLINK_CMD_TRAP_GROUP_GET] = &devl_cmd_trap_group_get,
+ [DEVLINK_CMD_TRAP_POLICER_GET] = &devl_cmd_trap_policer_get,
+ [DEVLINK_CMD_RATE_GET] = &devl_cmd_rate_get,
+ [DEVLINK_CMD_LINECARD_GET] = &devl_cmd_linecard_get,
+ [DEVLINK_CMD_SELFTESTS_GET] = &devl_cmd_selftests_get,
+};
+
+int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ const struct devlink_cmd *cmd;
+ struct devlink *devlink;
+ int err = 0;
+
+ cmd = devl_cmds[info->op.cmd];
+
+ while ((devlink = devlinks_xa_find_get(sock_net(msg->sk),
+ &state->instance))) {
+ devl_lock(devlink);
+
+ if (devl_is_registered(devlink))
+ err = cmd->dump_one(msg, devlink, cb);
+ else
+ err = 0;
+
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+ if (err)
+ break;
+
+ state->instance++;
+
+ /* restart sub-object walk for the next instance */
+ state->idx = 0;
+ }
+
+ if (err != -EMSGSIZE)
+ return err;
+ return msg->len;
+}
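
[note] The dumpit walks instances in index order, and state->instance remembers where to resume when the message buffer fills: a -EMSGSIZE from dump_one() is converted into returning msg->len, which tells the netlink core to deliver the partial message and call the dumpit again with the same state. A standalone model of that resume-on-full loop (illustrative, not the kernel API):

#include <stdio.h>

#define EMSGSIZE 90
#define NITEMS   5
#define BUDGET   2	/* items that fit per message */

struct state { int instance; };

/* Pretend to serialize one instance; fail when the buffer is full. */
static int dump_one(int idx, int *used)
{
	if (*used == BUDGET)
		return -EMSGSIZE;
	printf("  emit instance %d\n", idx);
	(*used)++;
	return 0;
}

static int dumpit(struct state *st)
{
	int used = 0, err = 0;

	while (st->instance < NITEMS) {
		err = dump_one(st->instance, &used);
		if (err)
			break;
		st->instance++;
	}
	if (err != -EMSGSIZE)
		return err;	/* done (0) or hard error */
	return used;		/* partial message: call me again */
}

int main(void)
{
	struct state st = { 0 };
	int msg = 1, ret;

	do {
		printf("message %d:\n", msg++);
		ret = dumpit(&st);
	} while (ret > 0);
	return ret < 0 ? 1 : 0;
}
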
+
+struct genl_family devlink_nl_family __ro_after_init = {
+ .name = DEVLINK_GENL_NAME,
+ .version = DEVLINK_GENL_VERSION,
+ .maxattr = DEVLINK_ATTR_MAX,
+ .policy = devlink_nl_policy,
+ .netnsok = true,
+ .parallel_ops = true,
+ .pre_doit = devlink_nl_pre_doit,
+ .post_doit = devlink_nl_post_doit,
+ .module = THIS_MODULE,
+ .small_ops = devlink_nl_ops,
+ .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
+ .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
+ .mcgrps = devlink_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
+};
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 26d90140d271..22d3f16b0e6d 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -299,7 +299,7 @@ static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
struct net_device *dev = to_net_dev(d);
struct dsa_port *cpu_dp = dev->dsa_ptr;
- return sprintf(buf, "%s\n",
+ return sysfs_emit(buf, "%s\n",
dsa_tag_protocol_to_str(cpu_dp->tag_ops));
}
@@ -464,9 +464,7 @@ int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack);
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "CPU port failed to join LAG");
+ NL_SET_ERR_MSG_WEAK_MOD(extack, "CPU port failed to join LAG");
goto out_master_teardown;
}
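
[note] NL_SET_ERR_MSG_WEAK_MOD() replaces the open-coded "if (extack && !extack->_msg)" checks: it installs the generic fallback text only when the callee has not already attached a more specific extack message. A minimal model of the weak-set semantics (assuming the macro behaves as the removed code did):

#include <stdio.h>

struct extack { const char *msg; };

/* Strong set: always overwrite. */
#define SET_MSG(e, m)       do { if (e) (e)->msg = (m); } while (0)
/* Weak set: keep a more specific message from deeper code. */
#define SET_MSG_WEAK(e, m)  do { if ((e) && !(e)->msg) SET_MSG(e, m); } while (0)

static int port_lag_join(struct extack *e, int fail_specific)
{
	if (fail_specific)
		SET_MSG(e, "driver: LAG hashing mode not supported");
	return -1;
}

int main(void)
{
	struct extack e1 = { 0 }, e2 = { 0 };

	port_lag_join(&e1, 1);
	SET_MSG_WEAK(&e1, "CPU port failed to join LAG");
	printf("specific kept:  %s\n", e1.msg);

	port_lag_join(&e2, 0);
	SET_MSG_WEAK(&e2, "CPU port failed to join LAG");
	printf("fallback used:  %s\n", e2.msg);
	return 0;
}
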
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index aab79c355224..6957971c2db2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1117,6 +1117,40 @@ static void dsa_slave_net_selftest(struct net_device *ndev,
net_selftest(ndev, etest, buf);
}
+static int dsa_slave_get_mm(struct net_device *dev,
+ struct ethtool_mm_state *state)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->get_mm)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_mm(ds, dp->index, state);
+}
+
+static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->set_mm)
+ return -EOPNOTSUPP;
+
+ return ds->ops->set_mm(ds, dp->index, cfg, extack);
+}
+
+static void dsa_slave_get_mm_stats(struct net_device *dev,
+ struct ethtool_mm_stats *stats)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_mm_stats)
+ ds->ops->get_mm_stats(ds, dp->index, stats);
+}
+
static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
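
[note] Each of the three new ethtool ops is a thin trampoline from the slave netdev to the switch driver: resolve the port, then call the corresponding ds->ops hook if the driver provides one, otherwise return -EOPNOTSUPP (or silently skip, for stats). A hypothetical driver-side get_mm(), sketched only to show the shape of what the trampoline calls (register layout and names are made up):

#include <stdbool.h>
#include <stdio.h>

struct mm_state {
	bool tx_enabled, tx_active, verify_enabled;
	unsigned int verify_time, max_verify_time;
	unsigned int tx_min_frag_size, rx_min_frag_size;
};

struct sw_port { unsigned int mm_ctrl_reg; };
struct sw { struct sw_port port[8]; };

/* Hypothetical hook behind dsa_slave_get_mm(); simplified stand-in
 * types, not the kernel's dsa_switch/ethtool_mm_state.
 */
static int example_get_mm(struct sw *ds, int port, struct mm_state *st)
{
	unsigned int reg = ds->port[port].mm_ctrl_reg; /* pretend MMIO read */

	st->tx_enabled       = reg & 0x1;
	st->verify_enabled   = reg & 0x2;
	st->tx_active        = reg & 0x4;	/* verification completed */
	st->verify_time      = 10;		/* ms, device default */
	st->max_verify_time  = 128;
	st->tx_min_frag_size = 60;
	st->rx_min_frag_size = 60;
	return 0;
}

int main(void)
{
	struct sw ds = { .port[2].mm_ctrl_reg = 0x7 };
	struct mm_state st;

	example_get_mm(&ds, 2, &st);
	printf("tx_enabled=%d tx_active=%d\n", st.tx_enabled, st.tx_active);
	return 0;
}
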
@@ -2205,6 +2239,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_rxnfc = dsa_slave_set_rxnfc,
.get_ts_info = dsa_slave_get_ts_info,
.self_test = dsa_slave_net_selftest,
+ .get_mm = dsa_slave_get_mm,
+ .set_mm = dsa_slave_set_mm,
+ .get_mm_stats = dsa_slave_get_mm_stats,
};
static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = {
@@ -2655,9 +2692,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2670,8 +2706,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
err = dsa_port_lag_join(dp, info->upper_dev,
info->upper_info, extack);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2683,8 +2719,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (info->linking) {
err = dsa_port_hsr_join(dp, info->upper_dev);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 080e5c369f5b..0eb1c7784c3d 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -4,8 +4,10 @@
* Copyright (c) 2017 Microchip Technology
*/
+#include <linux/dsa/ksz_common.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
+#include <linux/ptp_classify.h>
#include <net/dsa.h>
#include "tag.h"
@@ -16,9 +18,71 @@
#define LAN937X_NAME "lan937x"
/* Typically only one byte is used for tail tag. */
+#define KSZ_PTP_TAG_LEN 4
#define KSZ_EGRESS_TAG_LEN 1
#define KSZ_INGRESS_TAG_LEN 1
+#define KSZ_HWTS_EN 0
+
+struct ksz_tagger_private {
+ struct ksz_tagger_data data; /* Must be first */
+ unsigned long state;
+ struct kthread_worker *xmit_worker;
+};
+
+static struct ksz_tagger_private *
+ksz_tagger_private(struct dsa_switch *ds)
+{
+ return ds->tagger_data;
+}
+
+static void ksz_hwtstamp_set_state(struct dsa_switch *ds, bool on)
+{
+ struct ksz_tagger_private *priv = ksz_tagger_private(ds);
+
+ if (on)
+ set_bit(KSZ_HWTS_EN, &priv->state);
+ else
+ clear_bit(KSZ_HWTS_EN, &priv->state);
+}
+
+static void ksz_disconnect(struct dsa_switch *ds)
+{
+ struct ksz_tagger_private *priv = ds->tagger_data;
+
+ kthread_destroy_worker(priv->xmit_worker);
+ kfree(priv);
+ ds->tagger_data = NULL;
+}
+
+static int ksz_connect(struct dsa_switch *ds)
+{
+ struct ksz_tagger_data *tagger_data;
+ struct kthread_worker *xmit_worker;
+ struct ksz_tagger_private *priv;
+ int ret;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ xmit_worker = kthread_create_worker(0, "dsa%d:%d_xmit",
+ ds->dst->index, ds->index);
+ if (IS_ERR(xmit_worker)) {
+ ret = PTR_ERR(xmit_worker);
+ kfree(priv);
+ return ret;
+ }
+
+ priv->xmit_worker = xmit_worker;
+ /* Export functions for switch driver use */
+ tagger_data = &priv->data;
+ tagger_data->hwtstamp_set_state = ksz_hwtstamp_set_state;
+ ds->tagger_data = priv;
+
+ return 0;
+}
+
static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
struct net_device *dev,
unsigned int port, unsigned int len)
@@ -92,17 +156,20 @@ DSA_TAG_DRIVER(ksz8795_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
/*
- * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
+ * For Ingress (Host -> KSZ9477), 2/6 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|tag1(1byte)|
+ * FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if PTP is enabled in the hardware)
* tag0 : Prioritization (not used now)
* tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
*
- * For Egress (KSZ9477 -> Host), 1 byte is added before FCS.
+ * For Egress (KSZ9477 -> Host), 1/5 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if bit 7 of tag0 is set)
* tag0 : zero-based value represents port
* (eg, 0x00=port1, 0x02=port3, 0x06=port7)
*/
@@ -111,12 +178,100 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
#define KSZ9477_PTP_TAG_LEN 4
#define KSZ9477_PTP_TAG_INDICATION 0x80
+#define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7)
#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
+static void ksz_rcv_timestamp(struct sk_buff *skb, u8 *tag)
+{
+ u8 *tstamp_raw = tag - KSZ_PTP_TAG_LEN;
+ ktime_t tstamp;
+
+ tstamp = ksz_decode_tstamp(get_unaligned_be32(tstamp_raw));
+ KSZ_SKB_CB(skb)->tstamp = tstamp;
+}
+
+/* The time stamp tag *needs* to be inserted if PTP is enabled in hardware,
+ * regardless of whether the frame is a PTP frame or not.
+ */
+static void ksz_xmit_timestamp(struct dsa_port *dp, struct sk_buff *skb)
+{
+ struct ksz_tagger_private *priv;
+ struct ptp_header *ptp_hdr;
+ unsigned int ptp_type;
+ u32 tstamp_raw = 0;
+ s64 correction;
+
+ priv = ksz_tagger_private(dp->ds);
+
+ if (!test_bit(KSZ_HWTS_EN, &priv->state))
+ return;
+
+ if (!KSZ_SKB_CB(skb)->update_correction)
+ goto output_tag;
+
+ ptp_type = KSZ_SKB_CB(skb)->ptp_type;
+
+ ptp_hdr = ptp_parse_header(skb, ptp_type);
+ if (!ptp_hdr)
+ goto output_tag;
+
+ correction = (s64)get_unaligned_be64(&ptp_hdr->correction);
+
+ if (correction < 0) {
+ struct timespec64 ts;
+
+ ts = ns_to_timespec64(-correction >> 16);
+ tstamp_raw = ((ts.tv_sec & 3) << 30) | ts.tv_nsec;
+
+ /* Set correction field to 0 and update UDP checksum */
+ ptp_header_update_correction(skb, ptp_type, ptp_hdr, 0);
+ }
+
+output_tag:
+ put_unaligned_be32(tstamp_raw, skb_put(skb, KSZ_PTP_TAG_LEN));
+}
+
+/* Defer transmit if waiting for egress time stamp is required. */
+static struct sk_buff *ksz_defer_xmit(struct dsa_port *dp, struct sk_buff *skb)
+{
+ struct ksz_tagger_data *tagger_data = ksz_tagger_data(dp->ds);
+ struct ksz_tagger_private *priv = ksz_tagger_private(dp->ds);
+ void (*xmit_work_fn)(struct kthread_work *work);
+ struct sk_buff *clone = KSZ_SKB_CB(skb)->clone;
+ struct ksz_deferred_xmit_work *xmit_work;
+ struct kthread_worker *xmit_worker;
+
+ if (!clone)
+ return skb; /* no deferred xmit for this packet */
+
+ xmit_work_fn = tagger_data->xmit_work_fn;
+ xmit_worker = priv->xmit_worker;
+
+ if (!xmit_work_fn || !xmit_worker)
+ return NULL;
+
+ xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
+ if (!xmit_work)
+ return NULL;
+
+ kthread_init_work(&xmit_work->work, xmit_work_fn);
+ xmit_work->dp = dp;
+ /* Increase the refcount so the kfree_skb() in dsa_slave_xmit()
+ * won't really free the packet.
+ */
+ xmit_work->skb = skb_get(skb);
+
+ kthread_queue_work(xmit_worker, &xmit_work->work);
+
+ return NULL;
+}
+
static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 prio = netdev_txq_to_tc(dev, queue_mapping);
struct dsa_port *dp = dsa_slave_to_port(dev);
__be16 *tag;
u8 *addr;
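
[note] When the correctionField of an outgoing PTP frame is negative, its magnitude is moved into the 32-bit tail-tag timestamp (two low bits of seconds in bits 31:30, nanoseconds in bits 29:0) and the header field is zeroed, letting the hardware fold the value back in. A standalone walk-through of that encoding, with a made-up -1.5 s correction:

#include <inttypes.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000LL

int main(void)
{
	/* correctionField is nanoseconds << 16; pick -1.5 s as a sample. */
	int64_t correction = -((3 * NSEC_PER_SEC / 2) << 16);
	uint32_t tstamp_raw = 0;

	if (correction < 0) {
		int64_t ns = (-correction) >> 16;	/* back to plain ns */
		int64_t sec = ns / NSEC_PER_SEC;
		int64_t nsec = ns % NSEC_PER_SEC;

		/* 2 bits of seconds, 30 bits of nanoseconds */
		tstamp_raw = ((uint32_t)(sec & 3) << 30) | (uint32_t)nsec;
	}

	printf("tail-tag ts field: 0x%08" PRIx32 "\n", tstamp_raw);
	printf("  sec[1:0]=%u ns=%u\n",
	       tstamp_raw >> 30, tstamp_raw & 0x3fffffff);
	return 0;
}
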
@@ -126,17 +281,21 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
return NULL;
/* Tag encoding */
+ ksz_xmit_timestamp(dp, skb);
+
tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
val = BIT(dp->index);
+ val |= FIELD_PREP(KSZ9477_TAIL_TAG_PRIO, prio);
+
if (is_link_local_ether_addr(addr))
val |= KSZ9477_TAIL_TAG_OVERRIDE;
*tag = cpu_to_be16(val);
- return skb;
+ return ksz_defer_xmit(dp, skb);
}
static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
@@ -147,8 +306,10 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
unsigned int len = KSZ_EGRESS_TAG_LEN;
/* Extra 4-bytes PTP timestamp */
- if (tag[0] & KSZ9477_PTP_TAG_INDICATION)
- len += KSZ9477_PTP_TAG_LEN;
+ if (tag[0] & KSZ9477_PTP_TAG_INDICATION) {
+ ksz_rcv_timestamp(skb, tag);
+ len += KSZ_PTP_TAG_LEN;
+ }
return ksz_common_rcv(skb, dev, port, len);
}
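
[note] On receive the tag pointer is the last byte before the (already stripped) FCS; bit 7 of it announces four extra timestamp bytes in front, so the tagger strips 5 bytes instead of 1. A hedged sketch of the same parse over a plain buffer (helper names are illustrative):

#include <stdint.h>
#include <stdio.h>

#define PTP_TAG_INDICATION 0x80
#define PTP_TAG_LEN        4
#define EGRESS_TAG_LEN     1

/* frame[] is payload + optional 4-byte ts + 1-byte tag (FCS stripped). */
static size_t parse_tail(const uint8_t *frame, size_t len,
			 unsigned int *port, uint32_t *ts)
{
	const uint8_t *tag = frame + len - EGRESS_TAG_LEN;
	size_t strip = EGRESS_TAG_LEN;

	*port = *tag & 0x7;	/* zero-based egress port */
	*ts = 0;
	if (*tag & PTP_TAG_INDICATION) {
		const uint8_t *p = tag - PTP_TAG_LEN;

		*ts = (uint32_t)p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
		strip += PTP_TAG_LEN;
	}
	return len - strip;	/* new payload length */
}

int main(void)
{
	uint8_t frame[] = { 0xaa, 0xbb,			/* payload */
			    0x12, 0x34, 0x56, 0x78,	/* timestamp */
			    0x82 };			/* PTP bit + port 2 */
	unsigned int port;
	uint32_t ts;
	size_t n = parse_tail(frame, sizeof(frame), &port, &ts);

	printf("port=%u ts=0x%08x payload=%zu bytes\n", port, ts, n);
	return 0;
}
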
@@ -158,18 +319,23 @@ static const struct dsa_device_ops ksz9477_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ9477,
.xmit = ksz9477_xmit,
.rcv = ksz9477_rcv,
- .needed_tailroom = KSZ9477_INGRESS_TAG_LEN,
+ .connect = ksz_connect,
+ .disconnect = ksz_disconnect,
+ .needed_tailroom = KSZ9477_INGRESS_TAG_LEN + KSZ_PTP_TAG_LEN,
};
DSA_TAG_DRIVER(ksz9477_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9477, KSZ9477_NAME);
+#define KSZ9893_TAIL_TAG_PRIO GENMASK(4, 3)
#define KSZ9893_TAIL_TAG_OVERRIDE BIT(5)
#define KSZ9893_TAIL_TAG_LOOKUP BIT(6)
static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 prio = netdev_txq_to_tc(dev, queue_mapping);
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *addr;
u8 *tag;
@@ -178,15 +344,19 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
return NULL;
/* Tag encoding */
+ ksz_xmit_timestamp(dp, skb);
+
tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
*tag = BIT(dp->index);
+ *tag |= FIELD_PREP(KSZ9893_TAIL_TAG_PRIO, prio);
+
if (is_link_local_ether_addr(addr))
*tag |= KSZ9893_TAIL_TAG_OVERRIDE;
- return skb;
+ return ksz_defer_xmit(dp, skb);
}
static const struct dsa_device_ops ksz9893_netdev_ops = {
@@ -194,23 +364,28 @@ static const struct dsa_device_ops ksz9893_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ9893,
.xmit = ksz9893_xmit,
.rcv = ksz9477_rcv,
- .needed_tailroom = KSZ_INGRESS_TAG_LEN,
+ .connect = ksz_connect,
+ .disconnect = ksz_disconnect,
+ .needed_tailroom = KSZ_INGRESS_TAG_LEN + KSZ_PTP_TAG_LEN,
};
DSA_TAG_DRIVER(ksz9893_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893, KSZ9893_NAME);
-/* For xmit, 2 bytes are added before FCS.
+/* For xmit, 2/6 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|tag1(1byte)|
+ * FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if PTP is enabled in the hardware)
* tag0 : represents tag override, lookup and valid
* tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x80=port8)
*
- * For rcv, 1 byte is added before FCS.
+ * For rcv, 1/5 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if bit 7 of tag0 is set)
* tag0 : zero-based value represents port
* (eg, 0x00=port1, 0x02=port3, 0x07=port8)
*/
@@ -219,11 +394,14 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893, KSZ9893_NAME);
#define LAN937X_TAIL_TAG_BLOCKING_OVERRIDE BIT(11)
#define LAN937X_TAIL_TAG_LOOKUP BIT(12)
#define LAN937X_TAIL_TAG_VALID BIT(13)
+#define LAN937X_TAIL_TAG_PRIO GENMASK(10, 8)
#define LAN937X_TAIL_TAG_PORT_MASK 7
static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 prio = netdev_txq_to_tc(dev, queue_mapping);
struct dsa_port *dp = dsa_slave_to_port(dev);
const struct ethhdr *hdr = eth_hdr(skb);
__be16 *tag;
@@ -232,10 +410,14 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
return NULL;
+ ksz_xmit_timestamp(dp, skb);
+
tag = skb_put(skb, LAN937X_EGRESS_TAG_LEN);
val = BIT(dp->index);
+ val |= FIELD_PREP(LAN937X_TAIL_TAG_PRIO, prio);
+
if (is_link_local_ether_addr(hdr->h_dest))
val |= LAN937X_TAIL_TAG_BLOCKING_OVERRIDE;
@@ -244,7 +426,7 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
put_unaligned_be16(val, tag);
- return skb;
+ return ksz_defer_xmit(dp, skb);
}
static const struct dsa_device_ops lan937x_netdev_ops = {
@@ -252,7 +434,9 @@ static const struct dsa_device_ops lan937x_netdev_ops = {
.proto = DSA_TAG_PROTO_LAN937X,
.xmit = lan937x_xmit,
.rcv = ksz9477_rcv,
- .needed_tailroom = LAN937X_EGRESS_TAG_LEN,
+ .connect = ksz_connect,
+ .disconnect = ksz_disconnect,
+ .needed_tailroom = LAN937X_EGRESS_TAG_LEN + KSZ_PTP_TAG_LEN,
};
DSA_TAG_DRIVER(lan937x_netdev_ops);
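
[note] All three xmit paths now assemble the tail tag the same way: the destination-port bit, the queue priority placed into the chip-specific PRIO field, and the override bit for link-local frames. A standalone model of the 16-bit KSZ9477 layout, with GENMASK()/FIELD_PREP() expanded by hand per the defines above:

#include <stdint.h>
#include <stdio.h>

#define TAIL_TAG_PRIO_SHIFT 7			/* GENMASK(8, 7) */
#define TAIL_TAG_PRIO_MASK  (0x3u << TAIL_TAG_PRIO_SHIFT)
#define TAIL_TAG_OVERRIDE   (1u << 9)
#define TAIL_TAG_LOOKUP     (1u << 10)

static uint16_t build_tag(int port_index, unsigned int prio,
			  int is_link_local)
{
	uint16_t val = 1u << port_index;	/* one bit per egress port */

	val |= (prio << TAIL_TAG_PRIO_SHIFT) & TAIL_TAG_PRIO_MASK;
	if (is_link_local)
		val |= TAIL_TAG_OVERRIDE;	/* bypass forwarding state */
	return val;
}

int main(void)
{
	/* port 5 (index 4), priority 2, STP BPDU-style link-local frame */
	printf("tag = 0x%04x\n", build_tag(4, 2, 1));
	return 0;
}
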
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 228f13df2e18..504f954a1b28 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,5 +7,5 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
- tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
- pse-pd.o
+ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
+ module.o pse-pd.o plca.o
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index c7e37130647e..61c40e889a4d 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -86,18 +86,6 @@ static int channels_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_channels_request_ops = {
- .request_cmd = ETHTOOL_MSG_CHANNELS_GET,
- .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY,
- .hdr_attr = ETHTOOL_A_CHANNELS_HEADER,
- .req_info_size = sizeof(struct channels_req_info),
- .reply_data_size = sizeof(struct channels_reply_data),
-
- .prepare_data = channels_prepare_data,
- .reply_size = channels_reply_size,
- .fill_reply = channels_fill_reply,
-};
-
/* CHANNELS_SET */
const struct nla_policy ethnl_channels_set_policy[] = {
@@ -109,36 +97,28 @@ const struct nla_policy ethnl_channels_set_policy[] = {
[ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_U32 },
};
-int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_channels_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_channels && ops->set_channels ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
{
unsigned int from_channel, old_total, i;
bool mod = false, mod_combined = false;
+ struct net_device *dev = req_info->dev;
struct ethtool_channels channels = {};
- struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
u32 err_attr, max_rxfh_in_use;
- const struct ethtool_ops *ops;
- struct net_device *dev;
u64 max_rxnfc_in_use;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_CHANNELS_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_channels || !ops->set_channels)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ops->get_channels(dev, &channels);
+ dev->ethtool_ops->get_channels(dev, &channels);
old_total = channels.combined_count +
max(channels.rx_count, channels.tx_count);
@@ -151,9 +131,8 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
ethnl_update_u32(&channels.combined_count,
tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod_combined);
mod |= mod_combined;
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
/* ensure new channel counts are within limits */
if (channels.rx_count > channels.max_rx)
@@ -167,10 +146,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
else
err_attr = 0;
if (err_attr) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
"requested channel count exceeds maximum");
- goto out_ops;
+ return -EINVAL;
}
/* ensure there is at least one RX and one TX channel */
@@ -183,10 +161,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
if (err_attr) {
if (mod_combined)
err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
"requested channel counts would result in no RX or TX channel being configured");
- goto out_ops;
+ return -EINVAL;
}
/* ensure the new Rx count fits within the configured Rx flow
@@ -198,14 +175,12 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use))
max_rxfh_in_use = 0;
if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- ret = -EINVAL;
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings");
- goto out_ops;
+ return -EINVAL;
}
if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
- ret = -EINVAL;
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
- goto out_ops;
+ return -EINVAL;
}
/* Disabling channels, query zero-copy AF_XDP sockets */
@@ -213,21 +188,26 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
min(channels.rx_count, channels.tx_count);
for (i = from_channel; i < old_total; i++)
if (xsk_get_pool_from_qid(dev, i)) {
- ret = -EINVAL;
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets");
- goto out_ops;
+ return -EINVAL;
}
ret = dev->ethtool_ops->set_channels(dev, &channels);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_channels_request_ops = {
+ .request_cmd = ETHTOOL_MSG_CHANNELS_GET,
+ .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_CHANNELS_HEADER,
+ .req_info_size = sizeof(struct channels_req_info),
+ .reply_data_size = sizeof(struct channels_reply_data),
+
+ .prepare_data = channels_prepare_data,
+ .reply_size = channels_reply_size,
+ .fill_reply = channels_fill_reply,
+
+ .set_validate = ethnl_set_channels_validate,
+ .set = ethnl_set_channels,
+ .set_ntf_cmd = ETHTOOL_MSG_CHANNELS_NTF,
+};
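
[note] channels is one of several SET handlers in this series converted to the generic ethnl request-ops path. The contract: ->set_validate runs first and returns 1 to proceed, 0 to skip silently, or a negative errno; ->set returns 1 when device state changed (the core then sends set_ntf_cmd), 0 when nothing changed, or a negative errno. A simplified model of how a dispatcher might drive the pair (illustrative; the real one lives in net/ethtool/netlink.c):

#include <stdio.h>

struct req { const char *name; };

/* Return 1 to proceed, 0 to silently skip, <0 on error. */
typedef int (*validate_fn)(struct req *);
/* Return 1 if state changed (notify), 0 if not, <0 on error. */
typedef int (*set_fn)(struct req *);

static int default_set_doit(struct req *r, validate_fn validate, set_fn set)
{
	int ret = validate ? validate(r) : 1;

	if (ret <= 0)
		return ret;
	/* rtnl_lock()/ethnl_ops_begin() would bracket this part */
	ret = set(r);
	if (ret == 1)
		printf("notify: %s changed\n", r->name);	/* set_ntf_cmd */
	return ret < 0 ? ret : 0;
}

static int chan_validate(struct req *r) { return 1; }
static int chan_set(struct req *r)      { return 1; /* modified */ }

int main(void)
{
	struct req r = { "channels" };

	return default_set_doit(&r, chan_validate, chan_set);
}
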
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 487bdf345541..443e7e642c96 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -105,7 +105,10 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_HIGH */
nla_total_size(sizeof(u32)) + /* _RATE_SAMPLE_INTERVAL */
nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_TX */
- nla_total_size(sizeof(u8)); /* _USE_CQE_MODE_RX */
+ nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_RX */
+ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */
+ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */
+ nla_total_size(sizeof(u32)); /* _TX_AGGR_TIME_USECS */
}
static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
@@ -180,24 +183,18 @@ static int coalesce_fill_reply(struct sk_buff *skb,
coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
kcoal->use_cqe_mode_tx, supported) ||
coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
- kcoal->use_cqe_mode_rx, supported))
+ kcoal->use_cqe_mode_rx, supported) ||
+ coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,
+ kcoal->tx_aggr_max_bytes, supported) ||
+ coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
+ kcoal->tx_aggr_max_frames, supported) ||
+ coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
+ kcoal->tx_aggr_time_usecs, supported))
return -EMSGSIZE;
return 0;
}
-const struct ethnl_request_ops ethnl_coalesce_request_ops = {
- .request_cmd = ETHTOOL_MSG_COALESCE_GET,
- .reply_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_COALESCE_HEADER,
- .req_info_size = sizeof(struct coalesce_req_info),
- .reply_data_size = sizeof(struct coalesce_reply_data),
-
- .prepare_data = coalesce_prepare_data,
- .reply_size = coalesce_reply_size,
- .fill_reply = coalesce_fill_reply,
-};
-
/* COALESCE_SET */
const struct nla_policy ethnl_coalesce_set_policy[] = {
@@ -227,51 +224,49 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 },
};
-int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct kernel_ethtool_coalesce kernel_coalesce = {};
- struct ethtool_coalesce coalesce = {};
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
- struct net_device *dev;
u32 supported_params;
- bool mod = false;
- int ret;
u16 a;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_COALESCE_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
if (!ops->get_coalesce || !ops->set_coalesce)
- goto out_dev;
+ return -EOPNOTSUPP;
/* make sure that only supported parameters are present */
supported_params = ops->supported_coalesce_params;
for (a = ETHTOOL_A_COALESCE_RX_USECS; a < __ETHTOOL_A_COALESCE_CNT; a++)
if (tb[a] && !(supported_params & attr_to_mask(a))) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[a],
"cannot modify an unsupported parameter");
- goto out_dev;
+ return -EINVAL;
}
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ret = ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
- info->extack);
+ return 1;
+}
+
+static int
+ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
+ struct net_device *dev = req_info->dev;
+ struct ethtool_coalesce coalesce = {};
+ struct nlattr **tb = info->attrs;
+ bool mod = false;
+ int ret;
+
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ info->extack);
if (ret < 0)
- goto out_ops;
+ return ret;
ethnl_update_u32(&coalesce.rx_coalesce_usecs,
tb[ETHTOOL_A_COALESCE_RX_USECS], &mod);
@@ -321,21 +316,32 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod);
ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod);
- ret = 0;
+ ethnl_update_u32(&kernel_coalesce.tx_aggr_max_bytes,
+ tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES], &mod);
+ ethnl_update_u32(&kernel_coalesce.tx_aggr_max_frames,
+ tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod);
+ ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
+ tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
info->extack);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_coalesce_request_ops = {
+ .request_cmd = ETHTOOL_MSG_COALESCE_GET,
+ .reply_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_COALESCE_HEADER,
+ .req_info_size = sizeof(struct coalesce_req_info),
+ .reply_data_size = sizeof(struct coalesce_reply_data),
+
+ .prepare_data = coalesce_prepare_data,
+ .reply_size = coalesce_reply_size,
+ .fill_reply = coalesce_fill_reply,
+
+ .set_validate = ethnl_set_coalesce_validate,
+ .set = ethnl_set_coalesce,
+ .set_ntf_cmd = ETHTOOL_MSG_COALESCE_NTF,
+};
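
[note] The long run of ethnl_update_*() calls is safe because each helper is a no-op when the attribute is absent and only sets the mod flag when the value actually changes, which is what lets the handler return 0 before touching the driver. A standalone model of that update-if-present-and-changed helper (assumed to match the kernel helper's documented behavior):

#include <stdbool.h>
#include <stdio.h>

struct attr { bool present; unsigned int val; };

static void update_u32(unsigned int *dst, const struct attr *a, bool *mod)
{
	if (!a->present)
		return;		/* attribute not in the request */
	if (*dst == a->val)
		return;		/* same value, don't set mod */
	*dst = a->val;
	*mod = true;
}

int main(void)
{
	unsigned int rx_usecs = 50;
	bool mod = false;
	struct attr absent = { false, 0 }, same = { true, 50 },
		    change = { true, 100 };

	update_u32(&rx_usecs, &absent, &mod);
	update_u32(&rx_usecs, &same, &mod);
	printf("after no-ops: rx_usecs=%u mod=%d\n", rx_usecs, mod);

	update_u32(&rx_usecs, &change, &mod);
	printf("after change: rx_usecs=%u mod=%d\n", rx_usecs, mod);
	return 0;
}
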
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 6f399afc2ff2..5fb19050991e 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -208,6 +208,9 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(800000, DR8_2, Full),
__DEFINE_LINK_MODE_NAME(800000, SR8, Full),
__DEFINE_LINK_MODE_NAME(800000, VR8, Full),
+ __DEFINE_LINK_MODE_NAME(10, T1S, Full),
+ __DEFINE_LINK_MODE_NAME(10, T1S, Half),
+ __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -244,6 +247,8 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_X 1
#define __LINK_MODE_LANES_FX 1
#define __LINK_MODE_LANES_T1L 1
+#define __LINK_MODE_LANES_T1S 1
+#define __LINK_MODE_LANES_T1S_P2MP 1
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
@@ -366,6 +371,9 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full),
__DEFINE_LINK_MODE_PARAMS(800000, SR8, Full),
__DEFINE_LINK_MODE_PARAMS(800000, VR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(10, T1S, Full),
+ __DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index b1b9db810eca..28b8aaaf9bcb 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -54,4 +54,6 @@ int ethtool_get_module_info_call(struct net_device *dev,
int ethtool_get_module_eeprom_call(struct net_device *dev,
struct ethtool_eeprom *ee, u8 *data);
+bool __ethtool_dev_mm_supported(struct net_device *dev);
+
#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index d73888c7d19c..e4369769817e 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -63,18 +63,6 @@ static int debug_fill_reply(struct sk_buff *skb,
netif_msg_class_names, compact);
}
-const struct ethnl_request_ops ethnl_debug_request_ops = {
- .request_cmd = ETHTOOL_MSG_DEBUG_GET,
- .reply_cmd = ETHTOOL_MSG_DEBUG_GET_REPLY,
- .hdr_attr = ETHTOOL_A_DEBUG_HEADER,
- .req_info_size = sizeof(struct debug_req_info),
- .reply_data_size = sizeof(struct debug_reply_data),
-
- .prepare_data = debug_prepare_data,
- .reply_size = debug_reply_size,
- .fill_reply = debug_fill_reply,
-};
-
/* DEBUG_SET */
const struct nla_policy ethnl_debug_set_policy[] = {
@@ -83,46 +71,47 @@ const struct nla_policy ethnl_debug_set_policy[] = {
[ETHTOOL_A_DEBUG_MSGMASK] = { .type = NLA_NESTED },
};
-int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_debug_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_msglevel && ops->set_msglevel ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_debug(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
bool mod = false;
u32 msg_mask;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_DEBUG_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
msg_mask = dev->ethtool_ops->get_msglevel(dev);
ret = ethnl_update_bitset32(&msg_mask, NETIF_MSG_CLASS_COUNT,
tb[ETHTOOL_A_DEBUG_MSGMASK],
netif_msg_class_names, info->extack, &mod);
if (ret < 0 || !mod)
- goto out_ops;
+ return ret;
dev->ethtool_ops->set_msglevel(dev, msg_mask);
- ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_debug_request_ops = {
+ .request_cmd = ETHTOOL_MSG_DEBUG_GET,
+ .reply_cmd = ETHTOOL_MSG_DEBUG_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_DEBUG_HEADER,
+ .req_info_size = sizeof(struct debug_req_info),
+ .reply_data_size = sizeof(struct debug_reply_data),
+
+ .prepare_data = debug_prepare_data,
+ .reply_size = debug_reply_size,
+ .fill_reply = debug_fill_reply,
+
+ .set_validate = ethnl_set_debug_validate,
+ .set = ethnl_set_debug,
+ .set_ntf_cmd = ETHTOOL_MSG_DEBUG_NTF,
+};
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 45c42b2d5f17..42104bcb0e47 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -108,18 +108,6 @@ static int eee_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_eee_request_ops = {
- .request_cmd = ETHTOOL_MSG_EEE_GET,
- .reply_cmd = ETHTOOL_MSG_EEE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_EEE_HEADER,
- .req_info_size = sizeof(struct eee_req_info),
- .reply_data_size = sizeof(struct eee_reply_data),
-
- .prepare_data = eee_prepare_data,
- .reply_size = eee_reply_size,
- .fill_reply = eee_fill_reply,
-};
-
/* EEE_SET */
const struct nla_policy ethnl_eee_set_policy[] = {
@@ -131,60 +119,56 @@ const struct nla_policy ethnl_eee_set_policy[] = {
[ETHTOOL_A_EEE_TX_LPI_TIMER] = { .type = NLA_U32 },
};
-int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_eee_validate(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_eee && ops->set_eee ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
struct ethtool_eee eee = {};
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_EEE_HEADER],
- genl_info_net(info), info->extack,
- true);
+ ret = dev->ethtool_ops->get_eee(dev, &eee);
if (ret < 0)
return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_eee || !ops->set_eee)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ret = ops->get_eee(dev, &eee);
- if (ret < 0)
- goto out_ops;
ret = ethnl_update_bitset32(&eee.advertised, EEE_MODES_COUNT,
tb[ETHTOOL_A_EEE_MODES_OURS],
link_mode_names, info->extack, &mod);
if (ret < 0)
- goto out_ops;
+ return ret;
ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
ethnl_update_bool32(&eee.tx_lpi_enabled,
tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod);
ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER],
&mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_eee(dev, &eee);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_eee_request_ops = {
+ .request_cmd = ETHTOOL_MSG_EEE_GET,
+ .reply_cmd = ETHTOOL_MSG_EEE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_EEE_HEADER,
+ .req_info_size = sizeof(struct eee_req_info),
+ .reply_data_size = sizeof(struct eee_reply_data),
+
+ .prepare_data = eee_prepare_data,
+ .reply_size = eee_reply_size,
+ .fill_reply = eee_fill_reply,
+
+ .set_validate = ethnl_set_eee_validate,
+ .set = ethnl_set_eee,
+ .set_ntf_cmd = ETHTOOL_MSG_EEE_NTF,
+};
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 9f5a134e2e01..0d9a3d153170 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -217,18 +217,6 @@ static int fec_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_fec_request_ops = {
- .request_cmd = ETHTOOL_MSG_FEC_GET,
- .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY,
- .hdr_attr = ETHTOOL_A_FEC_HEADER,
- .req_info_size = sizeof(struct fec_req_info),
- .reply_data_size = sizeof(struct fec_reply_data),
-
- .prepare_data = fec_prepare_data,
- .reply_size = fec_reply_size,
- .fill_reply = fec_fill_reply,
-};
-
/* FEC_SET */
const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = {
@@ -237,36 +225,28 @@ const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = {
[ETHTOOL_A_FEC_AUTO] = NLA_POLICY_MAX(NLA_U8, 1),
};
-int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_fec_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_fecparam && ops->set_fecparam ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_fec(struct ethnl_req_info *req_info, struct genl_info *info)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {};
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
struct ethtool_fecparam fec = {};
- const struct ethtool_ops *ops;
- struct net_device *dev;
bool mod = false;
u8 fec_auto;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_FEC_HEADER],
- genl_info_net(info), info->extack,
- true);
+ ret = dev->ethtool_ops->get_fecparam(dev, &fec);
if (ret < 0)
return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_fecparam || !ops->set_fecparam)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ret = ops->get_fecparam(dev, &fec);
- if (ret < 0)
- goto out_ops;
ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto);
@@ -275,36 +255,39 @@ int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_FEC_MODES],
link_mode_names, info->extack, &mod);
if (ret < 0)
- goto out_ops;
+ return ret;
ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod);
-
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto);
if (ret) {
NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES],
"invalid FEC modes requested");
- goto out_ops;
+ return ret;
}
if (!fec.fec) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES],
"no FEC modes set");
- goto out_ops;
+ return -EINVAL;
}
ret = dev->ethtool_ops->set_fecparam(dev, &fec);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_FEC_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_fec_request_ops = {
+ .request_cmd = ETHTOOL_MSG_FEC_GET,
+ .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_FEC_HEADER,
+ .req_info_size = sizeof(struct fec_req_info),
+ .reply_data_size = sizeof(struct fec_reply_data),
+
+ .prepare_data = fec_prepare_data,
+ .reply_size = fec_reply_size,
+ .fill_reply = fec_fill_reply,
+
+ .set_validate = ethnl_set_fec_validate,
+ .set = ethnl_set_fec,
+ .set_ntf_cmd = ETHTOOL_MSG_FEC_NTF,
+};
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index efa0f7f48836..310dfe63292a 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -73,18 +73,6 @@ static int linkinfo_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_linkinfo_request_ops = {
- .request_cmd = ETHTOOL_MSG_LINKINFO_GET,
- .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY,
- .hdr_attr = ETHTOOL_A_LINKINFO_HEADER,
- .req_info_size = sizeof(struct linkinfo_req_info),
- .reply_data_size = sizeof(struct linkinfo_reply_data),
-
- .prepare_data = linkinfo_prepare_data,
- .reply_size = linkinfo_reply_size,
- .fill_reply = linkinfo_fill_reply,
-};
-
/* LINKINFO_SET */
const struct nla_policy ethnl_linkinfo_set_policy[] = {
@@ -95,37 +83,31 @@ const struct nla_policy ethnl_linkinfo_set_policy[] = {
[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_U8 },
};
-int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_linkinfo_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ if (!ops->get_link_ksettings || !ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+ return 1;
+}
+
+static int
+ethnl_set_linkinfo(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct ethtool_link_ksettings ksettings = {};
struct ethtool_link_settings *lsettings;
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_LINKINFO_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_link_ksettings ||
- !dev->ethtool_ops->set_link_ksettings)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
ret = __ethtool_get_link_ksettings(dev, &ksettings);
if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
- goto out_ops;
+ return ret;
}
lsettings = &ksettings.base;
@@ -134,21 +116,30 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
&mod);
ethnl_update_u8(&lsettings->eth_tp_mdix_ctrl,
tb[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL], &mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
- if (ret < 0)
+ if (ret < 0) {
GENL_SET_ERR_MSG(info, "link settings update failed");
- else
- ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+ return ret;
+ }
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_linkinfo_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKINFO_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKINFO_HEADER,
+ .req_info_size = sizeof(struct linkinfo_req_info),
+ .reply_data_size = sizeof(struct linkinfo_reply_data),
+
+ .prepare_data = linkinfo_prepare_data,
+ .reply_size = linkinfo_reply_size,
+ .fill_reply = linkinfo_fill_reply,
+
+ .set_validate = ethnl_set_linkinfo_validate,
+ .set = ethnl_set_linkinfo,
+ .set_ntf_cmd = ETHTOOL_MSG_LINKINFO_NTF,
+};
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 126e06c713a3..fab66c169b9f 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -151,18 +151,6 @@ static int linkmodes_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
- .request_cmd = ETHTOOL_MSG_LINKMODES_GET,
- .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY,
- .hdr_attr = ETHTOOL_A_LINKMODES_HEADER,
- .req_info_size = sizeof(struct linkmodes_req_info),
- .reply_data_size = sizeof(struct linkmodes_reply_data),
-
- .prepare_data = linkmodes_prepare_data,
- .reply_size = linkmodes_reply_size,
- .fill_reply = linkmodes_fill_reply,
-};
-
/* LINKMODES_SET */
const struct nla_policy ethnl_linkmodes_set_policy[] = {
@@ -310,59 +298,64 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
return 0;
}
-int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_linkmodes_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct ethtool_link_ksettings ksettings = {};
- struct ethnl_req_info req_info = {};
- struct nlattr **tb = info->attrs;
- struct net_device *dev;
- bool mod = false;
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
int ret;
- ret = ethnl_check_linkmodes(info, tb);
+ ret = ethnl_check_linkmodes(info, info->attrs);
if (ret < 0)
return ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_LINKMODES_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_link_ksettings ||
- !dev->ethtool_ops->set_link_ksettings)
- goto out_dev;
+ if (!ops->get_link_ksettings || !ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+ return 1;
+}
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
+static int
+ethnl_set_linkmodes(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct ethtool_link_ksettings ksettings = {};
+ struct net_device *dev = req_info->dev;
+ struct nlattr **tb = info->attrs;
+ bool mod = false;
+ int ret;
ret = __ethtool_get_link_ksettings(dev, &ksettings);
if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
- goto out_ops;
+ return ret;
}
ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod, dev);
if (ret < 0)
- goto out_ops;
+ return ret;
+ if (!mod)
+ return 0;
- if (mod) {
- ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
- if (ret < 0)
- GENL_SET_ERR_MSG(info, "link settings update failed");
- else
- ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
+ if (ret < 0) {
+ GENL_SET_ERR_MSG(info, "link settings update failed");
+ return ret;
}
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKMODES_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKMODES_HEADER,
+ .req_info_size = sizeof(struct linkmodes_req_info),
+ .reply_data_size = sizeof(struct linkmodes_reply_data),
+
+ .prepare_data = linkmodes_prepare_data,
+ .reply_size = linkmodes_reply_size,
+ .fill_reply = linkmodes_fill_reply,
+
+ .set_validate = ethnl_set_linkmodes_validate,
+ .set = ethnl_set_linkmodes,
+ .set_ntf_cmd = ETHTOOL_MSG_LINKMODES_NTF,
+};
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
new file mode 100644
index 000000000000..e612856eed8c
--- /dev/null
+++ b/net/ethtool/mm.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022-2023 NXP
+ */
+#include "common.h"
+#include "netlink.h"
+
+struct mm_req_info {
+ struct ethnl_req_info base;
+};
+
+struct mm_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_mm_state state;
+ struct ethtool_mm_stats stats;
+};
+
+#define MM_REPDATA(__reply_base) \
+ container_of(__reply_base, struct mm_reply_data, base)
+
+#define ETHTOOL_MM_STAT_CNT \
+ (__ETHTOOL_A_MM_STAT_CNT - (ETHTOOL_A_MM_STAT_PAD + 1))
+
+const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = {
+ [ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats),
+};
+
+static int mm_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct mm_reply_data *data = MM_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+ int ret;
+
+ ops = dev->ethtool_ops;
+
+ if (!ops->get_mm)
+ return -EOPNOTSUPP;
+
+ ethtool_stats_init((u64 *)&data->stats,
+ sizeof(data->stats) / sizeof(u64));
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = ops->get_mm(dev, &data->state);
+ if (ret)
+ goto out_complete;
+
+ if (ops->get_mm_stats && (req_base->flags & ETHTOOL_FLAG_STATS))
+ ops->get_mm_stats(dev, &data->stats);
+
+out_complete:
+ ethnl_ops_complete(dev);
+
+ return ret;
+}
+
+static int mm_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ int len = 0;
+
+ len += nla_total_size(sizeof(u8)); /* _MM_PMAC_ENABLED */
+ len += nla_total_size(sizeof(u8)); /* _MM_TX_ENABLED */
+ len += nla_total_size(sizeof(u8)); /* _MM_TX_ACTIVE */
+ len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_ENABLED */
+ len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_STATUS */
+ len += nla_total_size(sizeof(u32)); /* _MM_VERIFY_TIME */
+ len += nla_total_size(sizeof(u32)); /* _MM_MAX_VERIFY_TIME */
+ len += nla_total_size(sizeof(u32)); /* _MM_TX_MIN_FRAG_SIZE */
+ len += nla_total_size(sizeof(u32)); /* _MM_RX_MIN_FRAG_SIZE */
+
+ if (req_base->flags & ETHTOOL_FLAG_STATS)
+ len += nla_total_size(0) + /* _MM_STATS */
+ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_MM_STAT_CNT;
+
+ return len;
+}
+
+static int mm_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+ if (val == ETHTOOL_STAT_NOT_SET)
+ return 0;
+ if (nla_put_u64_64bit(skb, attrtype, val, ETHTOOL_A_MM_STAT_PAD))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int mm_put_stats(struct sk_buff *skb,
+ const struct ethtool_mm_stats *stats)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_MM_STATS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (mm_put_stat(skb, stats->MACMergeFrameAssErrorCount,
+ ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) ||
+ mm_put_stat(skb, stats->MACMergeFrameSmdErrorCount,
+ ETHTOOL_A_MM_STAT_SMD_ERRORS) ||
+ mm_put_stat(skb, stats->MACMergeFrameAssOkCount,
+ ETHTOOL_A_MM_STAT_REASSEMBLY_OK) ||
+ mm_put_stat(skb, stats->MACMergeFragCountRx,
+ ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) ||
+ mm_put_stat(skb, stats->MACMergeFragCountTx,
+ ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) ||
+ mm_put_stat(skb, stats->MACMergeHoldCount,
+ ETHTOOL_A_MM_STAT_HOLD_COUNT))
+ goto err_cancel;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int mm_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct mm_reply_data *data = MM_REPDATA(reply_base);
+ const struct ethtool_mm_state *state = &data->state;
+
+ if (nla_put_u8(skb, ETHTOOL_A_MM_TX_ENABLED, state->tx_enabled) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_TX_ACTIVE, state->tx_active) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_PMAC_ENABLED, state->pmac_enabled) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_ENABLED, state->verify_enabled) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_STATUS, state->verify_status) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_VERIFY_TIME, state->verify_time) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_MAX_VERIFY_TIME, state->max_verify_time) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, state->tx_min_frag_size) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, state->rx_min_frag_size))
+ return -EMSGSIZE;
+
+ if (req_base->flags & ETHTOOL_FLAG_STATS &&
+ mm_put_stats(skb, &data->stats))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1] = {
+ [ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_MM_VERIFY_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_MM_VERIFY_TIME] = NLA_POLICY_RANGE(NLA_U32, 1, 128),
+ [ETHTOOL_A_MM_TX_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_MM_PMAC_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = NLA_POLICY_RANGE(NLA_U32, 60, 252),
+};
+
+static void mm_state_to_cfg(const struct ethtool_mm_state *state,
+ struct ethtool_mm_cfg *cfg)
+{
+ /* We could also compare state->verify_status against
+ * ETHTOOL_MM_VERIFY_STATUS_DISABLED, but state->verify_enabled
+ * is more like an administrative state which should be seen in
+ * ETHTOOL_MSG_MM_GET replies. For example, a port with verification
+ * disabled might be in the ETHTOOL_MM_VERIFY_STATUS_INITIAL
+ * state if it's down.
+ */
+ cfg->verify_enabled = state->verify_enabled;
+ cfg->verify_time = state->verify_time;
+ cfg->tx_enabled = state->tx_enabled;
+ cfg->pmac_enabled = state->pmac_enabled;
+ cfg->tx_min_frag_size = state->tx_min_frag_size;
+}
+
+static int
+ethnl_set_mm_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_mm && ops->set_mm ? 1 : -EOPNOTSUPP;
+}
+
+static int ethnl_set_mm(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct net_device *dev = req_info->dev;
+ struct ethtool_mm_state state = {};
+ struct nlattr **tb = info->attrs;
+ struct ethtool_mm_cfg cfg = {};
+ bool mod = false;
+ int ret;
+
+ ret = dev->ethtool_ops->get_mm(dev, &state);
+ if (ret)
+ return ret;
+
+ mm_state_to_cfg(&state, &cfg);
+
+ ethnl_update_bool(&cfg.verify_enabled, tb[ETHTOOL_A_MM_VERIFY_ENABLED],
+ &mod);
+ ethnl_update_u32(&cfg.verify_time, tb[ETHTOOL_A_MM_VERIFY_TIME], &mod);
+ ethnl_update_bool(&cfg.tx_enabled, tb[ETHTOOL_A_MM_TX_ENABLED], &mod);
+ ethnl_update_bool(&cfg.pmac_enabled, tb[ETHTOOL_A_MM_PMAC_ENABLED],
+ &mod);
+ ethnl_update_u32(&cfg.tx_min_frag_size,
+ tb[ETHTOOL_A_MM_TX_MIN_FRAG_SIZE], &mod);
+
+ if (!mod)
+ return 0;
+
+ if (cfg.verify_time > state.max_verify_time) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MM_VERIFY_TIME],
+ "verifyTime exceeds device maximum");
+ return -ERANGE;
+ }
+
+ ret = dev->ethtool_ops->set_mm(dev, &cfg, extack);
+ return ret < 0 ? ret : 1;
+}
+
+const struct ethnl_request_ops ethnl_mm_request_ops = {
+ .request_cmd = ETHTOOL_MSG_MM_GET,
+ .reply_cmd = ETHTOOL_MSG_MM_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_MM_HEADER,
+ .req_info_size = sizeof(struct mm_req_info),
+ .reply_data_size = sizeof(struct mm_reply_data),
+
+ .prepare_data = mm_prepare_data,
+ .reply_size = mm_reply_size,
+ .fill_reply = mm_fill_reply,
+
+ .set_validate = ethnl_set_mm_validate,
+ .set = ethnl_set_mm,
+ .set_ntf_cmd = ETHTOOL_MSG_MM_NTF,
+};
+
+/* Returns whether a given device supports the MAC merge layer
+ * (has an eMAC and a pMAC). Must be called under rtnl_lock() and
+ * ethnl_ops_begin().
+ */
+bool __ethtool_dev_mm_supported(struct net_device *dev)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_mm_state state = {};
+ int ret = -EOPNOTSUPP;
+
+ if (ops && ops->get_mm)
+ ret = ops->get_mm(dev, &state);
+
+ return !ret;
+}
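
[note] ETHTOOL_MM_STAT_CNT subtracts the two non-counter enum entries (UNSPEC and PAD) from the attribute count, so mm_reply_size() can budget exactly one 64-bit attribute per counter and mm_put_stats() emits that many. A tiny model of the arithmetic (the enum here only mirrors the layout; the values are illustrative):

#include <stdio.h>

/* Shape of a typical stats attribute enum: UNSPEC, PAD, then counters. */
enum mm_stat_attr {
	A_MM_STAT_UNSPEC,
	A_MM_STAT_PAD,
	A_MM_STAT_REASSEMBLY_ERRORS,
	A_MM_STAT_SMD_ERRORS,
	A_MM_STAT_REASSEMBLY_OK,
	A_MM_STAT_RX_FRAG_COUNT,
	A_MM_STAT_TX_FRAG_COUNT,
	A_MM_STAT_HOLD_COUNT,
	__A_MM_STAT_CNT,
};

#define MM_STAT_CNT (__A_MM_STAT_CNT - (A_MM_STAT_PAD + 1))

int main(void)
{
	/* 8 enum entries minus UNSPEC and PAD leaves 6 real counters,
	 * matching the six mm_put_stat() calls above.
	 */
	printf("counters in the stats nest: %d\n", MM_STAT_CNT);
	return 0;
}
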
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index 898ed436b9e4..e0d539b21423 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -91,18 +91,6 @@ static int module_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_module_request_ops = {
- .request_cmd = ETHTOOL_MSG_MODULE_GET,
- .reply_cmd = ETHTOOL_MSG_MODULE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_MODULE_HEADER,
- .req_info_size = sizeof(struct module_req_info),
- .reply_data_size = sizeof(struct module_reply_data),
-
- .prepare_data = module_prepare_data,
- .reply_size = module_reply_size,
- .fill_reply = module_fill_reply,
-};
-
/* MODULE_SET */
const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1] = {
@@ -112,69 +100,62 @@ const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLI
ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO),
};
-static int module_set_power_mode(struct net_device *dev, struct nlattr **tb,
- bool *p_mod, struct netlink_ext_ack *extack)
+static int
+ethnl_set_module_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct ethtool_module_power_mode_params power = {};
- struct ethtool_module_power_mode_params power_new;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- int ret;
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+ struct nlattr **tb = info->attrs;
if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY])
return 0;
if (!ops->get_module_power_mode || !ops->set_module_power_mode) {
- NL_SET_ERR_MSG_ATTR(extack,
+ NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY],
"Setting power mode policy is not supported by this device");
return -EOPNOTSUPP;
}
- power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]);
- ret = ops->get_module_power_mode(dev, &power, extack);
- if (ret < 0)
- return ret;
-
- if (power_new.policy == power.policy)
- return 0;
- *p_mod = true;
-
- return ops->set_module_power_mode(dev, &power_new, extack);
+ return 1;
}
-int ethnl_set_module(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_module(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct ethnl_req_info req_info = {};
+ struct ethtool_module_power_mode_params power = {};
+ struct ethtool_module_power_mode_params power_new;
+ const struct ethtool_ops *ops;
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
- bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_MODULE_HEADER],
- genl_info_net(info), info->extack,
- true);
+ ops = dev->ethtool_ops;
+
+ power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]);
+ ret = ops->get_module_power_mode(dev, &power, info->extack);
if (ret < 0)
return ret;
- dev = req_info.dev;
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
+ if (power_new.policy == power.policy)
+ return 0;
- ret = module_set_power_mode(dev, tb, &mod, info->extack);
- if (ret < 0)
- goto out_ops;
+ ret = ops->set_module_power_mode(dev, &power_new, info->extack);
+ return ret < 0 ? ret : 1;
+}
- if (!mod)
- goto out_ops;
+const struct ethnl_request_ops ethnl_module_request_ops = {
+ .request_cmd = ETHTOOL_MSG_MODULE_GET,
+ .reply_cmd = ETHTOOL_MSG_MODULE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_MODULE_HEADER,
+ .req_info_size = sizeof(struct module_req_info),
+ .reply_data_size = sizeof(struct module_reply_data),
- ethtool_notify(dev, ETHTOOL_MSG_MODULE_NTF, NULL);
+ .prepare_data = module_prepare_data,
+ .reply_size = module_reply_size,
+ .fill_reply = module_fill_reply,
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
- ethnl_parse_header_dev_put(&req_info);
- return ret;
-}
+ .set_validate = ethnl_set_module_validate,
+ .set = ethnl_set_module,
+ .set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF,
+};
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index aee98be6237f..08120095cc68 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -269,25 +269,43 @@ static const struct ethnl_request_ops *
ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_STRSET_GET] = &ethnl_strset_request_ops,
[ETHTOOL_MSG_LINKINFO_GET] = &ethnl_linkinfo_request_ops,
+ [ETHTOOL_MSG_LINKINFO_SET] = &ethnl_linkinfo_request_ops,
[ETHTOOL_MSG_LINKMODES_GET] = &ethnl_linkmodes_request_ops,
+ [ETHTOOL_MSG_LINKMODES_SET] = &ethnl_linkmodes_request_ops,
[ETHTOOL_MSG_LINKSTATE_GET] = &ethnl_linkstate_request_ops,
[ETHTOOL_MSG_DEBUG_GET] = &ethnl_debug_request_ops,
+ [ETHTOOL_MSG_DEBUG_SET] = &ethnl_debug_request_ops,
[ETHTOOL_MSG_WOL_GET] = &ethnl_wol_request_ops,
+ [ETHTOOL_MSG_WOL_SET] = &ethnl_wol_request_ops,
[ETHTOOL_MSG_FEATURES_GET] = &ethnl_features_request_ops,
[ETHTOOL_MSG_PRIVFLAGS_GET] = &ethnl_privflags_request_ops,
+ [ETHTOOL_MSG_PRIVFLAGS_SET] = &ethnl_privflags_request_ops,
[ETHTOOL_MSG_RINGS_GET] = &ethnl_rings_request_ops,
+ [ETHTOOL_MSG_RINGS_SET] = &ethnl_rings_request_ops,
[ETHTOOL_MSG_CHANNELS_GET] = &ethnl_channels_request_ops,
+ [ETHTOOL_MSG_CHANNELS_SET] = &ethnl_channels_request_ops,
[ETHTOOL_MSG_COALESCE_GET] = &ethnl_coalesce_request_ops,
+ [ETHTOOL_MSG_COALESCE_SET] = &ethnl_coalesce_request_ops,
[ETHTOOL_MSG_PAUSE_GET] = &ethnl_pause_request_ops,
+ [ETHTOOL_MSG_PAUSE_SET] = &ethnl_pause_request_ops,
[ETHTOOL_MSG_EEE_GET] = &ethnl_eee_request_ops,
+ [ETHTOOL_MSG_EEE_SET] = &ethnl_eee_request_ops,
[ETHTOOL_MSG_FEC_GET] = &ethnl_fec_request_ops,
+ [ETHTOOL_MSG_FEC_SET] = &ethnl_fec_request_ops,
[ETHTOOL_MSG_TSINFO_GET] = &ethnl_tsinfo_request_ops,
[ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops,
[ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops,
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
[ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops,
+ [ETHTOOL_MSG_MODULE_SET] = &ethnl_module_request_ops,
[ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops,
+ [ETHTOOL_MSG_PSE_SET] = &ethnl_pse_request_ops,
[ETHTOOL_MSG_RSS_GET] = &ethnl_rss_request_ops,
+ [ETHTOOL_MSG_PLCA_GET_CFG] = &ethnl_plca_cfg_request_ops,
+ [ETHTOOL_MSG_PLCA_SET_CFG] = &ethnl_plca_cfg_request_ops,
+ [ETHTOOL_MSG_PLCA_GET_STATUS] = &ethnl_plca_status_request_ops,
+ [ETHTOOL_MSG_MM_GET] = &ethnl_mm_request_ops,
+ [ETHTOOL_MSG_MM_SET] = &ethnl_mm_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -588,6 +606,52 @@ static int ethnl_default_done(struct netlink_callback *cb)
return 0;
}
+static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info req_info = {};
+ const u8 cmd = info->genlhdr->cmd;
+ int ret;
+
+ ops = ethnl_default_requests[cmd];
+ if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
+ return -EOPNOTSUPP;
+ if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr))
+ return -EINVAL;
+
+ ret = ethnl_parse_header_dev_get(&req_info, info->attrs[ops->hdr_attr],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ if (ops->set_validate) {
+ ret = ops->set_validate(&req_info, info);
+ /* 0 means nothing to do */
+ if (ret <= 0)
+ goto out_dev;
+ }
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(req_info.dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = ops->set(&req_info, info);
+ if (ret <= 0)
+ goto out_ops;
+ ethtool_notify(req_info.dev, ops->set_ntf_cmd, NULL);
+
+ ret = 0;
+out_ops:
+ ethnl_ops_complete(req_info.dev);
+out_rtnl:
+ rtnl_unlock();
+out_dev:
+ ethnl_parse_header_dev_put(&req_info);
+ return ret;
+}
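With the shared doit in place, wiring up a new SET command reduces to table entries; a sketch follows, with ETHTOOL_MSG_FOO_SET and its policy hypothetical (the real entries for MM, PLCA and the converted commands appear later in this patch):

/* in ethnl_default_requests[]: */
[ETHTOOL_MSG_FOO_SET]	= &ethnl_foo_request_ops,

/* in ethtool_genl_ops[]: */
{
	.cmd	= ETHTOOL_MSG_FOO_SET,
	.flags	= GENL_UNS_ADMIN_PERM,
	.doit	= ethnl_default_set_doit,
	.policy	= ethnl_foo_set_policy,
	.maxattr = ARRAY_SIZE(ethnl_foo_set_policy) - 1,
},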
+
static const struct ethnl_request_ops *
ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
[ETHTOOL_MSG_LINKINFO_NTF] = &ethnl_linkinfo_request_ops,
@@ -603,6 +667,8 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
[ETHTOOL_MSG_EEE_NTF] = &ethnl_eee_request_ops,
[ETHTOOL_MSG_FEC_NTF] = &ethnl_fec_request_ops,
[ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops,
+ [ETHTOOL_MSG_PLCA_NTF] = &ethnl_plca_cfg_request_ops,
+ [ETHTOOL_MSG_MM_NTF] = &ethnl_mm_request_ops,
};
/* default notification handler */
@@ -696,6 +762,8 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
[ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_MM_NTF] = ethnl_default_notify,
};
void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
@@ -760,7 +828,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_LINKINFO_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_linkinfo,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_linkinfo_set_policy,
.maxattr = ARRAY_SIZE(ethnl_linkinfo_set_policy) - 1,
},
@@ -776,7 +844,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_LINKMODES_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_linkmodes,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_linkmodes_set_policy,
.maxattr = ARRAY_SIZE(ethnl_linkmodes_set_policy) - 1,
},
@@ -801,7 +869,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_DEBUG_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_debug,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_debug_set_policy,
.maxattr = ARRAY_SIZE(ethnl_debug_set_policy) - 1,
},
@@ -818,7 +886,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_WOL_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_wol,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_wol_set_policy,
.maxattr = ARRAY_SIZE(ethnl_wol_set_policy) - 1,
},
@@ -850,7 +918,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_PRIVFLAGS_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_privflags,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_privflags_set_policy,
.maxattr = ARRAY_SIZE(ethnl_privflags_set_policy) - 1,
},
@@ -866,7 +934,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_RINGS_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_rings,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_rings_set_policy,
.maxattr = ARRAY_SIZE(ethnl_rings_set_policy) - 1,
},
@@ -882,7 +950,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_CHANNELS_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_channels,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_channels_set_policy,
.maxattr = ARRAY_SIZE(ethnl_channels_set_policy) - 1,
},
@@ -898,7 +966,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_COALESCE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_coalesce,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_coalesce_set_policy,
.maxattr = ARRAY_SIZE(ethnl_coalesce_set_policy) - 1,
},
@@ -914,7 +982,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_PAUSE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_pause,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_pause_set_policy,
.maxattr = ARRAY_SIZE(ethnl_pause_set_policy) - 1,
},
@@ -930,7 +998,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_EEE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_eee,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_eee_set_policy,
.maxattr = ARRAY_SIZE(ethnl_eee_set_policy) - 1,
},
@@ -977,7 +1045,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_FEC_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_fec,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_fec_set_policy,
.maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1,
},
@@ -1021,7 +1089,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_MODULE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_module,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_module_set_policy,
.maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1,
},
@@ -1037,7 +1105,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_PSE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_pse,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_pse_set_policy,
.maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
},
@@ -1047,6 +1115,47 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_rss_get_policy,
.maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PLCA_GET_CFG,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_plca_get_cfg_policy,
+ .maxattr = ARRAY_SIZE(ethnl_plca_get_cfg_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_PLCA_SET_CFG,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_default_set_doit,
+ .policy = ethnl_plca_set_cfg_policy,
+ .maxattr = ARRAY_SIZE(ethnl_plca_set_cfg_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_PLCA_GET_STATUS,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_plca_get_status_policy,
+ .maxattr = ARRAY_SIZE(ethnl_plca_get_status_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_MM_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_mm_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_mm_get_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_MM_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_default_set_doit,
+ .policy = ethnl_mm_set_policy,
+ .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 3753787ba233..ae0732460e88 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -138,6 +138,32 @@ static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr,
}
/**
+ * ethnl_update_bool() - update bool from NLA_U8 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Use the bool value from NLA_U8 netlink attribute @attr to set the bool
+ * pointed to by @dst to false (if zero) or true (if not); do nothing if
+ * @attr is null. The bool pointed to by @mod is set to true if this
+ * function changed the logical value of *dst, otherwise it is left as is.
+ */
+static inline void ethnl_update_bool(bool *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ u8 val;
+
+ if (!attr)
+ return;
+ val = !!nla_get_u8(attr);
+ if (!!*dst == val)
+ return;
+
+ *dst = val;
+ *mod = true;
+}
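A minimal usage sketch, mirroring how ethnl_set_mm() drives this helper; the foo_* attribute and struct names are invented for illustration.

/* Only attributes actually present in the request can flip @mod, so a
 * request carrying no recognized attributes is a clean no-op. */
static bool foo_apply(struct nlattr **tb, struct foo_cfg *cfg)
{
	bool mod = false;

	ethnl_update_bool(&cfg->enabled, tb[ETHTOOL_A_FOO_ENABLED], &mod);
	return mod;	/* true only if the stored value changed */
}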
+
+/**
* ethnl_update_binary() - update binary data from NLA_BINARY attribute
* @dst: value to update
* @len: destination buffer length
@@ -258,13 +284,14 @@ int ethnl_ops_begin(struct net_device *dev);
void ethnl_ops_complete(struct net_device *dev);
/**
- * struct ethnl_request_ops - unified handling of GET requests
+ * struct ethnl_request_ops - unified handling of GET and SET requests
* @request_cmd: command id for request (GET)
* @reply_cmd: command id for reply (GET_REPLY)
* @hdr_attr: attribute type for request header
* @req_info_size: size of request info
* @reply_data_size: size of reply data
* @allow_nodev_do: allow non-dump request with no device identification
+ * @set_ntf_cmd: notification to generate on changes (SET)
* @parse_request:
* Parse request except common header (struct ethnl_req_info). Common
* header is already filled on entry, the rest up to @repdata_offset
@@ -293,6 +320,18 @@ void ethnl_ops_complete(struct net_device *dev);
* used e.g. to free any additional data structures outside the main
* structure which were allocated by ->prepare_data(). When processing
* dump requests, ->cleanup() is called for each message.
+ * @set_validate:
+ * Check if set operation is supported for a given device, and perform
+ * extra input checks. Expected return values:
+ * - 0 if the operation is a noop for the device (rare)
+ * - 1 if operation should proceed to calling @set
+ * - negative errno on errors
+ * Called without any locks, just a reference on the netdev.
+ * @set:
+ * Execute the set operation. The implementation should return
+ * - 0 if no configuration has changed
+ * - 1 if configuration changed and notification should be generated
+ * - negative errno on errors
*
* Description of variable parts of GET request handling when using the
* unified infrastructure. When used, a pointer to an instance of this
@@ -309,6 +348,7 @@ struct ethnl_request_ops {
unsigned int req_info_size;
unsigned int reply_data_size;
bool allow_nodev_do;
+ u8 set_ntf_cmd;
int (*parse_request)(struct ethnl_req_info *req_info,
struct nlattr **tb,
@@ -322,6 +362,11 @@ struct ethnl_request_ops {
const struct ethnl_req_info *req_info,
const struct ethnl_reply_data *reply_data);
void (*cleanup_data)(struct ethnl_reply_data *reply_data);
+
+ int (*set_validate)(struct ethnl_req_info *req_info,
+ struct genl_info *info);
+ int (*set)(struct ethnl_req_info *req_info,
+ struct genl_info *info);
};
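To make the @set_validate/@set contract above concrete, a minimal hypothetical pair could look like this (foo names invented; ethnl_default_set_doit() supplies header parsing, locking, and notification):

static int ethnl_set_foo_validate(struct ethnl_req_info *req_info,
				  struct genl_info *info)
{
	const struct ethtool_ops *ops = req_info->dev->ethtool_ops;

	/* no locks held here: capability and attribute sanity checks only */
	return ops->get_foo && ops->set_foo ? 1 : -EOPNOTSUPP;
}

static int ethnl_set_foo(struct ethnl_req_info *req_info,
			 struct genl_info *info)
{
	struct net_device *dev = req_info->dev;
	struct nlattr **tb = info->attrs;
	bool mod = false;
	u32 val;
	int ret;

	dev->ethtool_ops->get_foo(dev, &val);
	ethnl_update_u32(&val, tb[ETHTOOL_A_FOO_VAL], &mod);
	if (!mod)
		return 0;		/* unchanged: no notification */

	ret = dev->ethtool_ops->set_foo(dev, val);
	return ret < 0 ? ret : 1;	/* 1: emit @set_ntf_cmd */
}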
/* request handlers */
@@ -347,6 +392,9 @@ extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct ethnl_request_ops ethnl_module_request_ops;
extern const struct ethnl_request_ops ethnl_pse_request_ops;
extern const struct ethnl_request_ops ethnl_rss_request_ops;
+extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
+extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
+extern const struct ethnl_request_ops ethnl_mm_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -370,7 +418,7 @@ extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEAD
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1];
-extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_HEADER + 1];
+extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC + 1];
extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1];
@@ -381,33 +429,25 @@ extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INF
extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
-extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
+extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1];
extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1];
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
+extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1];
+extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1];
+extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
+extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
+extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
-int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
-int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_module(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index a8c113d244db..6657d0b888d8 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -5,8 +5,12 @@
struct pause_req_info {
struct ethnl_req_info base;
+ enum ethtool_mac_stats_src src;
};
+#define PAUSE_REQINFO(__req_base) \
+ container_of(__req_base, struct pause_req_info, base)
+
struct pause_reply_data {
struct ethnl_reply_data base;
struct ethtool_pauseparam pauseparam;
@@ -19,13 +23,40 @@ struct pause_reply_data {
const struct nla_policy ethnl_pause_get_policy[] = {
[ETHTOOL_A_PAUSE_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy_stats),
+ [ETHTOOL_A_PAUSE_STATS_SRC] =
+ NLA_POLICY_MAX(NLA_U32, ETHTOOL_MAC_STATS_SRC_PMAC),
};
+static int pause_parse_request(struct ethnl_req_info *req_base,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ enum ethtool_mac_stats_src src = ETHTOOL_MAC_STATS_SRC_AGGREGATE;
+ struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
+
+ if (tb[ETHTOOL_A_PAUSE_STATS_SRC]) {
+ if (!(req_base->flags & ETHTOOL_FLAG_STATS)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "ETHTOOL_FLAG_STATS must be set when requesting a source of stats");
+ return -EINVAL;
+ }
+
+ src = nla_get_u32(tb[ETHTOOL_A_PAUSE_STATS_SRC]);
+ }
+
+ req_info->src = src;
+
+ return 0;
+}
+
static int pause_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
struct genl_info *info)
{
+ const struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct pause_reply_data *data = PAUSE_REPDATA(reply_base);
+ enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
int ret;
@@ -34,14 +65,26 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
ethtool_stats_init((u64 *)&data->pausestat,
sizeof(data->pausestat) / 8);
+ data->pausestat.src = src;
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
+
+ if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
+ src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
+ !__ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device does not support MAC merge layer");
+ ethnl_ops_complete(dev);
+ return -EOPNOTSUPP;
+ }
+
dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam);
if (req_base->flags & ETHTOOL_FLAG_STATS &&
dev->ethtool_ops->get_pause_stats)
dev->ethtool_ops->get_pause_stats(dev, &data->pausestat);
+
ethnl_ops_complete(dev);
return 0;
@@ -56,6 +99,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base,
if (req_base->flags & ETHTOOL_FLAG_STATS)
n += nla_total_size(0) + /* _PAUSE_STATS */
+ nla_total_size(sizeof(u32)) + /* _PAUSE_STATS_SRC */
nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT;
return n;
}
@@ -77,6 +121,9 @@ static int pause_put_stats(struct sk_buff *skb,
const u16 pad = ETHTOOL_A_PAUSE_STAT_PAD;
struct nlattr *nest;
+ if (nla_put_u32(skb, ETHTOOL_A_PAUSE_STATS_SRC, pause_stats->src))
+ return -EMSGSIZE;
+
nest = nla_nest_start(skb, ETHTOOL_A_PAUSE_STATS);
if (!nest)
return -EMSGSIZE;
@@ -114,18 +161,6 @@ static int pause_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_pause_request_ops = {
- .request_cmd = ETHTOOL_MSG_PAUSE_GET,
- .reply_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_PAUSE_HEADER,
- .req_info_size = sizeof(struct pause_req_info),
- .reply_data_size = sizeof(struct pause_reply_data),
-
- .prepare_data = pause_prepare_data,
- .reply_size = pause_reply_size,
- .fill_reply = pause_fill_reply,
-};
-
/* PAUSE_SET */
const struct nla_policy ethnl_pause_set_policy[] = {
@@ -136,51 +171,49 @@ const struct nla_policy ethnl_pause_set_policy[] = {
[ETHTOOL_A_PAUSE_TX] = { .type = NLA_U8 },
};
-int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_pause_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_pauseparam && ops->set_pauseparam ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_pause(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
struct ethtool_pauseparam params = {};
- struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_PAUSE_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_pauseparam || !ops->set_pauseparam)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ops->get_pauseparam(dev, &params);
+ dev->ethtool_ops->get_pauseparam(dev, &params);
ethnl_update_bool32(&params.autoneg, tb[ETHTOOL_A_PAUSE_AUTONEG], &mod);
ethnl_update_bool32(&params.rx_pause, tb[ETHTOOL_A_PAUSE_RX], &mod);
ethnl_update_bool32(&params.tx_pause, tb[ETHTOOL_A_PAUSE_TX], &mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_pauseparam(dev, &params);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_PAUSE_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_pause_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PAUSE_GET,
+ .reply_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PAUSE_HEADER,
+ .req_info_size = sizeof(struct pause_req_info),
+ .reply_data_size = sizeof(struct pause_reply_data),
+
+ .parse_request = pause_parse_request,
+ .prepare_data = pause_prepare_data,
+ .reply_size = pause_reply_size,
+ .fill_reply = pause_fill_reply,
+
+ .set_validate = ethnl_set_pause_validate,
+ .set = ethnl_set_pause,
+ .set_ntf_cmd = ETHTOOL_MSG_PAUSE_NTF,
+};
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
new file mode 100644
index 000000000000..5a8cab4df0c9
--- /dev/null
+++ b/net/ethtool/plca.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/phy.h>
+#include <linux/ethtool_netlink.h>
+
+#include "netlink.h"
+#include "common.h"
+
+struct plca_req_info {
+ struct ethnl_req_info base;
+};
+
+struct plca_reply_data {
+ struct ethnl_reply_data base;
+ struct phy_plca_cfg plca_cfg;
+ struct phy_plca_status plca_st;
+};
+
+// Helpers ------------------------------------------------------------------ //
+
+#define PLCA_REPDATA(__reply_base) \
+ container_of(__reply_base, struct plca_reply_data, base)
+
+static void plca_update_sint(int *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ if (!attr)
+ return;
+
+ *dst = nla_get_u32(attr);
+ *mod = true;
+}
+
+// PLCA get configuration message ------------------------------------------- //
+
+const struct nla_policy ethnl_plca_get_cfg_policy[] = {
+ [ETHTOOL_A_PLCA_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_phy_ops *ops;
+ int ret;
+
+ // check that the PHY device is available and connected
+ if (!dev->phydev) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ // note: rtnl_lock is held already by ethnl_default_doit
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->get_plca_cfg) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out;
+
+ memset(&data->plca_cfg, 0xff,
+ sizeof_field(struct plca_reply_data, plca_cfg));
+
+ ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg);
+ ethnl_ops_complete(dev);
+
+out:
+ return ret;
+}
+
+static int plca_get_cfg_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ return nla_total_size(sizeof(u16)) + /* _VERSION */
+ nla_total_size(sizeof(u8)) + /* _ENABLED */
+ nla_total_size(sizeof(u32)) + /* _NODE_CNT */
+ nla_total_size(sizeof(u32)) + /* _NODE_ID */
+ nla_total_size(sizeof(u32)) + /* _TO_TIMER */
+ nla_total_size(sizeof(u32)) + /* _BURST_COUNT */
+ nla_total_size(sizeof(u32)); /* _BURST_TIMER */
+}
+
+static int plca_get_cfg_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ const struct phy_plca_cfg *plca = &data->plca_cfg;
+
+ if ((plca->version >= 0 &&
+ nla_put_u16(skb, ETHTOOL_A_PLCA_VERSION, plca->version)) ||
+ (plca->enabled >= 0 &&
+ nla_put_u8(skb, ETHTOOL_A_PLCA_ENABLED, !!plca->enabled)) ||
+ (plca->node_id >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_ID, plca->node_id)) ||
+ (plca->node_cnt >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_CNT, plca->node_cnt)) ||
+ (plca->to_tmr >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_TO_TMR, plca->to_tmr)) ||
+ (plca->burst_cnt >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_CNT, plca->burst_cnt)) ||
+ (plca->burst_tmr >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_TMR, plca->burst_tmr)))
+ return -EMSGSIZE;
+
+ return 0;
+};
+
+// PLCA set configuration message ------------------------------------------- //
+
+const struct nla_policy ethnl_plca_set_cfg_policy[] = {
+ [ETHTOOL_A_PLCA_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255),
+ [ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255),
+ [ETHTOOL_A_PLCA_TO_TMR] = NLA_POLICY_MAX(NLA_U32, 255),
+ [ETHTOOL_A_PLCA_BURST_CNT] = NLA_POLICY_MAX(NLA_U32, 255),
+ [ETHTOOL_A_PLCA_BURST_TMR] = NLA_POLICY_MAX(NLA_U32, 255),
+};
+
+static int
+ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
+ const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_plca_cfg plca_cfg;
+ bool mod = false;
+ int ret;
+
+ // check that the PHY device is available and connected
+ if (!dev->phydev)
+ return -EOPNOTSUPP;
+
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->set_plca_cfg)
+ return -EOPNOTSUPP;
+
+ memset(&plca_cfg, 0xff, sizeof(plca_cfg));
+ plca_update_sint(&plca_cfg.enabled, tb[ETHTOOL_A_PLCA_ENABLED], &mod);
+ plca_update_sint(&plca_cfg.node_id, tb[ETHTOOL_A_PLCA_NODE_ID], &mod);
+ plca_update_sint(&plca_cfg.node_cnt, tb[ETHTOOL_A_PLCA_NODE_CNT], &mod);
+ plca_update_sint(&plca_cfg.to_tmr, tb[ETHTOOL_A_PLCA_TO_TMR], &mod);
+ plca_update_sint(&plca_cfg.burst_cnt, tb[ETHTOOL_A_PLCA_BURST_CNT],
+ &mod);
+ plca_update_sint(&plca_cfg.burst_tmr, tb[ETHTOOL_A_PLCA_BURST_TMR],
+ &mod);
+ if (!mod)
+ return 0;
+
+ ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack);
+ return ret < 0 ? ret : 1;
+}
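The memset(0xff) above makes every signed field read as -1 ("not requested"), so a PHY driver's set_plca_cfg only touches what the user actually sent. A hedged sketch of that driver side, with the foo_* names and registers hypothetical:

static int foo_set_plca_cfg(struct phy_device *phydev,
			    const struct phy_plca_cfg *cfg,
			    struct netlink_ext_ack *extack)
{
	/* fields left at -1 by ethnl_set_plca() were not in the request */
	if (cfg->node_id >= 0)
		foo_write(phydev, FOO_PLCA_NODE_ID, cfg->node_id);
	if (cfg->node_cnt >= 0)
		foo_write(phydev, FOO_PLCA_NODE_CNT, cfg->node_cnt);
	if (cfg->enabled >= 0)
		foo_write(phydev, FOO_PLCA_CTRL, !!cfg->enabled);

	return 0;
}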
+
+const struct ethnl_request_ops ethnl_plca_cfg_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PLCA_GET_CFG,
+ .reply_cmd = ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
+ .hdr_attr = ETHTOOL_A_PLCA_HEADER,
+ .req_info_size = sizeof(struct plca_req_info),
+ .reply_data_size = sizeof(struct plca_reply_data),
+
+ .prepare_data = plca_get_cfg_prepare_data,
+ .reply_size = plca_get_cfg_reply_size,
+ .fill_reply = plca_get_cfg_fill_reply,
+
+ .set = ethnl_set_plca,
+ .set_ntf_cmd = ETHTOOL_MSG_PLCA_NTF,
+};
+
+// PLCA get status message -------------------------------------------------- //
+
+const struct nla_policy ethnl_plca_get_status_policy[] = {
+ [ETHTOOL_A_PLCA_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_phy_ops *ops;
+ int ret;
+
+ // check that the PHY device is available and connected
+ if (!dev->phydev) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ // note: rtnl_lock is held already by ethnl_default_doit
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->get_plca_status) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out;
+
+ memset(&data->plca_st, 0xff,
+ sizeof_field(struct plca_reply_data, plca_st));
+
+ ret = ops->get_plca_status(dev->phydev, &data->plca_st);
+ ethnl_ops_complete(dev);
+out:
+ return ret;
+}
+
+static int plca_get_status_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ return nla_total_size(sizeof(u8)); /* _STATUS */
+}
+
+static int plca_get_status_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ const u8 status = data->plca_st.pst;
+
+ if (nla_put_u8(skb, ETHTOOL_A_PLCA_STATUS, !!status))
+ return -EMSGSIZE;
+
+ return 0;
+};
+
+const struct ethnl_request_ops ethnl_plca_status_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PLCA_GET_STATUS,
+ .reply_cmd = ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
+ .hdr_attr = ETHTOOL_A_PLCA_HEADER,
+ .req_info_size = sizeof(struct plca_req_info),
+ .reply_data_size = sizeof(struct plca_reply_data),
+
+ .prepare_data = plca_get_status_prepare_data,
+ .reply_size = plca_get_status_reply_size,
+ .fill_reply = plca_get_status_fill_reply,
+};
diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c
index 4c7bfa81e4ab..23264a1ebf12 100644
--- a/net/ethtool/privflags.c
+++ b/net/ethtool/privflags.c
@@ -118,19 +118,6 @@ static void privflags_cleanup_data(struct ethnl_reply_data *reply_data)
kfree(data->priv_flag_names);
}
-const struct ethnl_request_ops ethnl_privflags_request_ops = {
- .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET,
- .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
- .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER,
- .req_info_size = sizeof(struct privflags_req_info),
- .reply_data_size = sizeof(struct privflags_reply_data),
-
- .prepare_data = privflags_prepare_data,
- .reply_size = privflags_reply_size,
- .fill_reply = privflags_fill_reply,
- .cleanup_data = privflags_cleanup_data,
-};
-
/* PRIVFLAGS_SET */
const struct nla_policy ethnl_privflags_set_policy[] = {
@@ -139,63 +126,70 @@ const struct nla_policy ethnl_privflags_set_policy[] = {
[ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_NESTED },
};
-int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_privflags_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ if (!info->attrs[ETHTOOL_A_PRIVFLAGS_FLAGS])
+ return -EINVAL;
+
+ if (!ops->get_priv_flags || !ops->set_priv_flags ||
+ !ops->get_sset_count || !ops->get_strings)
+ return -EOPNOTSUPP;
+ return 1;
+}
+
+static int
+ethnl_set_privflags(struct ethnl_req_info *req_info, struct genl_info *info)
{
const char (*names)[ETH_GSTRING_LEN] = NULL;
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
- struct net_device *dev;
unsigned int nflags;
bool mod = false;
bool compact;
u32 flags;
int ret;
- if (!tb[ETHTOOL_A_PRIVFLAGS_FLAGS])
- return -EINVAL;
ret = ethnl_bitset_is_compact(tb[ETHTOOL_A_PRIVFLAGS_FLAGS], &compact);
if (ret < 0)
return ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_PRIVFLAGS_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_priv_flags || !ops->set_priv_flags ||
- !ops->get_sset_count || !ops->get_strings)
- goto out_dev;
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
ret = ethnl_get_priv_flags_info(dev, &nflags, compact ? NULL : &names);
if (ret < 0)
- goto out_ops;
- flags = ops->get_priv_flags(dev);
+ return ret;
+ flags = dev->ethtool_ops->get_priv_flags(dev);
ret = ethnl_update_bitset32(&flags, nflags,
tb[ETHTOOL_A_PRIVFLAGS_FLAGS], names,
info->extack, &mod);
if (ret < 0 || !mod)
goto out_free;
- ret = ops->set_priv_flags(dev, flags);
+ ret = dev->ethtool_ops->set_priv_flags(dev, flags);
if (ret < 0)
goto out_free;
- ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL);
+ ret = 1;
out_free:
kfree(names);
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
return ret;
}
+
+const struct ethnl_request_ops ethnl_privflags_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET,
+ .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER,
+ .req_info_size = sizeof(struct privflags_req_info),
+ .reply_data_size = sizeof(struct privflags_reply_data),
+
+ .prepare_data = privflags_prepare_data,
+ .reply_size = privflags_reply_size,
+ .fill_reply = privflags_fill_reply,
+ .cleanup_data = privflags_cleanup_data,
+
+ .set_validate = ethnl_set_privflags_validate,
+ .set = ethnl_set_privflags,
+ .set_ntf_cmd = ETHTOOL_MSG_PRIVFLAGS_NTF,
+};
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index e8683e485dc9..a5b607b0a652 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -106,18 +106,6 @@ static int pse_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_pse_request_ops = {
- .request_cmd = ETHTOOL_MSG_PSE_GET,
- .reply_cmd = ETHTOOL_MSG_PSE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_PSE_HEADER,
- .req_info_size = sizeof(struct pse_req_info),
- .reply_data_size = sizeof(struct pse_reply_data),
-
- .prepare_data = pse_prepare_data,
- .reply_size = pse_reply_size,
- .fill_reply = pse_fill_reply,
-};
-
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
@@ -127,59 +115,50 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
};
-static int pse_set_pse_config(struct net_device *dev,
- struct netlink_ext_ack *extack,
- struct nlattr **tb)
+static int
+ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct phy_device *phydev = dev->phydev;
- struct pse_control_config config = {};
+ return !!info->attrs[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL];
+}
- /* Optional attribute. Do not return error if not set. */
- if (!tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL])
- return 0;
+static int
+ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
+ struct pse_control_config config = {};
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
/* these values are already validated by the ethnl_pse_set_policy */
config.admin_cotrol = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+ phydev = dev->phydev;
if (!phydev) {
- NL_SET_ERR_MSG(extack, "No PHY is attached");
+ NL_SET_ERR_MSG(info->extack, "No PHY is attached");
return -EOPNOTSUPP;
}
if (!phydev->psec) {
- NL_SET_ERR_MSG(extack, "No PSE is attached");
+ NL_SET_ERR_MSG(info->extack, "No PSE is attached");
return -EOPNOTSUPP;
}
- return pse_ethtool_set_config(phydev->psec, extack, &config);
+ /* Return errno directly - PSE has no notification */
+ return pse_ethtool_set_config(phydev->psec, info->extack, &config);
}
-int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info)
-{
- struct ethnl_req_info req_info = {};
- struct nlattr **tb = info->attrs;
- struct net_device *dev;
- int ret;
-
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_PSE_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
-
- dev = req_info.dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
- ret = pse_set_pse_config(dev, info->extack, tb);
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
+const struct ethnl_request_ops ethnl_pse_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PSE_GET,
+ .reply_cmd = ETHTOOL_MSG_PSE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PSE_HEADER,
+ .req_info_size = sizeof(struct pse_req_info),
+ .reply_data_size = sizeof(struct pse_reply_data),
- ethnl_parse_header_dev_put(&req_info);
+ .prepare_data = pse_prepare_data,
+ .reply_size = pse_reply_size,
+ .fill_reply = pse_fill_reply,
- return ret;
-}
+ .set_validate = ethnl_set_pse_validate,
+ .set = ethnl_set_pse,
+ /* PSE has no notification */
+};
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index fa3ec8d438f7..2a2d3539630c 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -102,18 +102,6 @@ static int rings_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_rings_request_ops = {
- .request_cmd = ETHTOOL_MSG_RINGS_GET,
- .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY,
- .hdr_attr = ETHTOOL_A_RINGS_HEADER,
- .req_info_size = sizeof(struct rings_req_info),
- .reply_data_size = sizeof(struct rings_reply_data),
-
- .prepare_data = rings_prepare_data,
- .reply_size = rings_reply_size,
- .fill_reply = rings_fill_reply,
-};
-
/* RINGS_SET */
const struct nla_policy ethnl_rings_set_policy[] = {
@@ -128,62 +116,53 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
};
-int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_rings_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct kernel_ethtool_ringparam kernel_ringparam = {};
- struct ethtool_ringparam ringparam = {};
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
struct nlattr **tb = info->attrs;
- const struct nlattr *err_attr;
- const struct ethtool_ops *ops;
- struct net_device *dev;
- bool mod = false;
- int ret;
-
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_RINGS_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_ringparam || !ops->set_ringparam)
- goto out_dev;
if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) {
- ret = -EOPNOTSUPP;
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_RINGS_RX_BUF_LEN],
"setting rx buf len not supported");
- goto out_dev;
+ return -EOPNOTSUPP;
}
if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
- ret = -EOPNOTSUPP;
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_RINGS_CQE_SIZE],
"setting cqe size not supported");
- goto out_dev;
+ return -EOPNOTSUPP;
}
if (tb[ETHTOOL_A_RINGS_TX_PUSH] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) {
- ret = -EOPNOTSUPP;
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_RINGS_TX_PUSH],
"setting tx push not supported");
- goto out_dev;
+ return -EOPNOTSUPP;
}
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack);
+ return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct kernel_ethtool_ringparam kernel_ringparam = {};
+ struct ethtool_ringparam ringparam = {};
+ struct net_device *dev = req_info->dev;
+ struct nlattr **tb = info->attrs;
+ const struct nlattr *err_attr;
+ bool mod = false;
+ int ret;
+
+ dev->ethtool_ops->get_ringparam(dev, &ringparam,
+ &kernel_ringparam, info->extack);
ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod);
ethnl_update_u32(&ringparam.rx_mini_pending,
@@ -197,9 +176,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
ethnl_update_u8(&kernel_ringparam.tx_push,
tb[ETHTOOL_A_RINGS_TX_PUSH], &mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
/* ensure new ring parameters are within limits */
if (ringparam.rx_pending > ringparam.rx_max_pending)
@@ -213,23 +191,28 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
else
err_attr = NULL;
if (err_attr) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, err_attr,
"requested ring size exceeds maximum");
- goto out_ops;
+ return -EINVAL;
}
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_rings_request_ops = {
+ .request_cmd = ETHTOOL_MSG_RINGS_GET,
+ .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_RINGS_HEADER,
+ .req_info_size = sizeof(struct rings_req_info),
+ .reply_data_size = sizeof(struct rings_reply_data),
+
+ .prepare_data = rings_prepare_data,
+ .reply_size = rings_reply_size,
+ .fill_reply = rings_fill_reply,
+
+ .set_validate = ethnl_set_rings_validate,
+ .set = ethnl_set_rings,
+ .set_ntf_cmd = ETHTOOL_MSG_RINGS_NTF,
+};
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index a20e0a24ff61..010ed19ccc99 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -7,6 +7,7 @@
struct stats_req_info {
struct ethnl_req_info base;
DECLARE_BITMAP(stat_mask, __ETHTOOL_STATS_CNT);
+ enum ethtool_mac_stats_src src;
};
#define STATS_REQINFO(__req_base) \
@@ -75,16 +76,19 @@ const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = {
[ETHTOOL_A_STATS_RMON_JABBER] = "etherStatsJabbers",
};
-const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = {
+const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = {
[ETHTOOL_A_STATS_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy),
[ETHTOOL_A_STATS_GROUPS] = { .type = NLA_NESTED },
+ [ETHTOOL_A_STATS_SRC] =
+ NLA_POLICY_MAX(NLA_U32, ETHTOOL_MAC_STATS_SRC_PMAC),
};
static int stats_parse_request(struct ethnl_req_info *req_base,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
+ enum ethtool_mac_stats_src src = ETHTOOL_MAC_STATS_SRC_AGGREGATE;
struct stats_req_info *req_info = STATS_REQINFO(req_base);
bool mod = false;
int err;
@@ -100,6 +104,11 @@ static int stats_parse_request(struct ethnl_req_info *req_base,
return -EINVAL;
}
+ if (tb[ETHTOOL_A_STATS_SRC])
+ src = nla_get_u32(tb[ETHTOOL_A_STATS_SRC]);
+
+ req_info->src = src;
+
return 0;
}
@@ -108,7 +117,9 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct genl_info *info)
{
const struct stats_req_info *req_info = STATS_REQINFO(req_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct stats_reply_data *data = STATS_REPDATA(reply_base);
+ enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
int ret;
@@ -116,11 +127,25 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
+ if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
+ src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
+ !__ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device does not support MAC merge layer");
+ ethnl_ops_complete(dev);
+ return -EOPNOTSUPP;
+ }
+
/* Mark all stats as unset (see ETHTOOL_STAT_NOT_SET) to prevent them
* from being reported to user space in case driver did not set them.
*/
memset(&data->stats, 0xff, sizeof(data->stats));
+ data->phy_stats.src = src;
+ data->mac_stats.src = src;
+ data->ctrl_stats.src = src;
+ data->rmon_stats.src = src;
+
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
dev->ethtool_ops->get_eth_phy_stats)
dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats);
@@ -146,6 +171,8 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
unsigned int n_grps = 0, n_stats = 0;
int len = 0;
+ len += nla_total_size(sizeof(u32)); /* _STATS_SRC */
+
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) {
n_stats += sizeof(struct ethtool_eth_phy_stats) / sizeof(u64);
n_grps++;
@@ -379,6 +406,9 @@ static int stats_fill_reply(struct sk_buff *skb,
const struct stats_reply_data *data = STATS_REPDATA(reply_base);
int ret = 0;
+ if (nla_put_u32(skb, ETHTOOL_A_STATS_SRC, req_info->src))
+ return -EMSGSIZE;
+
if (!ret && test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask))
ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_PHY,
ETH_SS_STATS_ETH_PHY,
@@ -410,3 +440,130 @@ const struct ethnl_request_ops ethnl_stats_request_ops = {
.reply_size = stats_reply_size,
.fill_reply = stats_fill_reply,
};
+
+static u64 ethtool_stats_sum(u64 a, u64 b)
+{
+ if (a == ETHTOOL_STAT_NOT_SET)
+ return b;
+ if (b == ETHTOOL_STAT_NOT_SET)
+ return a;
+ return a + b;
+}
+
+/* Avoid modifying the aggregation procedure every time a new counter is added
+ * by treating the structures as an array of u64 statistics.
+ */
+static void ethtool_aggregate_stats(void *aggr_stats, const void *emac_stats,
+ const void *pmac_stats, size_t stats_size,
+ size_t stats_offset)
+{
+ size_t num_stats = stats_size / sizeof(u64);
+ const u64 *s1 = emac_stats + stats_offset;
+ const u64 *s2 = pmac_stats + stats_offset;
+ u64 *s = aggr_stats + stats_offset;
+ int i;
+
+ for (i = 0; i < num_stats; i++)
+ s[i] = ethtool_stats_sum(s1[i], s2[i]);
+}
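A few worked cases of the sentinel-aware sum (ETHTOOL_STAT_NOT_SET is all-ones), which is why a counter that neither MAC reports stays hidden from user space:

/* Illustration only; uses the two static helpers defined above. */
static void ethtool_stats_sum_examples(void)
{
	u64 a = ethtool_stats_sum(10, 32);			/* 42 */
	u64 b = ethtool_stats_sum(10, ETHTOOL_STAT_NOT_SET);	/* 10 */
	u64 c = ethtool_stats_sum(ETHTOOL_STAT_NOT_SET,
				  ETHTOOL_STAT_NOT_SET);	/* stays NOT_SET */
}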
+
+void ethtool_aggregate_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_eth_mac_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_eth_mac_stats(dev, &emac);
+ ops->get_eth_mac_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(mac_stats, &emac, &pmac,
+ sizeof(mac_stats->stats),
+ offsetof(struct ethtool_eth_mac_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_mac_stats);
+
+void ethtool_aggregate_phy_stats(struct net_device *dev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_eth_phy_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_eth_phy_stats(dev, &emac);
+ ops->get_eth_phy_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(phy_stats, &emac, &pmac,
+ sizeof(phy_stats->stats),
+ offsetof(struct ethtool_eth_phy_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_phy_stats);
+
+void ethtool_aggregate_ctrl_stats(struct net_device *dev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_eth_ctrl_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_eth_ctrl_stats(dev, &emac);
+ ops->get_eth_ctrl_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(ctrl_stats, &emac, &pmac,
+ sizeof(ctrl_stats->stats),
+ offsetof(struct ethtool_eth_ctrl_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_ctrl_stats);
+
+void ethtool_aggregate_pause_stats(struct net_device *dev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_pause_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_pause_stats(dev, &emac);
+ ops->get_pause_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(pause_stats, &emac, &pmac,
+ sizeof(pause_stats->stats),
+ offsetof(struct ethtool_pause_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_pause_stats);
+
+void ethtool_aggregate_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_rmon_hist_range *dummy;
+ struct ethtool_rmon_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_rmon_stats(dev, &emac, &dummy);
+ ops->get_rmon_stats(dev, &pmac, &dummy);
+
+ ethtool_aggregate_stats(rmon_stats, &emac, &pmac,
+ sizeof(rmon_stats->stats),
+ offsetof(struct ethtool_rmon_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_rmon_stats);
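These exported helpers re-invoke the driver's own callback once per MAC, so the expected driver pattern is a dispatch on ->src; a hedged sketch, with the foo_* names hypothetical:

static void foo_get_eth_mac_stats(struct net_device *dev,
				  struct ethtool_eth_mac_stats *s)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (s->src) {
	case ETHTOOL_MAC_STATS_SRC_EMAC:
		foo_read_counters(priv, FOO_EMAC, s);
		break;
	case ETHTOOL_MAC_STATS_SRC_PMAC:
		foo_read_counters(priv, FOO_PMAC, s);
		break;
	case ETHTOOL_MAC_STATS_SRC_AGGREGATE:
		/* calls back in here once for the eMAC, once for the pMAC */
		ethtool_aggregate_mac_stats(dev, s);
		break;
	}
}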
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index 88f435e76481..a4a43d9e6e9d 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -82,18 +82,6 @@ static int wol_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_wol_request_ops = {
- .request_cmd = ETHTOOL_MSG_WOL_GET,
- .reply_cmd = ETHTOOL_MSG_WOL_GET_REPLY,
- .hdr_attr = ETHTOOL_A_WOL_HEADER,
- .req_info_size = sizeof(struct wol_req_info),
- .reply_data_size = sizeof(struct wol_reply_data),
-
- .prepare_data = wol_prepare_data,
- .reply_size = wol_reply_size,
- .fill_reply = wol_fill_reply,
-};
-
/* WOL_SET */
const struct nla_policy ethnl_wol_set_policy[] = {
@@ -104,67 +92,66 @@ const struct nla_policy ethnl_wol_set_policy[] = {
.len = SOPASS_MAX },
};
-int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_wol_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_wol && ops->set_wol ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_WOL_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
dev->ethtool_ops->get_wol(dev, &wol);
ret = ethnl_update_bitset32(&wol.wolopts, WOL_MODE_COUNT,
tb[ETHTOOL_A_WOL_MODES], wol_mode_names,
info->extack, &mod);
if (ret < 0)
- goto out_ops;
+ return ret;
if (wol.wolopts & ~wol.supported) {
NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_WOL_MODES],
"cannot enable unsupported WoL mode");
- ret = -EINVAL;
- goto out_ops;
+ return -EINVAL;
}
if (tb[ETHTOOL_A_WOL_SOPASS]) {
if (!(wol.supported & WAKE_MAGICSECURE)) {
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_WOL_SOPASS],
"magicsecure not supported, cannot set password");
- ret = -EINVAL;
- goto out_ops;
+ return -EINVAL;
}
ethnl_update_binary(wol.sopass, sizeof(wol.sopass),
tb[ETHTOOL_A_WOL_SOPASS], &mod);
}
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_wol(dev, &wol);
if (ret)
- goto out_ops;
+ return ret;
dev->wol_enabled = !!wol.wolopts;
- ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_wol_request_ops = {
+ .request_cmd = ETHTOOL_MSG_WOL_GET,
+ .reply_cmd = ETHTOOL_MSG_WOL_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_WOL_HEADER,
+ .req_info_size = sizeof(struct wol_req_info),
+ .reply_data_size = sizeof(struct wol_reply_data),
+
+ .prepare_data = wol_prepare_data,
+ .reply_size = wol_reply_size,
+ .fill_reply = wol_fill_reply,
+
+ .set_validate = ethnl_set_wol_validate,
+ .set = ethnl_set_wol,
+ .set_ntf_cmd = ETHTOOL_MSG_WOL_NTF,
+};
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index af7d2cf490fb..880277c9fd07 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
+fou-y := fou_core.o fou_nl.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf11f10927e1..2c778b013cb0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1486,6 +1486,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
if (unlikely(ip_fast_csum((u8 *)iph, 5)))
goto out;
+ NAPI_GRO_CB(skb)->proto = proto;
id = ntohl(*(__be32 *)&iph->id);
flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
id >>= 16;
@@ -1619,9 +1620,9 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
- __be16 newlen = htons(skb->len - nhoff);
struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
const struct net_offload *ops;
+ __be16 totlen = iph->tot_len;
int proto = iph->protocol;
int err = -ENOSYS;
@@ -1630,8 +1631,8 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
skb_set_inner_network_header(skb, nhoff);
}
- csum_replace2(&iph->check, iph->tot_len, newlen);
- iph->tot_len = newlen;
+ iph_set_totlen(iph, skb->len - nhoff);
+ csum_replace2(&iph->check, totlen, iph->tot_len);
ops = rcu_dereference(inet_offloads[proto]);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
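This hunk relies on an iph_set_totlen() helper introduced elsewhere in the series (IPv4 BIG TCP groundwork): a GRO-merged packet can exceed what the 16-bit tot_len field can carry. Presumably the helper looks like this:

/* Assumed definition (include/linux/ip.h in this series): lengths above
 * IP_MAX_MTU cannot be represented, so tot_len is set to 0 and readers
 * fall back to skb->len. */
static inline void iph_set_totlen(struct iphdr *iph, unsigned int len)
{
	iph->tot_len = len <= IP_MAX_MTU ? htons(len) : 0;
}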
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4517d2bd186a..13fc0c185cd9 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -248,7 +248,8 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
}
static int bpf_tcp_ca_check_member(const struct btf_type *t,
- const struct btf_member *member)
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
{
if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
return -ENOTSUPP;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 6cd3b6c559f0..79ae7204e8ed 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -2222,7 +2222,7 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
if (len_delta != 0) {
iph->ihl = 5 + (opt_len >> 2);
- iph->tot_len = htons(skb->len);
+ iph_set_totlen(iph, skb->len);
}
ip_send_check(iph);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou_core.c
index 0c3c6d0cee29..cafec9b4eee0 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou_core.c
@@ -19,6 +19,8 @@
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
+#include "fou_nl.h"
+
struct fou {
struct socket *sock;
u8 protocol;
@@ -640,20 +642,6 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
static struct genl_family fou_nl_family;
-static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
- [FOU_ATTR_PORT] = { .type = NLA_U16, },
- [FOU_ATTR_AF] = { .type = NLA_U8, },
- [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
- [FOU_ATTR_TYPE] = { .type = NLA_U8, },
- [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
- [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
- [FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
- [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), },
- [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), },
- [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
- [FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
-};
-
static int parse_nl_config(struct genl_info *info,
struct fou_cfg *cfg)
{
@@ -745,7 +733,7 @@ static int parse_nl_config(struct genl_info *info,
return 0;
}
-static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
+int fou_nl_add_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct fou_cfg cfg;
@@ -758,7 +746,7 @@ static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
return fou_create(net, &cfg, NULL);
}
-static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
+int fou_nl_del_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct fou_cfg cfg;
@@ -827,7 +815,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
+int fou_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct fou_net *fn = net_generic(net, fou_net_id);
@@ -874,7 +862,7 @@ out_free:
return ret;
}
-static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int fou_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct fou_net *fn = net_generic(net, fou_net_id);
@@ -897,33 +885,12 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static const struct genl_small_ops fou_nl_ops[] = {
- {
- .cmd = FOU_CMD_ADD,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = fou_nl_cmd_add_port,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = FOU_CMD_DEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = fou_nl_cmd_rm_port,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = FOU_CMD_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = fou_nl_cmd_get_port,
- .dumpit = fou_nl_dump,
- },
-};
-
static struct genl_family fou_nl_family __ro_after_init = {
.hdrsize = 0,
.name = FOU_GENL_NAME,
.version = FOU_GENL_VERSION,
.maxattr = FOU_ATTR_MAX,
- .policy = fou_nl_policy,
+ .policy = fou_nl_policy,
.netnsok = true,
.module = THIS_MODULE,
.small_ops = fou_nl_ops,
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
new file mode 100644
index 000000000000..6c3820f41dd5
--- /dev/null
+++ b/net/ipv4/fou_nl.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "fou_nl.h"
+
+#include <linux/fou.h>
+
+/* Global operation policy for fou */
+const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
+ [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_AF] = { .type = NLA_U8, },
+ [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+ [FOU_ATTR_TYPE] = { .type = NLA_U8, },
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
+ [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
+ [FOU_ATTR_LOCAL_V6] = { .len = 16, },
+ [FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
+ [FOU_ATTR_PEER_V6] = { .len = 16, },
+ [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
+};
+
+/* Ops table for fou */
+const struct genl_small_ops fou_nl_ops[3] = {
+ {
+ .cmd = FOU_CMD_ADD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = fou_nl_add_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = fou_nl_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = fou_nl_get_doit,
+ .dumpit = fou_nl_get_dumpit,
+ },
+};
diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h
new file mode 100644
index 000000000000..b7a68121ce6f
--- /dev/null
+++ b/net/ipv4/fou_nl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_FOU_GEN_H
+#define _LINUX_FOU_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <linux/fou.h>
+
+/* Global operation policy for fou */
+extern const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1];
+
+/* Ops table for fou */
+extern const struct genl_small_ops fou_nl_ops[3];
+
+int fou_nl_add_doit(struct sk_buff *skb, struct genl_info *info);
+int fou_nl_del_doit(struct sk_buff *skb, struct genl_info *info);
+int fou_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int fou_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+
+#endif /* _LINUX_FOU_GEN_H */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 46aa2d65e40a..8cebb476b3ab 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -296,6 +296,7 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
if (icmp_global_allow())
return true;
+ __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -325,6 +326,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
if (peer)
inet_putpeer(peer);
out:
+ if (!rc)
+ __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
return rc;
}
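
These two hunks (and their ICMPv6 twins later in the patch) make rate-limit drops observable: one MIB counter for the host-global token bucket, one for the per-destination limit. A simplified sketch of the decision order, condensed into one hypothetical helper:

/* Hypothetical helper folding the two call sites above together. */
static bool icmp_rate_allowed(struct net *net, struct inet_peer *peer,
			      int timeout)
{
	if (!icmp_global_allow()) {
		/* refused by the host-global token bucket */
		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
		return false;
	}
	if (peer && !inet_peer_xrlim_allow(peer, timeout)) {
		/* refused by the per-destination (inetpeer) limit */
		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
		return false;
	}
	return true;
}

Both counters surface in /proc/net/snmp through the net/ipv4/proc.c hunk further down.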
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f2c43f67187d..7d206a10ad14 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,7 +117,7 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-void inet_get_local_port_range(struct net *net, int *low, int *high)
+void inet_get_local_port_range(const struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -130,6 +130,27 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
+void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct net *net = sock_net(sk);
+ int lo, hi, sk_lo, sk_hi;
+
+ inet_get_local_port_range(net, &lo, &hi);
+
+ sk_lo = inet->local_port_range.lo;
+ sk_hi = inet->local_port_range.hi;
+
+ if (unlikely(lo <= sk_lo && sk_lo <= hi))
+ lo = sk_lo;
+ if (unlikely(lo <= sk_hi && sk_hi <= hi))
+ hi = sk_hi;
+
+ *low = lo;
+ *high = hi;
+}
+EXPORT_SYMBOL(inet_sk_get_local_port_range);
+
static bool inet_use_bhash2_on_bind(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
@@ -316,7 +337,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
ports_exhausted:
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
high++; /* [32768, 60999] -> [32768, 61000[ */
if (high - low < 4)
attempt_half = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f58d73888638..e41fdc38ce19 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1008,17 +1008,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
u32 index;
if (port) {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- inet_ehash_nolisten(sk, NULL, NULL);
- spin_unlock_bh(&head->lock);
- return 0;
- }
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
+ local_bh_disable();
ret = check_established(death_row, sk, port, NULL);
local_bh_enable();
return ret;
@@ -1026,7 +1016,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
l3mdev = inet_sk_bound_l3mdev(sk);
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
high++; /* [32768, 60999] -> [32768, 61000[ */
remaining = high - low;
if (likely(remaining > 1))
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e880ce77322a..fe9ead9ee863 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -511,7 +511,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto csum_error;
- len = ntohs(iph->tot_len);
+ len = iph_totlen(skb, iph);
if (skb->len < len) {
drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 922c87ef1ab5..4e4e308c3230 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -100,7 +100,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
- iph->tot_len = htons(skb->len);
+ iph_set_totlen(iph, skb->len);
ip_send_check(iph);
/* if egress device is enslaved to an L3 master device pass the
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9f92ae35bb01..b511ff0adc0a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -923,6 +923,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_CHECKSUM:
case IP_RECVFRAGSIZE:
case IP_RECVERR_RFC4884:
+ case IP_LOCAL_PORT_RANGE:
if (optlen >= sizeof(int)) {
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
@@ -1365,6 +1366,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(inet->min_ttl, val);
break;
+ case IP_LOCAL_PORT_RANGE:
+ {
+ const __u16 lo = val;
+ const __u16 hi = val >> 16;
+
+ if (optlen != sizeof(__u32))
+ goto e_inval;
+ if (lo != 0 && hi != 0 && lo > hi)
+ goto e_inval;
+
+ inet->local_port_range.lo = lo;
+ inet->local_port_range.hi = hi;
+ break;
+ }
default:
err = -ENOPROTOOPT;
break;
@@ -1743,6 +1758,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MINTTL:
val = inet->min_ttl;
break;
+ case IP_LOCAL_PORT_RANGE:
+ val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
+ break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;
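
IP_LOCAL_PORT_RANGE packs both bounds into a single __u32, high bound in the upper 16 bits and low bound in the lower, with 0 meaning "not set"; per the inet_sk_get_local_port_range() hunk earlier, a bound is only honoured when it falls inside the netns ip_local_port_range. A userspace sketch (the option value 51 is an assumption based on linux/in.h):

#include <netinet/in.h>
#include <stdint.h>
#include <sys/socket.h>

#ifndef IP_LOCAL_PORT_RANGE
#define IP_LOCAL_PORT_RANGE 51	/* assumed value, see linux/in.h */
#endif

/* Pin this socket's ephemeral ports to [lo, hi]; lo = hi = 0 reverts
 * to the per-netns sysctl range.
 */
static int pin_local_ports(int fd, uint16_t lo, uint16_t hi)
{
	uint32_t range = ((uint32_t)hi << 16) | lo;

	return setsockopt(fd, IPPROTO_IP, IP_LOCAL_PORT_RANGE,
			  &range, sizeof(range));
}

The connect()/autobind port pickers elsewhere in this patch (inet_hashtables.c, udp.c) switch to the socket-aware inet_sk_get_local_port_range(), which is what makes the option take effect.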
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index aab384126f61..f71a7e9a7de6 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -259,20 +259,6 @@ config IP_NF_MANGLE
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_CLUSTERIP
- tristate "CLUSTERIP target support"
- depends on IP_NF_MANGLE
- depends on NF_CONNTRACK
- depends on NETFILTER_ADVANCED
- select NF_CONNTRACK_MARK
- select NETFILTER_FAMILY_ARP
- help
- The CLUSTERIP target allows you to build load-balancing clusters of
- network servers without having a dedicated load-balancing
- router/server/switch.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_ECN
tristate "ECN target support"
depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 93bad1184251..5a26f9de1ab9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -39,7 +39,6 @@ obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
# targets
-obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
deleted file mode 100644
index b3cc416ed292..000000000000
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ /dev/null
@@ -1,929 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Cluster IP hashmark target
- * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
- * based on ideas of Fabio Olive Leite <olive@unixforge.org>
- *
- * Development of this code funded by SuSE Linux AG, https://www.suse.com/
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/jhash.h>
-#include <linux/bitops.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <linux/if_arp.h>
-#include <linux/seq_file.h>
-#include <linux/refcount.h>
-#include <linux/netfilter_arp.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-
-#define CLUSTERIP_VERSION "0.8"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
-
-struct clusterip_config {
- struct list_head list; /* list of all configs */
- refcount_t refcount; /* reference count */
- refcount_t entries; /* number of entries/rules
- * referencing us */
-
- __be32 clusterip; /* the IP address */
- u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
- int ifindex; /* device ifindex */
- u_int16_t num_total_nodes; /* total number of nodes */
- unsigned long local_nodes; /* node number array */
-
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *pde; /* proc dir entry */
-#endif
- enum clusterip_hashmode hash_mode; /* which hashing mode */
- u_int32_t hash_initval; /* hash initialization */
- struct rcu_head rcu; /* for call_rcu */
- struct net *net; /* netns for pernet list */
- char ifname[IFNAMSIZ]; /* device ifname */
-};
-
-#ifdef CONFIG_PROC_FS
-static const struct proc_ops clusterip_proc_ops;
-#endif
-
-struct clusterip_net {
- struct list_head configs;
- /* lock protects the configs list */
- spinlock_t lock;
-
- bool clusterip_deprecated_warning;
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *procdir;
- /* mutex protects the config->pde*/
- struct mutex mutex;
-#endif
- unsigned int hook_users;
-};
-
-static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb,
-					 const struct nf_hook_state *state);
-
-static const struct nf_hook_ops cip_arp_ops = {
- .hook = clusterip_arp_mangle,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_OUT,
- .priority = -1
-};
-
-static unsigned int clusterip_net_id __read_mostly;
-static inline struct clusterip_net *clusterip_pernet(struct net *net)
-{
- return net_generic(net, clusterip_net_id);
-}
-
-static inline void
-clusterip_config_get(struct clusterip_config *c)
-{
- refcount_inc(&c->refcount);
-}
-
-static void clusterip_config_rcu_free(struct rcu_head *head)
-{
- struct clusterip_config *config;
- struct net_device *dev;
-
- config = container_of(head, struct clusterip_config, rcu);
- dev = dev_get_by_name(config->net, config->ifname);
- if (dev) {
- dev_mc_del(dev, config->clustermac);
- dev_put(dev);
- }
- kfree(config);
-}
-
-static inline void
-clusterip_config_put(struct clusterip_config *c)
-{
- if (refcount_dec_and_test(&c->refcount))
- call_rcu(&c->rcu, clusterip_config_rcu_free);
-}
-
-/* decrease the count of entries using/referencing this config. If last
- * entry(rule) is removed, remove the config from lists, but don't free it
- * yet, since proc-files could still be holding references */
-static inline void
-clusterip_config_entry_put(struct clusterip_config *c)
-{
- struct clusterip_net *cn = clusterip_pernet(c->net);
-
- local_bh_disable();
- if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
- /* In case anyone still accesses the file, the open/close
- * functions are also incrementing the refcount on their own,
- * so it's safe to remove the entry even if it's in use. */
-#ifdef CONFIG_PROC_FS
- mutex_lock(&cn->mutex);
- if (cn->procdir)
- proc_remove(c->pde);
- mutex_unlock(&cn->mutex);
-#endif
- return;
- }
- local_bh_enable();
-}
-
-static struct clusterip_config *
-__clusterip_config_find(struct net *net, __be32 clusterip)
-{
- struct clusterip_config *c;
- struct clusterip_net *cn = clusterip_pernet(net);
-
- list_for_each_entry_rcu(c, &cn->configs, list) {
- if (c->clusterip == clusterip)
- return c;
- }
-
- return NULL;
-}
-
-static inline struct clusterip_config *
-clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
-{
- struct clusterip_config *c;
-
- rcu_read_lock_bh();
- c = __clusterip_config_find(net, clusterip);
- if (c) {
-#ifdef CONFIG_PROC_FS
- if (!c->pde)
- c = NULL;
- else
-#endif
- if (unlikely(!refcount_inc_not_zero(&c->refcount)))
- c = NULL;
- else if (entry) {
- if (unlikely(!refcount_inc_not_zero(&c->entries))) {
- clusterip_config_put(c);
- c = NULL;
- }
- }
- }
- rcu_read_unlock_bh();
-
- return c;
-}
-
-static void
-clusterip_config_init_nodelist(struct clusterip_config *c,
- const struct ipt_clusterip_tgt_info *i)
-{
- int n;
-
- for (n = 0; n < i->num_local_nodes; n++)
- set_bit(i->local_nodes[n] - 1, &c->local_nodes);
-}
-
-static int
-clusterip_netdev_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net *net = dev_net(dev);
- struct clusterip_net *cn = clusterip_pernet(net);
- struct clusterip_config *c;
-
- spin_lock_bh(&cn->lock);
- list_for_each_entry_rcu(c, &cn->configs, list) {
- switch (event) {
- case NETDEV_REGISTER:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- }
- break;
- case NETDEV_UNREGISTER:
- if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- case NETDEV_CHANGENAME:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- } else if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- }
- }
- spin_unlock_bh(&cn->lock);
-
- return NOTIFY_DONE;
-}
-
-static struct clusterip_config *
-clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
- __be32 ip, const char *iniface)
-{
- struct clusterip_net *cn = clusterip_pernet(net);
- struct clusterip_config *c;
- struct net_device *dev;
- int err;
-
- if (iniface[0] == '\0') {
- pr_info("Please specify an interface name\n");
- return ERR_PTR(-EINVAL);
- }
-
- c = kzalloc(sizeof(*c), GFP_ATOMIC);
- if (!c)
- return ERR_PTR(-ENOMEM);
-
- dev = dev_get_by_name(net, iniface);
- if (!dev) {
- pr_info("no such interface %s\n", iniface);
- kfree(c);
- return ERR_PTR(-ENOENT);
- }
- c->ifindex = dev->ifindex;
- strcpy(c->ifname, dev->name);
- memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
- dev_mc_add(dev, c->clustermac);
- dev_put(dev);
-
- c->clusterip = ip;
- c->num_total_nodes = i->num_total_nodes;
- clusterip_config_init_nodelist(c, i);
- c->hash_mode = i->hash_mode;
- c->hash_initval = i->hash_initval;
- c->net = net;
- refcount_set(&c->refcount, 1);
-
- spin_lock_bh(&cn->lock);
- if (__clusterip_config_find(net, ip)) {
- err = -EBUSY;
- goto out_config_put;
- }
-
- list_add_rcu(&c->list, &cn->configs);
- spin_unlock_bh(&cn->lock);
-
-#ifdef CONFIG_PROC_FS
- {
- char buffer[16];
-
- /* create proc dir entry */
- sprintf(buffer, "%pI4", &ip);
- mutex_lock(&cn->mutex);
- c->pde = proc_create_data(buffer, 0600,
- cn->procdir,
- &clusterip_proc_ops, c);
- mutex_unlock(&cn->mutex);
- if (!c->pde) {
- err = -ENOMEM;
- goto err;
- }
- }
-#endif
-
- refcount_set(&c->entries, 1);
- return c;
-
-#ifdef CONFIG_PROC_FS
-err:
-#endif
- spin_lock_bh(&cn->lock);
- list_del_rcu(&c->list);
-out_config_put:
- spin_unlock_bh(&cn->lock);
- clusterip_config_put(c);
- return ERR_PTR(err);
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
-{
-
- if (nodenum == 0 ||
- nodenum > c->num_total_nodes)
- return 1;
-
- /* check if we already have this number in our bitfield */
- if (test_and_set_bit(nodenum - 1, &c->local_nodes))
- return 1;
-
- return 0;
-}
-
-static bool
-clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
-{
- if (nodenum == 0 ||
- nodenum > c->num_total_nodes)
- return true;
-
- if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
- return false;
-
- return true;
-}
-#endif
-
-static inline u_int32_t
-clusterip_hashfn(const struct sk_buff *skb,
- const struct clusterip_config *config)
-{
- const struct iphdr *iph = ip_hdr(skb);
- unsigned long hashval;
- u_int16_t sport = 0, dport = 0;
- int poff;
-
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0) {
- const u_int16_t *ports;
- u16 _ports[2];
-
- ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
- if (ports) {
- sport = ports[0];
- dport = ports[1];
- }
- } else {
- net_info_ratelimited("unknown protocol %u\n", iph->protocol);
- }
-
- switch (config->hash_mode) {
- case CLUSTERIP_HASHMODE_SIP:
- hashval = jhash_1word(ntohl(iph->saddr),
- config->hash_initval);
- break;
- case CLUSTERIP_HASHMODE_SIP_SPT:
- hashval = jhash_2words(ntohl(iph->saddr), sport,
- config->hash_initval);
- break;
- case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
- hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
- config->hash_initval);
- break;
- default:
- /* to make gcc happy */
- hashval = 0;
- /* This cannot happen, unless the check function wasn't called
- * at rule load time */
- pr_info("unknown mode %u\n", config->hash_mode);
- BUG();
- break;
- }
-
- /* node numbers are 1..n, not 0..n */
- return reciprocal_scale(hashval, config->num_total_nodes) + 1;
-}
-
-static inline int
-clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
-{
- return test_bit(hash - 1, &config->local_nodes);
-}
-
-/***********************************************************************
- * IPTABLES TARGET
- ***********************************************************************/
-
-static unsigned int
-clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- u_int32_t hash;
-
- /* don't need to clusterip_config_get() here, since refcount
- * is only decremented by destroy() - and ip_tables guarantees
- * that the ->target() function isn't called after ->destroy() */
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
- return NF_DROP;
-
- /* special case: ICMP error handling. conntrack distinguishes between
- * error messages (RELATED) and information requests (see below) */
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
- (ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY))
- return XT_CONTINUE;
-
- /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
- * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
- * on, which all have an ID field [relevant for hashing]. */
-
- hash = clusterip_hashfn(skb, cipinfo->config);
-
- switch (ctinfo) {
- case IP_CT_NEW:
- WRITE_ONCE(ct->mark, hash);
- break;
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- /* FIXME: we don't handle expectations at the moment.
- * They can arrive on a different node than
- * the master connection (e.g. FTP passive mode) */
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
- default: /* Prevent gcc warnings */
- break;
- }
-
-#ifdef DEBUG
- nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-#endif
- pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark));
- if (!clusterip_responsible(cipinfo->config, hash)) {
- pr_debug("not responsible\n");
- return NF_DROP;
- }
- pr_debug("responsible\n");
-
- /* despite being received via linklayer multicast, this is
- * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
- skb->pkt_type = PACKET_HOST;
-
- return XT_CONTINUE;
-}
-
-static int clusterip_tg_check(const struct xt_tgchk_param *par)
-{
- struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
- struct clusterip_net *cn = clusterip_pernet(par->net);
- const struct ipt_entry *e = par->entryinfo;
- struct clusterip_config *config;
- int ret, i;
-
- if (par->nft_compat) {
- pr_err("cannot use CLUSTERIP target from nftables compat\n");
- return -EOPNOTSUPP;
- }
-
- if (cn->hook_users == UINT_MAX)
- return -EOVERFLOW;
-
- if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
- cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
- cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
- pr_info("unknown mode %u\n", cipinfo->hash_mode);
- return -EINVAL;
-
- }
- if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
- e->ip.dst.s_addr == 0) {
- pr_info("Please specify destination IP\n");
- return -EINVAL;
- }
- if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
- pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
- return -EINVAL;
- }
- for (i = 0; i < cipinfo->num_local_nodes; i++) {
- if (cipinfo->local_nodes[i] - 1 >=
- sizeof(config->local_nodes) * 8) {
- pr_info("bad local_nodes[%d] %u\n",
- i, cipinfo->local_nodes[i]);
- return -EINVAL;
- }
- }
-
- config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
- if (!config) {
- if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
- pr_info("no config found for %pI4, need 'new'\n",
- &e->ip.dst.s_addr);
- return -EINVAL;
- } else {
- config = clusterip_config_init(par->net, cipinfo,
- e->ip.dst.s_addr,
- e->ip.iniface);
- if (IS_ERR(config))
- return PTR_ERR(config);
- }
- } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
- clusterip_config_entry_put(config);
- clusterip_config_put(config);
- return -EINVAL;
- }
-
- ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0) {
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
- clusterip_config_entry_put(config);
- clusterip_config_put(config);
- return ret;
- }
-
- if (cn->hook_users == 0) {
- ret = nf_register_net_hook(par->net, &cip_arp_ops);
-
- if (ret < 0) {
- clusterip_config_entry_put(config);
- clusterip_config_put(config);
- nf_ct_netns_put(par->net, par->family);
- return ret;
- }
- }
-
- cn->hook_users++;
-
- if (!cn->clusterip_deprecated_warning) {
- pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
- "use xt_cluster instead\n");
- cn->clusterip_deprecated_warning = true;
- }
-
- cipinfo->config = config;
- return ret;
-}
-
-/* drop reference count of cluster config when rule is deleted */
-static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
-{
- const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
- struct clusterip_net *cn = clusterip_pernet(par->net);
-
- /* if no more entries are referencing the config, remove it
- * from the list and destroy the proc entry */
- clusterip_config_entry_put(cipinfo->config);
-
- clusterip_config_put(cipinfo->config);
-
- nf_ct_netns_put(par->net, par->family);
- cn->hook_users--;
-
- if (cn->hook_users == 0)
- nf_unregister_net_hook(par->net, &cip_arp_ops);
-}
-
-#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
-struct compat_ipt_clusterip_tgt_info
-{
- u_int32_t flags;
- u_int8_t clustermac[6];
- u_int16_t num_total_nodes;
- u_int16_t num_local_nodes;
- u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
- u_int32_t hash_mode;
- u_int32_t hash_initval;
- compat_uptr_t config;
-};
-#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
-
-static struct xt_target clusterip_tg_reg __read_mostly = {
- .name = "CLUSTERIP",
- .family = NFPROTO_IPV4,
- .target = clusterip_tg,
- .checkentry = clusterip_tg_check,
- .destroy = clusterip_tg_destroy,
- .targetsize = sizeof(struct ipt_clusterip_tgt_info),
- .usersize = offsetof(struct ipt_clusterip_tgt_info, config),
-#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
- .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
-#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
- .me = THIS_MODULE
-};
-
-
-/***********************************************************************
- * ARP MANGLING CODE
- ***********************************************************************/
-
-/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
-struct arp_payload {
- u_int8_t src_hw[ETH_ALEN];
- __be32 src_ip;
- u_int8_t dst_hw[ETH_ALEN];
- __be32 dst_ip;
-} __packed;
-
-#ifdef DEBUG
-static void arp_print(struct arp_payload *payload)
-{
-#define HBUFFERLEN 30
- char hbuffer[HBUFFERLEN];
- int j, k;
-
- for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
- hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
- hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
- hbuffer[k++] = ':';
- }
- hbuffer[--k] = '\0';
-
- pr_debug("src %pI4@%s, dst %pI4\n",
- &payload->src_ip, hbuffer, &payload->dst_ip);
-}
-#endif
-
-static unsigned int
-clusterip_arp_mangle(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct arphdr *arp = arp_hdr(skb);
- struct arp_payload *payload;
- struct clusterip_config *c;
- struct net *net = state->net;
-
- /* we don't care about non-ethernet and non-ipv4 ARP */
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
- return NF_ACCEPT;
-
- /* we only want to mangle arp requests and replies */
- if (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST))
- return NF_ACCEPT;
-
- payload = (void *)(arp+1);
-
- /* if there is no clusterip configuration for the arp reply's
- * source ip, we don't want to mangle it */
- c = clusterip_config_find_get(net, payload->src_ip, 0);
- if (!c)
- return NF_ACCEPT;
-
- /* normally the linux kernel always replies to arp queries of
- * addresses on different interfacs. However, in the CLUSTERIP case
- * this wouldn't work, since we didn't subscribe the mcast group on
- * other interfaces */
- if (c->ifindex != state->out->ifindex) {
- pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
- c->ifindex, state->out->ifindex);
- clusterip_config_put(c);
- return NF_ACCEPT;
- }
-
- /* mangle reply hardware address */
- memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
-
-#ifdef DEBUG
- pr_debug("mangled arp reply: ");
- arp_print(payload);
-#endif
-
- clusterip_config_put(c);
-
- return NF_ACCEPT;
-}
-
-/***********************************************************************
- * PROC DIR HANDLING
- ***********************************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-struct clusterip_seq_position {
- unsigned int pos; /* position */
- unsigned int weight; /* number of bits set == size */
- unsigned int bit; /* current bit */
- unsigned long val; /* current value */
-};
-
-static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct clusterip_config *c = s->private;
- unsigned int weight;
- u_int32_t local_nodes;
- struct clusterip_seq_position *idx;
-
- /* FIXME: possible race */
- local_nodes = c->local_nodes;
- weight = hweight32(local_nodes);
- if (*pos >= weight)
- return NULL;
-
- idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
- if (!idx)
- return ERR_PTR(-ENOMEM);
-
- idx->pos = *pos;
- idx->weight = weight;
- idx->bit = ffs(local_nodes);
- idx->val = local_nodes;
- clear_bit(idx->bit - 1, &idx->val);
-
- return idx;
-}
-
-static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct clusterip_seq_position *idx = v;
-
- *pos = ++idx->pos;
- if (*pos >= idx->weight) {
- kfree(v);
- return NULL;
- }
- idx->bit = ffs(idx->val);
- clear_bit(idx->bit - 1, &idx->val);
- return idx;
-}
-
-static void clusterip_seq_stop(struct seq_file *s, void *v)
-{
- if (!IS_ERR(v))
- kfree(v);
-}
-
-static int clusterip_seq_show(struct seq_file *s, void *v)
-{
- struct clusterip_seq_position *idx = v;
-
- if (idx->pos != 0)
- seq_putc(s, ',');
-
- seq_printf(s, "%u", idx->bit);
-
- if (idx->pos == idx->weight - 1)
- seq_putc(s, '\n');
-
- return 0;
-}
-
-static const struct seq_operations clusterip_seq_ops = {
- .start = clusterip_seq_start,
- .next = clusterip_seq_next,
- .stop = clusterip_seq_stop,
- .show = clusterip_seq_show,
-};
-
-static int clusterip_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &clusterip_seq_ops);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
- struct clusterip_config *c = pde_data(inode);
-
- sf->private = c;
-
- clusterip_config_get(c);
- }
-
- return ret;
-}
-
-static int clusterip_proc_release(struct inode *inode, struct file *file)
-{
- struct clusterip_config *c = pde_data(inode);
- int ret;
-
- ret = seq_release(inode, file);
-
- if (!ret)
- clusterip_config_put(c);
-
- return ret;
-}
-
-static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
- size_t size, loff_t *ofs)
-{
- struct clusterip_config *c = pde_data(file_inode(file));
-#define PROC_WRITELEN 10
- char buffer[PROC_WRITELEN+1];
- unsigned long nodenum;
- int rc;
-
- if (size > PROC_WRITELEN)
- return -EIO;
- if (copy_from_user(buffer, input, size))
- return -EFAULT;
- buffer[size] = 0;
-
- if (*buffer == '+') {
- rc = kstrtoul(buffer+1, 10, &nodenum);
- if (rc)
- return rc;
- if (clusterip_add_node(c, nodenum))
- return -ENOMEM;
- } else if (*buffer == '-') {
- rc = kstrtoul(buffer+1, 10, &nodenum);
- if (rc)
- return rc;
- if (clusterip_del_node(c, nodenum))
- return -ENOENT;
- } else
- return -EIO;
-
- return size;
-}
-
-static const struct proc_ops clusterip_proc_ops = {
- .proc_open = clusterip_proc_open,
- .proc_read = seq_read,
- .proc_write = clusterip_proc_write,
- .proc_lseek = seq_lseek,
- .proc_release = clusterip_proc_release,
-};
-
-#endif /* CONFIG_PROC_FS */
-
-static int clusterip_net_init(struct net *net)
-{
- struct clusterip_net *cn = clusterip_pernet(net);
-
- INIT_LIST_HEAD(&cn->configs);
-
- spin_lock_init(&cn->lock);
-
-#ifdef CONFIG_PROC_FS
- cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
- if (!cn->procdir) {
- pr_err("Unable to proc dir entry\n");
- return -ENOMEM;
- }
- mutex_init(&cn->mutex);
-#endif /* CONFIG_PROC_FS */
-
- return 0;
-}
-
-static void clusterip_net_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- struct clusterip_net *cn = clusterip_pernet(net);
-
- mutex_lock(&cn->mutex);
- proc_remove(cn->procdir);
- cn->procdir = NULL;
- mutex_unlock(&cn->mutex);
-#endif
-}
-
-static struct pernet_operations clusterip_net_ops = {
- .init = clusterip_net_init,
- .exit = clusterip_net_exit,
- .id = &clusterip_net_id,
- .size = sizeof(struct clusterip_net),
-};
-
-static struct notifier_block cip_netdev_notifier = {
- .notifier_call = clusterip_netdev_event
-};
-
-static int __init clusterip_tg_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&clusterip_net_ops);
- if (ret < 0)
- return ret;
-
- ret = xt_register_target(&clusterip_tg_reg);
- if (ret < 0)
- goto cleanup_subsys;
-
- ret = register_netdevice_notifier(&cip_netdev_notifier);
- if (ret < 0)
- goto unregister_target;
-
- pr_info("ClusterIP Version %s loaded successfully\n",
- CLUSTERIP_VERSION);
-
- return 0;
-
-unregister_target:
- xt_unregister_target(&clusterip_tg_reg);
-cleanup_subsys:
- unregister_pernet_subsys(&clusterip_net_ops);
- return ret;
-}
-
-static void __exit clusterip_tg_exit(void)
-{
- pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
-
- unregister_netdevice_notifier(&cip_netdev_notifier);
- xt_unregister_target(&clusterip_tg_reg);
- unregister_pernet_subsys(&clusterip_net_ops);
-
- /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
- rcu_barrier();
-}
-
-module_init(clusterip_tg_init);
-module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f88daace9de3..eaf1d3113b62 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -353,7 +353,7 @@ static void icmp_put(struct seq_file *seq)
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
- seq_puts(seq, " OutMsgs OutErrors");
+ seq_puts(seq, " OutMsgs OutErrors OutRateLimitGlobal OutRateLimitHost");
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
@@ -363,9 +363,11 @@ static void icmp_put(struct seq_file *seq)
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index));
- seq_printf(seq, " %lu %lu",
+ seq_printf(seq, " %lu %lu %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
- snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITGLOBAL),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITHOST));
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 006c1f0ed8b4..94df935ee0c5 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ int raw_hash_sk(struct sock *sk)
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_nulls_head *hlist;
- hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+ hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
spin_lock(&h->lock);
__sk_nulls_add_node_rcu(sk, hlist);
@@ -160,9 +160,9 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
-static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+static int raw_v4_input(struct net *net, struct sk_buff *skb,
+ const struct iphdr *iph, int hash)
{
- struct net *net = dev_net(skb->dev);
struct hlist_nulls_head *hlist;
struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
@@ -193,9 +193,10 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
- int hash = protocol & (RAW_HTABLE_SIZE - 1);
+ struct net *net = dev_net(skb->dev);
- return raw_v4_input(skb, ip_hdr(skb), hash);
+ return raw_v4_input(net, skb, ip_hdr(skb),
+ raw_hashfunc(net, protocol));
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
@@ -271,7 +272,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
struct sock *sk;
int hash;
- hash = protocol & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, protocol);
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
@@ -287,11 +288,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
/* Charge it to the socket. */
ipv4_pktinfo_prepare(sk, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -302,7 +305,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
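
All the raw-socket hunks above replace the open-coded "protocol & (RAW_HTABLE_SIZE - 1)" with raw_hashfunc(net, protocol). A sketch of the helper this series adds to include/net/raw.h (reconstructed, not quoted):

/* Reconstruction: RAW_HTABLE_LOG is log2(RAW_HTABLE_SIZE). */
static inline int raw_hashfunc(const struct net *net, u32 proto)
{
	return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG);
}

Mixing in net_hash_mix() keeps one namespace from deliberately steering its raw sockets into the same hash chain as another's, which is also why raw_v4_input() and raw_local_deliver() now pass struct net around explicitly.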
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8320d0ecb13a..ea370afa70ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2102,6 +2102,7 @@ process:
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ drop_reason = SKB_DROP_REASON_TCP_MINTTL;
goto discard_and_relse;
}
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9592fe3e444a..c605d171eb2d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -248,7 +248,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
int low, high, remaining;
unsigned int rand;
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
remaining = (high - low) + 1;
rand = get_random_u32();
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9d92d51c4757..c9346515e24d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -183,6 +183,7 @@ static bool icmpv6_global_allow(struct net *net, int type)
if (icmp_global_allow())
return true;
+ __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -224,6 +225,9 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (peer)
inet_putpeer(peer);
}
+ if (!res)
+ __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
+ ICMP6_MIB_RATELIMITHOST);
dst_release(dst);
return res;
}
@@ -328,7 +332,6 @@ static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct ipv6_destopt_hao *hao;
- struct in6_addr tmp;
int off;
if (opt->dsthao) {
@@ -336,9 +339,7 @@ static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt
if (likely(off >= 0)) {
hao = (struct ipv6_destopt_hao *)
(skb_network_header(skb) + off);
- tmp = iph->saddr;
- iph->saddr = hao->addr;
- hao->addr = tmp;
+ swap(iph->saddr, hao->addr);
}
}
}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d6306aa46bb1..e20b3705c2d2 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -94,6 +94,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
+ SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ada087b50541..bac9ba747bde 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -338,7 +338,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
struct sock *sk;
int hash;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -355,17 +355,19 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -386,7 +388,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
@@ -410,7 +412,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e74e0361fd92..c180c2ef17c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -91,7 +91,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev, int how);
-static int ip6_dst_gc(struct dst_ops *ops);
+static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -2593,9 +2593,10 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
-struct dst_entry *ip6_route_output_flags_noref(struct net *net,
- const struct sock *sk,
- struct flowi6 *fl6, int flags)
+static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ int flags)
{
bool any_src;
@@ -2624,7 +2625,6 @@ struct dst_entry *ip6_route_output_flags_noref(struct net *net,
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
struct dst_entry *ip6_route_output_flags(struct net *net,
const struct sock *sk,
@@ -3284,23 +3284,17 @@ out:
return dst;
}
-static int ip6_dst_gc(struct dst_ops *ops)
+static void ip6_dst_gc(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
- int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
unsigned int val;
int entries;
- entries = dst_entries_get_fast(ops);
- if (entries > rt_max_size)
- entries = dst_entries_get_slow(ops);
-
- if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
- entries <= rt_max_size)
+ if (time_after(rt_last_gc + rt_min_interval, jiffies))
goto out;
fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
@@ -3310,7 +3304,6 @@ static int ip6_dst_gc(struct dst_ops *ops)
out:
val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
- return entries > rt_max_size;
}
static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
@@ -6512,7 +6505,7 @@ static int __net_init ip6_route_net_init(struct net *net)
#endif
net->ipv6.sysctl.flush_delay = 0;
- net->ipv6.sysctl.ip6_rt_max_size = 4096;
+ net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index ff691d9f4a04..b1c028df686e 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -13,7 +13,7 @@
#include <net/rpl.h>
struct rpl_iptunnel_encap {
- struct ipv6_rpl_sr_hdr srh[0];
+ DECLARE_FLEX_ARRAY(struct ipv6_rpl_sr_hdr, srh);
};
struct rpl_lwt {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 11b736a76bd7..543ee2167720 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1708,8 +1708,9 @@ process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
- if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ drop_reason = SKB_DROP_REASON_TCP_MINTTL;
goto discard_and_relse;
}
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 890a2423f559..cfe828bd7fc6 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -28,6 +28,7 @@
#include <net/netns/generic.h>
#include <net/sock.h>
#include <uapi/linux/kcm.h>
+#include <trace/events/sock.h>
unsigned int kcm_net_id;
@@ -349,6 +350,8 @@ static void psock_data_ready(struct sock *sk)
{
struct kcm_psock *psock;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
psock = (struct kcm_psock *)sk->sk_user_data;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 672eff6f5d32..f5d43f42f6d8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1252,6 +1252,21 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
prev_beacon_int = link_conf->beacon_int;
link_conf->beacon_int = params->beacon_interval;
+ if (params->vht_cap) {
+ link_conf->vht_su_beamformer =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE);
+ link_conf->vht_su_beamformee =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE);
+ link_conf->vht_mu_beamformer =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE);
+ link_conf->vht_mu_beamformee =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE);
+ }
+
if (params->he_cap && params->he_oper) {
link_conf->he_support = true;
link_conf->htc_trig_based_pkt_ext =
@@ -1266,6 +1281,21 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
changed |= BSS_CHANGED_HE_BSS_COLOR;
}
+ if (params->he_cap) {
+ link_conf->he_su_beamformer =
+ params->he_cap->phy_cap_info[3] &
+ IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER;
+ link_conf->he_su_beamformee =
+ params->he_cap->phy_cap_info[4] &
+ IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE;
+ link_conf->he_mu_beamformer =
+ params->he_cap->phy_cap_info[4] &
+ IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER;
+ link_conf->he_full_ul_mumimo =
+ params->he_cap->phy_cap_info[2] &
+ IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO;
+ }
+
if (sdata->vif.type == NL80211_IFTYPE_AP &&
params->mbssid_config.tx_wdev) {
err = ieee80211_set_ap_mbssid_options(sdata,
@@ -2734,7 +2764,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
* If the scan has been forced (and the driver supports
* forcing), don't care about being beaconing already.
* This will create problems to the attached stations (e.g. all
- * the frames sent while scanning on other channel will be
+ * the frames sent while scanning on other channel will be
* lost)
*/
if (sdata->deflink.u.ap.beacon &&
@@ -4632,7 +4662,7 @@ void ieee80211_color_change_finish(struct ieee80211_vif *vif)
EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
-ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
u64 color_bitmap, gfp_t gfp)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
@@ -4642,7 +4672,7 @@ ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp);
}
-EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
+EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify);
static int
ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
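
The two new blocks in ieee80211_start_ap() mirror the AP's VHT and HE beamforming capabilities into the link's BSS configuration, so drivers can read plain flags instead of re-parsing capability elements. A hypothetical consumer, only to show the intended data flow (all foo_* names invented):

static void foo_apply_beamforming(struct foo_hw *hw,
				  struct ieee80211_bss_conf *link_conf)
{
	if (link_conf->he_su_beamformer)
		foo_enable_su_beamformer(hw);	/* hypothetical hook */
	if (link_conf->he_mu_beamformer)
		foo_enable_mu_beamformer(hw);	/* hypothetical hook */
}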
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c6562a6d2503..e284897ba5e9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3219,9 +3219,9 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
color = le32_get_bits(he_oper->he_oper_params,
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
if (color == bss_conf->he_bss_color.color)
- ieeee80211_obss_color_collision_notify(&rx->sdata->vif,
- BIT_ULL(color),
- GFP_ATOMIC);
+ ieee80211_obss_color_collision_notify(&rx->sdata->vif,
+ BIT_ULL(color),
+ GFP_ATOMIC);
}
}
@@ -5194,6 +5194,15 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
status->rate_idx, status->nss))
goto drop;
break;
+ case RX_ENC_EHT:
+ if (WARN_ONCE(status->rate_idx > 15 ||
+ !status->nss ||
+ status->nss > 8 ||
+ status->eht.gi > NL80211_RATE_INFO_EHT_GI_3_2,
+ "Rate marked as an EHT rate but data is invalid: MCS:%d, NSS:%d, GI:%d\n",
+ status->rate_idx, status->nss, status->eht.gi))
+ goto drop;
+ break;
default:
WARN_ON_ONCE(1);
fallthrough;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 04e0f132b1d9..27c737fe7fb8 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/module.h>
@@ -2406,6 +2406,13 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate);
rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate);
break;
+ case STA_STATS_RATE_TYPE_EHT:
+ rinfo->flags = RATE_INFO_FLAGS_EHT_MCS;
+ rinfo->mcs = STA_STATS_GET(EHT_MCS, rate);
+ rinfo->nss = STA_STATS_GET(EHT_NSS, rate);
+ rinfo->eht_gi = STA_STATS_GET(EHT_GI, rate);
+ rinfo->eht_ru_alloc = STA_STATS_GET(EHT_RU, rate);
+ break;
}
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 69820b551668..c30f02874fb1 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -936,6 +936,7 @@ enum sta_stats_type {
STA_STATS_RATE_TYPE_VHT,
STA_STATS_RATE_TYPE_HE,
STA_STATS_RATE_TYPE_S1G,
+ STA_STATS_RATE_TYPE_EHT,
};
#define STA_STATS_FIELD_HT_MCS GENMASK( 7, 0)
@@ -945,12 +946,16 @@ enum sta_stats_type {
#define STA_STATS_FIELD_VHT_NSS GENMASK( 7, 4)
#define STA_STATS_FIELD_HE_MCS GENMASK( 3, 0)
#define STA_STATS_FIELD_HE_NSS GENMASK( 7, 4)
-#define STA_STATS_FIELD_BW GENMASK(11, 8)
-#define STA_STATS_FIELD_SGI GENMASK(12, 12)
-#define STA_STATS_FIELD_TYPE GENMASK(15, 13)
-#define STA_STATS_FIELD_HE_RU GENMASK(18, 16)
-#define STA_STATS_FIELD_HE_GI GENMASK(20, 19)
-#define STA_STATS_FIELD_HE_DCM GENMASK(21, 21)
+#define STA_STATS_FIELD_EHT_MCS GENMASK( 3, 0)
+#define STA_STATS_FIELD_EHT_NSS GENMASK( 7, 4)
+#define STA_STATS_FIELD_BW GENMASK(12, 8)
+#define STA_STATS_FIELD_SGI GENMASK(13, 13)
+#define STA_STATS_FIELD_TYPE GENMASK(16, 14)
+#define STA_STATS_FIELD_HE_RU GENMASK(19, 17)
+#define STA_STATS_FIELD_HE_GI GENMASK(21, 20)
+#define STA_STATS_FIELD_HE_DCM GENMASK(22, 22)
+#define STA_STATS_FIELD_EHT_RU GENMASK(20, 17)
+#define STA_STATS_FIELD_EHT_GI GENMASK(22, 21)
#define STA_STATS_FIELD(_n, _v) FIELD_PREP(STA_STATS_FIELD_ ## _n, _v)
#define STA_STATS_GET(_n, _v) FIELD_GET(STA_STATS_FIELD_ ## _n, _v)
@@ -989,6 +994,13 @@ static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
r |= STA_STATS_FIELD(HE_RU, s->he_ru);
r |= STA_STATS_FIELD(HE_DCM, s->he_dcm);
break;
+ case RX_ENC_EHT:
+ r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_EHT);
+ r |= STA_STATS_FIELD(EHT_NSS, s->nss);
+ r |= STA_STATS_FIELD(EHT_MCS, s->rate_idx);
+ r |= STA_STATS_FIELD(EHT_GI, s->eht.gi);
+ r |= STA_STATS_FIELD(EHT_RU, s->eht.ru);
+ break;
default:
WARN_ON(1);
return STA_STATS_RATE_INVALID;
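
The bitfield rearrangement above makes room for EHT: BW grows from 4 to 5 bits (320 MHz needs more encodings) and TYPE shifts up accordingly, while the EHT GI/RU fields reuse the same bit range as the HE ones, with TYPE telling them apart. An illustrative round-trip using the macros above (values arbitrary):

/* Illustrative only: encode an EHT rate and read it back. */
static u32 demo_encode_eht_rate(void)
{
	u32 rate = STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_EHT) |
		   STA_STATS_FIELD(EHT_MCS, 13) |
		   STA_STATS_FIELD(EHT_NSS, 2) |
		   STA_STATS_FIELD(EHT_GI, 1);

	WARN_ON(STA_STATS_GET(TYPE, rate) != STA_STATS_RATE_TYPE_EHT);
	WARN_ON(STA_STATS_GET(EHT_MCS, rate) != 13);	/* round-trips */
	return rate;
}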
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 261ac667887f..1a28fe5cb614 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -832,19 +832,6 @@ static void __iterate_stations(struct ieee80211_local *local,
}
}
-void ieee80211_iterate_stations(struct ieee80211_hw *hw,
- void (*iterator)(void *data,
- struct ieee80211_sta *sta),
- void *data)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- mutex_lock(&local->sta_mtx);
- __iterate_stations(local, iterator, data);
- mutex_unlock(&local->sta_mtx);
-}
-EXPORT_SYMBOL_GPL(ieee80211_iterate_stations);
-
void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
void (*iterator)(void *data,
struct ieee80211_sta *sta),
@@ -4033,6 +4020,19 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
/* Fill cfg80211 rate info */
switch (status->encoding) {
+ case RX_ENC_EHT:
+ ri.flags |= RATE_INFO_FLAGS_EHT_MCS;
+ ri.mcs = status->rate_idx;
+ ri.nss = status->nss;
+ ri.eht_ru_alloc = status->eht.ru;
+ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
+ ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
+ /* TODO/FIXME: is this right? handle other PPDUs */
+ if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ mpdu_offset += 2;
+ ts += 36;
+ }
+ break;
case RX_ENC_HE:
ri.flags |= RATE_INFO_FLAGS_HE_MCS;
ri.mcs = status->rate_idx;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 5ded85e2c374..b30cea2fbf3f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1594,8 +1594,7 @@ mp_rst:
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
- MPTCP_INC_STATS(sock_net((const struct sock *)tp),
- MPTCP_MIB_MPPRIOTX);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 10fe9771a852..56628b52d100 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -152,7 +152,6 @@ static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk)
{
- const struct sock *sk = (const struct sock *)msk;
struct mptcp_pm_addr_entry *entry, *ret = NULL;
msk_owned_by_me(msk);
@@ -165,16 +164,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- if (entry->addr.family != sk->sk_family) {
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if ((entry->addr.family == AF_INET &&
- !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
- (sk->sk_family == AF_INET &&
- !ipv6_addr_v4mapped(&entry->addr.addr6)))
-#endif
- continue;
- }
-
ret = entry;
break;
}
@@ -423,7 +412,9 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
-static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *local,
+ bool fullmesh,
struct mptcp_addr_info *addrs)
{
bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
@@ -443,6 +434,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
if (deny_id0)
return 0;
+ if (!mptcp_pm_addr_families_match(sk, local, &remote))
+ return 0;
+
msk->pm.subflows++;
addrs[i++] = remote;
} else {
@@ -453,6 +447,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
if (deny_id0 && !addrs[i].id)
continue;
+ if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
+ continue;
+
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@@ -603,9 +600,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
msk->pm.local_addr_used++;
- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
- if (nr)
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
+ if (nr == 0)
+ continue;
+
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
@@ -628,11 +627,11 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
* and return the array size.
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *remote,
struct mptcp_addr_info *addrs)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
- struct mptcp_addr_info local;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
@@ -645,15 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
- if (entry->addr.family != sk->sk_family) {
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if ((entry->addr.family == AF_INET &&
- !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
- (sk->sk_family == AF_INET &&
- !ipv6_addr_v4mapped(&entry->addr.addr6)))
-#endif
- continue;
- }
+ if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
+ continue;
if (msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@@ -666,8 +658,18 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
+ struct mptcp_addr_info local;
+
memset(&local, 0, sizeof(local));
- local.family = msk->pm.remote.family;
+ local.family =
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ remote->family == AF_INET6 &&
+ ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
+#endif
+ remote->family;
+
+ if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ return 0;
msk->pm.subflows++;
addrs[i++] = local;
@@ -706,7 +708,9 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, addrs);
+ if (nr == 0)
+ return;
msk->pm.add_addr_accepted++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
@@ -1145,7 +1149,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss
if (!tcp_rtx_and_write_queues_empty(ssk)) {
subflow->stale = 1;
__mptcp_retransmit_pending_data(sk);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+ MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE);
}
unlock_sock_fast(ssk, slow);
@@ -1905,8 +1909,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
}
if (token)
- return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
- token, &addr, &remote, bkup);
+ return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup);
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
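Several hunks above swap the open-coded family checks for mptcp_pm_addr_families_match(), which is introduced elsewhere in this series. A hedged user-space model of the rule those call sites appear to depend on: an address is "v4-like" if it is AF_INET or a v4-mapped IPv6 address, and local and remote must agree. The helper body below is an assumption based on the removed checks, not a quote:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>     /* AF_INET, AF_INET6 */

    struct pm_addr { int family; unsigned char addr6[16]; };

    static bool addr_is_v4(const struct pm_addr *a)
    {
            static const unsigned char mapped_pfx[12] =
                    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };

            return a->family == AF_INET ||
                   !memcmp(a->addr6, mapped_pfx, sizeof(mapped_pfx));
    }

    /* hypothetical model of mptcp_pm_addr_families_match() */
    static bool families_match(const struct pm_addr *local,
                               const struct pm_addr *remote)
    {
            return addr_is_v4(local) == addr_is_v4(remote);
    }

    int main(void)
    {
            struct pm_addr v4 = { .family = AF_INET };
            struct pm_addr v6 = { .family = AF_INET6,
                                  .addr6 = { 0x20, 0x01 } }; /* 2001::/16 */

            /* prints "1 0": v4/v4 pairs, v4/v6 does not */
            printf("%d %d\n", families_match(&v4, &v4),
                              families_match(&v4, &v6));
            return 0;
    }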
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index ea6ad9da7493..a02d3cbf2a1b 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -59,8 +59,8 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
*/
e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
if (!e) {
- spin_unlock_bh(&msk->pm.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto append_err;
}
*e = *entry;
@@ -74,6 +74,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
ret = entry->addr.id;
}
+append_err:
spin_unlock_bh(&msk->pm.lock);
return ret;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bc6c1f62a690..c9817aa0f413 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -923,9 +923,8 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
mptcp_for_each_subflow(msk, subflow) {
if (READ_ONCE(subflow->data_avail))
@@ -1408,7 +1407,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
@@ -1890,7 +1889,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
u32 time, advmss = 1;
u64 rtt_us, mstamp;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
if (copied <= 0)
return;
@@ -2217,7 +2216,7 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
- sock_owned_by_me((const struct sock *)msk);
+ msk_owned_by_me(msk);
if (__mptcp_check_fallback(msk))
return NULL;
@@ -2724,8 +2723,8 @@ static int mptcp_init_sock(struct sock *sk)
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
- sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
@@ -2892,6 +2891,12 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
+static void mptcp_listen_inuse_dec(struct sock *sk)
+{
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+}
+
bool __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
@@ -2902,6 +2907,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
sk->sk_shutdown = SHUTDOWN_MASK;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
+ mptcp_listen_inuse_dec(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3010,6 +3016,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
if (msk->fastopening)
return 0;
+ mptcp_listen_inuse_dec(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3648,12 +3655,13 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct sock *sk = sock->sk;
struct socket *ssock;
int err;
pr_debug("msk=%p", msk);
- lock_sock(sock->sk);
+ lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
err = -EINVAL;
@@ -3661,18 +3669,20 @@ static int mptcp_listen(struct socket *sock, int backlog)
}
mptcp_token_destroy(msk);
- inet_sk_state_store(sock->sk, TCP_LISTEN);
- sock_set_flag(sock->sk, SOCK_RCU_FREE);
+ inet_sk_state_store(sk, TCP_LISTEN);
+ sock_set_flag(sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog);
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (!err) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ mptcp_copy_inaddrs(sk, ssock->sk);
+ }
mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
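mptcp_listen() now bumps the per-protocol inuse counter only when listen() succeeds, and mptcp_listen_inuse_dec() drops it exactly once when the socket leaves TCP_LISTEN via close or disconnect. A toy model of that invariant (all names illustrative):

    #include <stdio.h>

    enum state { SS_CLOSE, SS_LISTEN };

    static int inuse;    /* stands in for sock_prot_inuse_add() bookkeeping */

    static void do_listen(enum state *st)
    {
            *st = SS_LISTEN;
            inuse++;                /* mptcp_listen(): count only on success */
    }

    static void do_close(enum state *st)
    {
            if (*st == SS_LISTEN)   /* mptcp_listen_inuse_dec() */
                    inuse--;
            *st = SS_CLOSE;
    }

    int main(void)
    {
            enum state st = SS_CLOSE;

            do_listen(&st);
            do_close(&st);
            do_close(&st);          /* second close: no double decrement */
            printf("inuse=%d\n", inuse);    /* 0 */
            return 0;
    }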
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 601469249da8..61fd8eabfca2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -755,7 +755,7 @@ static inline void mptcp_token_init_request(struct request_sock *req)
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(struct request_sock *req);
-int mptcp_token_new_connect(struct sock *sk);
+int mptcp_token_new_connect(struct sock *ssk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
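The msk_owned_by_me() conversions in the protocol.c and sockopt.c hunks rely on a small helper in this header; in the upstream series it is a thin macro over sock_owned_by_me(), roughly (shape assumed from the call sites):

    #define msk_owned_by_me(msk) \
            sock_owned_by_me((const struct sock *)(msk))

so callers stop open-coding the (const struct sock *) cast at every lockdep assertion.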
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 7f2c3727ab23..8a9656248b0f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -18,7 +18,7 @@
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
- sock_owned_by_me((const struct sock *)msk);
+ msk_owned_by_me(msk);
if (likely(!__mptcp_check_fallback(msk)))
return NULL;
@@ -1262,6 +1262,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
+ ssk->sk_ipv6only = sk->sk_ipv6only;
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 32904c76c6a1..4ae1a7304cf0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -26,6 +26,7 @@
#include "mib.h"
#include <trace/events/mptcp.h>
+#include <trace/events/sock.h>
static void mptcp_subflow_ops_undo_override(struct sock *ssk);
@@ -1446,6 +1447,8 @@ static void subflow_data_ready(struct sock *sk)
struct sock *parent = subflow->conn;
struct mptcp_sock *msk;
+ trace_sk_data_ready(sk);
+
msk = mptcp_sk(parent);
if (state & TCPF_LISTEN) {
/* MPJ subflow are removed from accept queue before reaching here,
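subflow_data_ready() is one of several data_ready callbacks in this section (pep-gprs, qrtr and rds follow) that gain a trace_sk_data_ready(sk) call. The tracepoint costs almost nothing when no probe is attached; a user-space analogue of that attach-or-skip pattern, with names invented for the sketch:

    #include <stdio.h>

    struct sock_model { int fd; };

    /* probe pointer: NULL means the tracepoint is disabled */
    static void (*trace_sk_data_ready_hook)(struct sock_model *sk);

    static void trace_sk_data_ready(struct sock_model *sk)
    {
            if (trace_sk_data_ready_hook)
                    trace_sk_data_ready_hook(sk);
    }

    static void probe(struct sock_model *sk)
    {
            printf("sk_data_ready fd=%d\n", sk->fd);
    }

    static void data_ready(struct sock_model *sk)
    {
            trace_sk_data_ready(sk);    /* mirrors the added call sites */
            /* ... wake the upper layer ... */
    }

    int main(void)
    {
            struct sock_model sk = { .fd = 3 };

            data_ready(&sk);                    /* probe unset: silent */
            trace_sk_data_ready_hook = probe;
            data_ready(&sk);                    /* probe set: one line */
            return 0;
    }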
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 65430f314a68..5bb924534387 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -134,7 +134,7 @@ int mptcp_token_new_request(struct request_sock *req)
/**
* mptcp_token_new_connect - create new key/idsn/token for subflow
- * @sk: the socket that will initiate a connection
+ * @ssk: the socket that will initiate a connection
*
* This function is called when a new outgoing mptcp connection is
* initiated.
@@ -148,11 +148,12 @@ int mptcp_token_new_request(struct request_sock *req)
*
* returns 0 on success.
*/
-int mptcp_token_new_connect(struct sock *sk)
+int mptcp_token_new_connect(struct sock *ssk)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int retries = MPTCP_TOKEN_MAX_RETRIES;
+ struct sock *sk = subflow->conn;
struct token_bucket *bucket;
again:
@@ -169,12 +170,13 @@ again:
}
pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
+ ssk, subflow->local_key, subflow->token, subflow->idsn);
WRITE_ONCE(msk->token, subflow->token);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
bucket->chain_len++;
spin_unlock_bh(&bucket->lock);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
}
@@ -190,8 +192,10 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
struct mptcp_sock *msk)
{
struct mptcp_subflow_request_sock *pos;
+ struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
bucket = token_bucket(req->token);
spin_lock_bh(&bucket->lock);
@@ -370,12 +374,14 @@ void mptcp_token_destroy_request(struct request_sock *req)
*/
void mptcp_token_destroy(struct mptcp_sock *msk)
{
+ struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
struct mptcp_sock *pos;
if (sk_unhashed((struct sock *)msk))
return;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
bucket = token_bucket(msk->token);
spin_lock_bh(&bucket->lock);
pos = __token_lookup_msk(bucket, msk->token);
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index 5d984bec1cd8..0758865ab658 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -57,6 +57,9 @@ static struct mptcp_sock *build_msk(struct kunit *test)
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
sock_net_set((struct sock *)msk, &init_net);
+
+ /* be sure the token helpers can dereference sk->sk_prot */
+ ((struct sock *)msk)->sk_prot = &tcp_prot;
return msk;
}
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3754eb06fb41..ba2a6b5e93d9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -98,6 +98,12 @@ nf_tables-objs += nft_set_pipapo_avx2.o
endif
endif
+ifdef CONFIG_NFT_CT
+ifdef CONFIG_RETPOLINE
+nf_tables-objs += nft_ct_fast.o
+endif
+endif
+
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 3c273483df23..b1ea054bb82c 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -30,7 +30,7 @@ config IP_SET_BITMAP_IP
depends on IP_SET
help
This option adds the bitmap:ip set type support, by which one
- can store IPv4 addresses (or network addresse) from a range.
+ can store IPv4 addresses (or network addresses) from a range.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 029171379884..80448885c3d7 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -994,7 +994,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
old_dsfield = ipv4_get_dsfield(old_iph);
*ttl = old_iph->ttl;
if (payload_len)
- *payload_len = ntohs(old_iph->tot_len);
+ *payload_len = skb_ip_totlen(skb);
}
/* Implement full-functionality option for ECN encapsulation */
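skb_ip_totlen()/iph_totlen() replace raw ntohs(iph->tot_len) reads here and in nf_log_syslog, xt_length and openvswitch below, because BIG TCP GSO packets may carry tot_len == 0, with the real length only derivable from the skb itself. A simplified, hedged sketch of that fallback (the actual helper lives in include/linux/ip.h; the exact conditions here are an approximation):

    #include <stdbool.h>
    #include <stdint.h>
    #include <arpa/inet.h>      /* ntohs */

    struct iphdr_model { uint16_t tot_len; /* network byte order */ };
    struct skb_model {
            unsigned int len;               /* bytes from head to tail  */
            unsigned int network_offset;    /* start of the IP header   */
            bool gso_tcp;                   /* skb_is_gso_tcp()         */
    };

    /* tot_len == 0 on a TCP GSO packet marks BIG TCP: fall back to
     * the skb's own length measured from the network header */
    static uint32_t iph_totlen_model(const struct skb_model *skb,
                                     const struct iphdr_model *iph)
    {
            uint32_t len = ntohs(iph->tot_len);

            if (len || !skb->gso_tcp)
                    return len;
            return skb->len - skb->network_offset;
    }

    int main(void)
    {
            struct iphdr_model ih = { .tot_len = 0 };   /* BIG TCP marker */
            struct skb_model skb = { .len = 70000, .network_offset = 14,
                                     .gso_tcp = true };

            return iph_totlen_model(&skb, &ih) == 69986 ? 0 : 1;
    }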
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 496c4920505b..9a830573480e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -514,7 +514,6 @@ EXPORT_SYMBOL_GPL(nf_ct_get_id);
static void
clean_from_lists(struct nf_conn *ct)
{
- pr_debug("clean_from_lists(%p)\n", ct);
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
@@ -582,7 +581,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- pr_debug("%s(%p)\n", __func__, ct);
WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
@@ -603,7 +601,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
if (ct->master)
nf_ct_put(ct->master);
- pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
nf_conntrack_free(ct);
}
EXPORT_SYMBOL(nf_ct_destroy);
@@ -786,8 +783,6 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- rcu_read_lock();
-
h = ____nf_conntrack_find(net, zone, tuple, hash);
if (h) {
/* We have a candidate that matches the tuple we're interested
@@ -799,7 +794,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
smp_acquire__after_ctrl_dep();
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
- goto found;
+ return h;
/* TYPESAFE_BY_RCU recycled the candidate */
nf_ct_put(ct);
@@ -807,8 +802,6 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
h = NULL;
}
-found:
- rcu_read_unlock();
return h;
}
@@ -820,16 +813,21 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
struct nf_conntrack_tuple_hash *thash;
+ rcu_read_lock();
+
thash = __nf_conntrack_find_get(net, zone, tuple,
hash_conntrack_raw(tuple, zone_id, net));
if (thash)
- return thash;
+ goto out_unlock;
rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
if (rid != zone_id)
- return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, rid, net));
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+
+out_unlock:
+ rcu_read_unlock();
return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -1210,7 +1208,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
- pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
* user context, else we insert an already 'dead' hash, blocking
@@ -1374,9 +1371,6 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
- continue;
-
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -1446,11 +1440,14 @@ static bool gc_worker_skip_ct(const struct nf_conn *ct)
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
+ u8 protonum = nf_ct_protonum(ct);
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
+ return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
- l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(protonum);
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
@@ -1507,7 +1504,8 @@ static void gc_worker(struct work_struct *work)
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
- continue;
+ if (!nf_conntrack_max95)
+ continue;
}
if (expired_count > GC_SCAN_EXPIRED_MAX) {
@@ -1721,10 +1719,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_zone tmp;
struct nf_conntrack_net *cnet;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
- pr_debug("Can't invert tuple.\n");
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple))
return NULL;
- }
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
@@ -1764,8 +1760,6 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
- pr_debug("expectation arrives ct=%p exp=%p\n",
- ct, exp);
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
@@ -1829,10 +1823,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
- &tuple)) {
- pr_debug("Can't get tuple\n");
+ &tuple))
return 0;
- }
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
@@ -1864,17 +1856,15 @@ resolve_normal_ct(struct nf_conn *tmpl,
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
ctinfo = IP_CT_ESTABLISHED_REPLY;
} else {
+ unsigned long status = READ_ONCE(ct->status);
+
/* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- pr_debug("normal packet for %p\n", ct);
+ if (likely(status & IPS_SEEN_REPLY))
ctinfo = IP_CT_ESTABLISHED;
- } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
- pr_debug("related packet for %p\n", ct);
+ else if (status & IPS_EXPECTED)
ctinfo = IP_CT_RELATED;
- } else {
- pr_debug("new packet for %p\n", ct);
+ else
ctinfo = IP_CT_NEW;
- }
}
nf_ct_set(skb, ct, ctinfo);
return 0;
@@ -1988,7 +1978,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
/* rcu_read_lock()ed by nf_hook_thresh */
dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
if (dataoff <= 0) {
- pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
@@ -2027,7 +2016,6 @@ repeat:
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
- pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_ct_put(ct);
skb->_nfct = 0;
/* Special case: TCP tracker reports an attempt to reopen a
@@ -2066,7 +2054,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
/* Should be unconfirmed, so not in hash table yet */
WARN_ON(nf_ct_is_confirmed(ct));
- pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
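Dropping rcu_read_lock()/rcu_read_unlock() from __nf_conntrack_find_get() and taking the lock once in nf_conntrack_find_get() lets the reply-zone retry run inside the same read-side critical section instead of locking twice. The shape of the refactor, sketched with no-op stubs so it compiles in user space:

    #include <stddef.h>

    /* no-op stand-ins so the control flow compiles outside the kernel */
    static void rcu_read_lock(void)   { }
    static void rcu_read_unlock(void) { }

    struct hash_entry;

    static struct hash_entry *lookup(unsigned int hash)    /* stub */
    {
            (void)hash;
            return NULL;
    }

    /* after the refactor the caller brackets both lookups in one
     * read-side critical section */
    static struct hash_entry *find_get(unsigned int h_orig,
                                       unsigned int h_reply)
    {
            struct hash_entry *e;

            rcu_read_lock();
            e = lookup(h_orig);
            if (!e && h_reply != h_orig)
                    e = lookup(h_reply);    /* reply-zone retry, same section */
            rcu_read_unlock();

            return e;
    }

    int main(void)
    {
            return find_get(1, 2) == NULL ? 0 : 1;
    }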
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1286ae7d4609..308fc0023c7e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3866,7 +3866,7 @@ static int __init ctnetlink_init(void)
{
int ret;
- BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx));
+ NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ccef340be575..c928ff63b10e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -284,16 +284,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
/* We only do TCP and SCTP at the moment: is there a better way? */
if (tuple.dst.protonum != IPPROTO_TCP &&
- tuple.dst.protonum != IPPROTO_SCTP) {
- pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+ tuple.dst.protonum != IPPROTO_SCTP)
return -ENOPROTOOPT;
- }
- if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
- pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
- *len, sizeof(struct sockaddr_in));
+ if ((unsigned int)*len < sizeof(struct sockaddr_in))
return -EINVAL;
- }
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (h) {
@@ -307,17 +302,12 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
.tuple.dst.u3.ip;
memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
- pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
- &sin.sin_addr.s_addr, ntohs(sin.sin_port));
nf_ct_put(ct);
if (copy_to_user(user, &sin, sizeof(sin)) != 0)
return -EFAULT;
else
return 0;
}
- pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
- &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
- &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
return -ENOENT;
}
@@ -360,12 +350,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
- if (!h) {
- pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
- &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
- &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ if (!h)
return -ENOENT;
- }
ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 011d414038ea..91eacc9b0b98 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -153,7 +153,8 @@ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
static int do_basic_checks(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- unsigned long *map)
+ unsigned long *map,
+ const struct nf_hook_state *state)
{
u_int32_t offset, count;
struct sctp_chunkhdr _sch, *sch;
@@ -162,8 +163,6 @@ static int do_basic_checks(struct nf_conn *ct,
flag = 0;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
- pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
-
if (sch->type == SCTP_CID_INIT ||
sch->type == SCTP_CID_INIT_ACK ||
sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
@@ -178,7 +177,9 @@ static int do_basic_checks(struct nf_conn *ct,
sch->type == SCTP_CID_COOKIE_ECHO ||
flag) &&
count != 0) || !sch->length) {
- pr_debug("Basic checks failed\n");
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "%s failed. chunk num %d, type %d, len %d flag %d\n",
+ __func__, count, sch->type, sch->length, flag);
return 1;
}
@@ -186,7 +187,6 @@ static int do_basic_checks(struct nf_conn *ct,
set_bit(sch->type, map);
}
- pr_debug("Basic checks passed\n");
return count == 0;
}
@@ -196,64 +196,47 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
{
int i;
- pr_debug("Chunk type: %d\n", chunk_type);
-
switch (chunk_type) {
case SCTP_CID_INIT:
- pr_debug("SCTP_CID_INIT\n");
i = 0;
break;
case SCTP_CID_INIT_ACK:
- pr_debug("SCTP_CID_INIT_ACK\n");
i = 1;
break;
case SCTP_CID_ABORT:
- pr_debug("SCTP_CID_ABORT\n");
i = 2;
break;
case SCTP_CID_SHUTDOWN:
- pr_debug("SCTP_CID_SHUTDOWN\n");
i = 3;
break;
case SCTP_CID_SHUTDOWN_ACK:
- pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
i = 4;
break;
case SCTP_CID_ERROR:
- pr_debug("SCTP_CID_ERROR\n");
i = 5;
break;
case SCTP_CID_COOKIE_ECHO:
- pr_debug("SCTP_CID_COOKIE_ECHO\n");
i = 6;
break;
case SCTP_CID_COOKIE_ACK:
- pr_debug("SCTP_CID_COOKIE_ACK\n");
i = 7;
break;
case SCTP_CID_SHUTDOWN_COMPLETE:
- pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8;
break;
case SCTP_CID_HEARTBEAT:
- pr_debug("SCTP_CID_HEARTBEAT");
i = 9;
break;
case SCTP_CID_HEARTBEAT_ACK:
- pr_debug("SCTP_CID_HEARTBEAT_ACK");
i = 10;
break;
default:
/* Other chunks like DATA or SACK do not change the state */
- pr_debug("Unknown chunk type, Will stay in %s\n",
- sctp_conntrack_names[cur_state]);
+ pr_debug("Unknown chunk type %d, Will stay in %s\n",
+ chunk_type, sctp_conntrack_names[cur_state]);
return cur_state;
}
- pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
- dir, sctp_conntrack_names[cur_state], chunk_type,
- sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
return sctp_conntracks[dir][i][cur_state];
}
@@ -370,7 +353,7 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
if (sh == NULL)
goto out;
- if (do_basic_checks(ct, skb, dataoff, map) != 0)
+ if (do_basic_checks(ct, skb, dataoff, map, state) != 0)
goto out;
if (!nf_ct_is_confirmed(ct)) {
@@ -393,7 +376,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
!test_bit(SCTP_CID_HEARTBEAT, map) &&
!test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
sh->vtag != ct->proto.sctp.vtag[dir]) {
- pr_debug("Verification tag check failed\n");
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "verification tag check failed %x vs %x for dir %d",
+ sh->vtag, ct->proto.sctp.vtag[dir], dir);
goto out;
}
@@ -468,9 +453,10 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* Invalid */
if (new_state == SCTP_CONNTRACK_MAX) {
- pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
- "conntrack=%u\n",
- dir, sch->type, old_state);
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "Invalid, old_state %d, dir %d, type %d",
+ old_state, dir, sch->type);
+
goto out_unlock;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3ac1af6f59fc..16ee5ebe1ce1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -930,7 +930,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
unsigned int index, *timeouts;
enum nf_ct_tcp_action res;
@@ -954,7 +953,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(th);
new_state = tcp_conntracks[dir][index][old_state];
- tuple = &ct->tuplehash[dir].tuple;
switch (new_state) {
case TCP_CONNTRACK_SYN_SENT:
@@ -1232,13 +1230,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_index = index;
ct->proto.tcp.last_dir = dir;
- pr_debug("tcp_conntracks: ");
- nf_ct_dump_tuple(tuple);
- pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
- (th->syn ? 1 : 0), (th->ack ? 1 : 0),
- (th->fin ? 1 : 0), (th->rst ? 1 : 0),
- old_state, new_state);
-
ct->proto.tcp.state = new_state;
if (old_state != new_state
&& new_state == TCP_CONNTRACK_FIN_WAIT)
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3b516cffc779..6b9206635b24 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -88,6 +88,7 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
const struct nf_hook_state *state)
{
unsigned int *timeouts;
+ unsigned long status;
if (udp_error(skb, dataoff, state))
return -NF_ACCEPT;
@@ -96,26 +97,27 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
- if (!nf_ct_is_confirmed(ct))
+ status = READ_ONCE(ct->status);
+ if ((status & IPS_CONFIRMED) == 0)
ct->proto.udp.stream_ts = 2 * HZ + jiffies;
/* If we've seen traffic both ways, this is some kind of UDP
* stream. Set Assured.
*/
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (status & IPS_SEEN_REPLY) {
unsigned long extra = timeouts[UDP_CT_UNREPLIED];
bool stream = false;
/* Still active after two seconds? Extend timeout. */
if (time_after(jiffies, ct->proto.udp.stream_ts)) {
extra = timeouts[UDP_CT_REPLIED];
- stream = true;
+ stream = (status & IPS_ASSURED) == 0;
}
nf_ct_refresh_acct(ct, ctinfo, skb, extra);
/* never set ASSURED for IPS_NAT_CLASH, they time out soon */
- if (unlikely((ct->status & IPS_NAT_CLASH)))
+ if (unlikely((status & IPS_NAT_CLASH)))
return NF_ACCEPT;
/* Also, more likely to be important, and not a probe */
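The UDP tracker now snapshots ct->status once with READ_ONCE() and tests every IPS_* condition against that copy, so a concurrent update cannot make the confirmed, seen-reply and assured checks disagree with each other. One detail worth underlining: IPS_SEEN_REPLY is the mask (1 << IPS_SEEN_REPLY_BIT), and the mask is what must be ANDed with the snapshot; ANDing the bit number silently tests the wrong bit:

    #include <stdio.h>

    /* bit numbers and masks as in the uapi nf_conntrack_common.h */
    enum { IPS_SEEN_REPLY_BIT = 1, IPS_ASSURED_BIT = 2 };
    #define IPS_SEEN_REPLY (1UL << IPS_SEEN_REPLY_BIT)
    #define IPS_ASSURED    (1UL << IPS_ASSURED_BIT)

    int main(void)
    {
            /* one snapshot, several mutually consistent tests */
            unsigned long status = IPS_SEEN_REPLY;

            printf("seen_reply=%d assured=%d\n",
                   !!(status & IPS_SEEN_REPLY), !!(status & IPS_ASSURED));

            /* ANDing the bit number would test bit 0 instead: prints 0 */
            printf("wrong test: %d\n", !!(status & IPS_SEEN_REPLY_BIT));
            return 0;
    }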
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 81c26a96c30b..04bd0ed4d2ae 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -193,8 +193,11 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
+ enum udp_conntrack state =
+ test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ UDP_CT_REPLIED : UDP_CT_UNREPLIED;
- timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout = tn->timeouts[state];
timeout -= tn->offload_timeout;
} else {
return;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 0ccabf3fa6aa..9505f9d188ff 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -39,7 +39,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
}
static int nf_flow_rule_route_inet(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 4d9b99abe37d..1c26f03fc661 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -679,7 +679,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
return 0;
}
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@@ -704,7 +704,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
-int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@@ -735,7 +735,7 @@ nf_flow_offload_rule_alloc(struct net *net,
{
const struct nf_flowtable *flowtable = offload->flowtable;
const struct flow_offload_tuple *tuple, *other_tuple;
- const struct flow_offload *flow = offload->flow;
+ struct flow_offload *flow = offload->flow;
struct dst_entry *other_dst = NULL;
struct nf_flow_rule *flow_rule;
int err = -ENOMEM;
@@ -895,8 +895,9 @@ static int flow_offload_rule_add(struct flow_offload_work *offload,
ok_count += flow_offload_tuple_add(offload, flow_rule[0],
FLOW_OFFLOAD_DIR_ORIGINAL);
- ok_count += flow_offload_tuple_add(offload, flow_rule[1],
- FLOW_OFFLOAD_DIR_REPLY);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+ FLOW_OFFLOAD_DIR_REPLY);
if (ok_count == 0)
return -ENOENT;
@@ -926,7 +927,8 @@ static void flow_offload_work_del(struct flow_offload_work *offload)
{
clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}
@@ -946,7 +948,9 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
u64 lastused;
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
- flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
+ &stats[1]);
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
offload->flow->timeout = max_t(u64, offload->flow->timeout,
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index cb894f0d63e9..c66689ad2b49 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -322,7 +322,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ iph_totlen(skb, ih), ih->tos & IPTOS_TOS_MASK,
ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
/* Max length: 6 "CE DF MF " */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8c09e4d12ac1..974b95dece1d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1401,6 +1401,10 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
}
if (IS_ERR(table)) {
+ if (PTR_ERR(table) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYTABLE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(table);
}
@@ -2639,6 +2643,10 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
chain = nft_chain_lookup(net, table, attr, genmask);
}
if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(chain);
}
@@ -3716,6 +3724,10 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
genmask);
if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
@@ -3729,6 +3741,10 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_HANDLE]) {
rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
+ if (PTR_ERR(rule) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
}
@@ -4808,6 +4824,10 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
}
if (IS_ERR(set)) {
+ if (PTR_ERR(set) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSET)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(set);
}
@@ -6690,6 +6710,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
+ if (err == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM)
+ continue;
+
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
break;
@@ -7334,6 +7358,10 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
}
if (IS_ERR(obj)) {
+ if (PTR_ERR(obj) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYOBJ)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(obj);
}
@@ -7964,6 +7992,10 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
}
if (IS_ERR(flowtable)) {
+ if (PTR_ERR(flowtable) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYFLOWTABLE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(flowtable);
}
@@ -8373,6 +8405,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
+ [NFT_MSG_DESTROYTABLE] = {
+ .call = nf_tables_deltable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
[NFT_MSG_NEWCHAIN] = {
.call = nf_tables_newchain,
.type = NFNL_CB_BATCH,
@@ -8391,6 +8429,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
+ [NFT_MSG_DESTROYCHAIN] = {
+ .call = nf_tables_delchain,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
[NFT_MSG_NEWRULE] = {
.call = nf_tables_newrule,
.type = NFNL_CB_BATCH,
@@ -8415,6 +8459,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
+ [NFT_MSG_DESTROYRULE] = {
+ .call = nf_tables_delrule,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
[NFT_MSG_NEWSET] = {
.call = nf_tables_newset,
.type = NFNL_CB_BATCH,
@@ -8433,6 +8483,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
+ [NFT_MSG_DESTROYSET] = {
+ .call = nf_tables_delset,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
[NFT_MSG_NEWSETELEM] = {
.call = nf_tables_newsetelem,
.type = NFNL_CB_BATCH,
@@ -8451,6 +8507,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_DESTROYSETELEM] = {
+ .call = nf_tables_delsetelem,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
[NFT_MSG_GETGEN] = {
.call = nf_tables_getgen,
.type = NFNL_CB_RCU,
@@ -8473,6 +8535,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
+ [NFT_MSG_DESTROYOBJ] = {
+ .call = nf_tables_delobj,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
[NFT_MSG_GETOBJ_RESET] = {
.call = nf_tables_getobj,
.type = NFNL_CB_RCU,
@@ -8497,6 +8565,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
+ [NFT_MSG_DESTROYFLOWTABLE] = {
+ .call = nf_tables_delflowtable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
};
static int nf_tables_validate(struct net *net)
@@ -8590,6 +8664,7 @@ static void nft_commit_release(struct nft_trans *trans)
{
switch (trans->msg_type) {
case NFT_MSG_DELTABLE:
+ case NFT_MSG_DESTROYTABLE:
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_NEWCHAIN:
@@ -8597,23 +8672,29 @@ static void nft_commit_release(struct nft_trans *trans)
kfree(nft_trans_chain_name(trans));
break;
case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
nft_set_destroy(&trans->ctx, nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
+ case NFT_MSG_DESTROYSETELEM:
nf_tables_set_elem_destroy(&trans->ctx,
nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
break;
case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break;
case NFT_MSG_DELFLOWTABLE:
+ case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans))
nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
else
@@ -9065,8 +9146,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELTABLE:
+ case NFT_MSG_DESTROYTABLE:
list_del_rcu(&trans->ctx.table->list);
- nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ nf_tables_table_notify(&trans->ctx, trans->msg_type);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
@@ -9081,8 +9163,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
break;
case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
nft_chain_del(trans->ctx.chain);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ nf_tables_chain_notify(&trans->ctx, trans->msg_type);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
trans->ctx.chain);
@@ -9098,10 +9181,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
list_del_rcu(&nft_trans_rule(trans)->list);
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
- NFT_MSG_DELRULE);
+ trans->msg_type);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
NFT_TRANS_COMMIT);
@@ -9129,9 +9213,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
- NFT_MSG_DELSET, GFP_KERNEL);
+ trans->msg_type, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
@@ -9143,11 +9228,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
+ case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_DELSETELEM);
+ trans->msg_type);
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem)) {
atomic_dec(&te->set->nelems);
@@ -9169,9 +9255,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
break;
case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
nft_obj_del(nft_trans_obj(trans));
nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
- NFT_MSG_DELOBJ);
+ trans->msg_type);
break;
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
@@ -9193,11 +9280,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELFLOWTABLE:
+ case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
- NFT_MSG_DELFLOWTABLE);
+ trans->msg_type);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
@@ -9205,7 +9293,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable(trans)->hook_list,
- NFT_MSG_DELFLOWTABLE);
+ trans->msg_type);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
}
@@ -9301,6 +9389,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELTABLE:
+ case NFT_MSG_DESTROYTABLE:
nft_clear(trans->ctx.net, trans->ctx.table);
nft_trans_destroy(trans);
break;
@@ -9322,6 +9411,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, trans->ctx.chain);
nft_trans_destroy(trans);
@@ -9336,6 +9426,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
trans->ctx.chain->use++;
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
@@ -9357,6 +9448,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_set(trans));
nft_trans_destroy(trans);
@@ -9372,6 +9464,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
+ case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
nft_setelem_data_activate(net, te->set, &te->elem);
@@ -9391,6 +9484,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
@@ -9407,6 +9501,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELFLOWTABLE:
+ case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
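Each NFT_MSG_DESTROY* entry reuses its delete handler and differs only in swallowing ENOENT, which gives destroy its delete-if-exists semantics: destroying an absent table, chain, rule, set, element, object or flowtable succeeds where plain delete returns an error. The recurring guard, reduced to a runnable skeleton:

    #include <errno.h>
    #include <stdio.h>

    enum msg_type { MSG_DEL, MSG_DESTROY };

    /* stands in for nft_*_lookup(): -ENOENT when the object is absent */
    static int lookup_object(int present)
    {
            return present ? 0 : -ENOENT;
    }

    static int del_object(enum msg_type type, int present)
    {
            int err = lookup_object(present);

            /* the guard added to every delete handler: a missing object
             * is only an error for the plain delete command */
            if (err == -ENOENT && type == MSG_DESTROY)
                    return 0;
            return err;
    }

    int main(void)
    {
            printf("del missing:     %d\n", del_object(MSG_DEL, 0));     /* -2 */
            printf("destroy missing: %d\n", del_object(MSG_DESTROY, 0)); /* 0 */
            return 0;
    }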
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 709a736c301c..6ecd0ba2e546 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,6 +21,26 @@
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nft_meta.h>
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_X86)
+
+static struct static_key_false nf_tables_skip_direct_calls;
+
+static bool nf_skip_indirect_calls(void)
+{
+ return static_branch_likely(&nf_tables_skip_direct_calls);
+}
+
+static void __init nf_skip_indirect_calls_enable(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ static_branch_enable(&nf_tables_skip_direct_calls);
+}
+#else
+static inline bool nf_skip_indirect_calls(void) { return false; }
+
+static inline void nf_skip_indirect_calls_enable(void) { }
+#endif
+
static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
enum nft_trace_types type)
@@ -193,7 +213,12 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
struct nft_pktinfo *pkt)
{
#ifdef CONFIG_RETPOLINE
- unsigned long e = (unsigned long)expr->ops->eval;
+ unsigned long e;
+
+ if (nf_skip_indirect_calls())
+ goto indirect_call;
+
+ e = (unsigned long)expr->ops->eval;
#define X(e, fun) \
do { if ((e) == (unsigned long)(fun)) \
return fun(expr, regs, pkt); } while (0)
@@ -203,13 +228,19 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
X(e, nft_counter_eval);
X(e, nft_meta_get_eval);
X(e, nft_lookup_eval);
+#if IS_ENABLED(CONFIG_NFT_CT)
+ X(e, nft_ct_get_fast_eval);
+#endif
X(e, nft_range_eval);
X(e, nft_immediate_eval);
X(e, nft_byteorder_eval);
X(e, nft_dynset_eval);
X(e, nft_rt_get_eval);
X(e, nft_bitwise_eval);
+ X(e, nft_objref_eval);
+ X(e, nft_objref_map_eval);
#undef X
+indirect_call:
#endif /* CONFIG_RETPOLINE */
expr->ops->eval(expr, regs, pkt);
}
@@ -369,6 +400,8 @@ int __init nf_tables_core_module_init(void)
goto err;
}
+ nf_skip_indirect_calls_enable();
+
return 0;
err:
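expr_call_ops_eval() sidesteps retpoline-thunked indirect calls by comparing ops->eval against a fixed list of known evaluators and branching to the match directly; the new static key then skips even those comparisons on CPUs that do not need retpolines. The dispatch trick in miniature:

    #include <stdio.h>

    static void eval_a(int x) { printf("a:%d\n", x); }
    static void eval_b(int x) { printf("b:%d\n", x); }

    static void dispatch(void (*eval)(int), int x)
    {
            /* compare-and-direct-call: the CPU sees ordinary conditional
             * branches instead of a speculation-unfriendly indirect call */
    #define X(fn) do { if (eval == fn) { fn(x); return; } } while (0)
            X(eval_a);
            X(eval_b);
    #undef X
            eval(x);    /* fallback: genuine indirect call */
    }

    int main(void)
    {
            dispatch(eval_a, 1);    /* taken via the direct branch */
            dispatch(eval_b, 2);    /* likewise */
            return 0;
    }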
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index c68e2151defe..b9c84499438b 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -12,7 +12,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@@ -23,16 +23,6 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
-struct nft_ct {
- enum nft_ct_keys key:8;
- enum ip_conntrack_dir dir:8;
- u8 len;
- union {
- u8 dreg;
- u8 sreg;
- };
-};
-
struct nft_ct_helper_obj {
struct nf_conntrack_helper *helper4;
struct nf_conntrack_helper *helper6;
@@ -759,6 +749,18 @@ static bool nft_ct_set_reduce(struct nft_regs_track *track,
return false;
}
+#ifdef CONFIG_RETPOLINE
+static const struct nft_expr_ops nft_ct_get_fast_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_get_fast_eval,
+ .init = nft_ct_get_init,
+ .destroy = nft_ct_get_destroy,
+ .dump = nft_ct_get_dump,
+ .reduce = nft_ct_set_reduce,
+};
+#endif
+
static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
@@ -791,8 +793,21 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
return ERR_PTR(-EINVAL);
- if (tb[NFTA_CT_DREG])
+ if (tb[NFTA_CT_DREG]) {
+#ifdef CONFIG_RETPOLINE
+ u32 k = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+
+ switch (k) {
+ case NFT_CT_STATE:
+ case NFT_CT_DIRECTION:
+ case NFT_CT_STATUS:
+ case NFT_CT_MARK:
+ case NFT_CT_SECMARK:
+ return &nft_ct_get_fast_ops;
+ }
+#endif
return &nft_ct_get_ops;
+ }
if (tb[NFTA_CT_SREG]) {
#ifdef CONFIG_NF_CONNTRACK_ZONES
diff --git a/net/netfilter/nft_ct_fast.c b/net/netfilter/nft_ct_fast.c
new file mode 100644
index 000000000000..89983b0613fa
--- /dev/null
+++ b/net/netfilter/nft_ct_fast.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#if IS_ENABLED(CONFIG_NFT_CT)
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack.h>
+
+void nft_ct_get_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int state;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+
+ switch (priv->key) {
+ case NFT_CT_STATE:
+ if (ct)
+ state = NF_CT_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ state = NF_CT_STATE_UNTRACKED_BIT;
+ else
+ state = NF_CT_STATE_INVALID_BIT;
+ *dest = state;
+ return;
+ default:
+ break;
+ }
+
+ if (!ct) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ switch (priv->key) {
+ case NFT_CT_DIRECTION:
+ nft_reg_store8(dest, CTINFO2DIR(ctinfo));
+ return;
+ case NFT_CT_STATUS:
+ *dest = ct->status;
+ return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ *dest = ct->mark;
+ return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ *dest = ct->secmark;
+ return;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ regs->verdict.code = NFT_BREAK;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_ct_get_fast_eval);
+#endif
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 7b01aa2ef653..cb37169608ba 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -13,9 +13,9 @@
#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
-static void nft_objref_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_objref_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_object *obj = nft_objref_priv(expr);
@@ -100,9 +100,9 @@ struct nft_objref_map {
struct nft_set_binding binding;
};
-static void nft_objref_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_objref_map_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 1873da3a945a..b3d623a52885 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ static bool
length_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_length_info *info = par->matchinfo;
- u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
+ u32 pktlen = skb_ip_totlen(skb);
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c8b137649ca4..2172930b1f17 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1103,7 +1103,7 @@ static int ovs_skb_network_trim(struct sk_buff *skb)
switch (skb->protocol) {
case htons(ETH_P_IP):
- len = ntohs(ip_hdr(skb)->tot_len);
+ len = skb_ip_totlen(skb);
break;
case htons(ETH_P_IPV6):
len = sizeof(struct ipv6hdr)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e20d1a973417..416976f70322 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -107,7 +107,8 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
rcu_assign_pointer(flow->stats[cpu],
new_stats);
- cpumask_set_cpu(cpu, &flow->cpu_used_mask);
+ cpumask_set_cpu(cpu,
+ flow->cpu_used_mask);
goto unlock;
}
}
@@ -135,7 +136,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
memset(ovs_stats, 0, sizeof(*ovs_stats));
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -159,7 +161,8 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
int cpu;
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 073ab73ffeaa..b5711aff6e76 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -229,7 +229,7 @@ struct sw_flow {
*/
struct sw_flow_key key;
struct sw_flow_id id;
- struct cpumask cpu_used_mask;
+ struct cpumask *cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 0a0e4c283f02..791504b7f42b 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -79,6 +79,7 @@ struct sw_flow *ovs_flow_alloc(void)
return ERR_PTR(-ENOMEM);
flow->stats_last_writer = -1;
+ flow->cpu_used_mask = (struct cpumask *)&flow->stats[nr_cpu_ids];
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
@@ -91,7 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
- cpumask_set_cpu(0, &flow->cpu_used_mask);
+ cpumask_set_cpu(0, flow->cpu_used_mask);
return flow;
err:
@@ -115,7 +116,7 @@ static void flow_free(struct sw_flow *flow)
flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct sw_flow_stats __force *)flow->stats[cpu]);
@@ -1196,7 +1197,8 @@ int ovs_flow_init(void)
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct sw_flow_stats *)),
+ * sizeof(struct sw_flow_stats *))
+ + cpumask_size(),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
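Turning sw_flow::cpu_used_mask into a pointer lets openvswitch stop embedding a fixed struct cpumask, which CONFIG_CPUMASK_OFFSTACK can make needlessly large, and instead append exactly cpumask_size() bytes behind the per-CPU stats pointers in the same kmem_cache allocation. The layout trick in plain C, with sizes simplified:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct stats;   /* per-CPU stats, opaque here */

    struct flow {
            unsigned long *cpu_used_mask;   /* points into the tail below */
            struct stats *stats[];          /* one slot per possible CPU  */
    };

    int main(void)
    {
            unsigned int nr_cpu_ids = 8;
            /* cpumask_size() stand-in: bitmap bytes rounded to longs */
            size_t bits = 8 * sizeof(unsigned long);
            size_t mask_bytes = ((nr_cpu_ids + bits - 1) / bits) *
                                sizeof(unsigned long);

            /* one allocation: header + stats pointers + mask storage */
            struct flow *f = calloc(1, sizeof(*f) +
                                       nr_cpu_ids * sizeof(struct stats *) +
                                       mask_bytes);
            if (!f)
                    return 1;

            /* mirrors: flow->cpu_used_mask =
             *              (struct cpumask *)&flow->stats[nr_cpu_ids]; */
            f->cpu_used_mask = (unsigned long *)&f->stats[nr_cpu_ids];

            f->cpu_used_mask[0] |= 1UL << 0;    /* cpumask_set_cpu(0, ...) */
            printf("cpu0 set: %lu\n", f->cpu_used_mask[0] & 1);
            free(f);
            return 0;
    }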
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5ab98ca2511..8ffb19c643ab 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2296,6 +2296,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
else if (skb->pkt_type != PACKET_OUTGOING &&
skb_csum_unnecessary(skb))
status |= TP_STATUS_CSUM_VALID;
+ if (skb_is_gso(skb) && skb_is_gso_tcp(skb))
+ status |= TP_STATUS_GSO_TCP;
if (snaplen > res)
snaplen = res;
@@ -3522,6 +3524,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
else if (skb->pkt_type != PACKET_OUTGOING &&
skb_csum_unnecessary(skb))
aux.tp_status |= TP_STATUS_CSUM_VALID;
+ if (skb_is_gso(skb) && skb_is_gso_tcp(skb))
+ aux.tp_status |= TP_STATUS_GSO_TCP;
aux.tp_len = origlen;
aux.tp_snaplen = skb->len;
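
[Editorial note] The two hunks above export a new status bit: when a captured frame is a GSO TCP super-packet (e.g. merged by GRO), its tp_status now carries TP_STATUS_GSO_TCP, letting userspace tell a truncated super-packet apart from an undersized capture. A minimal sketch of checking the bit from the PACKET_AUXDATA control message after recvmsg() on an AF_PACKET socket (socket setup and error handling omitted; assumes a uapi header new enough to define TP_STATUS_GSO_TCP):

/*
 * Hedged sketch: walk the control messages returned by recvmsg() and
 * report whether the frame was flagged as a merged GSO TCP packet.
 */
#include <linux/if_packet.h>
#include <stdbool.h>
#include <sys/socket.h>

static bool frame_is_gso_tcp(struct msghdr *msg)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		struct tpacket_auxdata *aux;

		if (cmsg->cmsg_level != SOL_PACKET ||
		    cmsg->cmsg_type != PACKET_AUXDATA)
			continue;
		aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
		return aux->tp_status & TP_STATUS_GSO_TCP;
	}
	return false;
}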
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 1f5df0432d37..7f68d8662cfb 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -19,6 +19,8 @@
#include <net/tcp_states.h>
#include <net/phonet/gprs.h>
+#include <trace/events/sock.h>
+
#define GPRS_DEFAULT_MTU 1400
struct gprs_dev {
@@ -138,6 +140,8 @@ static void gprs_data_ready(struct sock *sk)
struct gprs_dev *gp = sk->sk_user_data;
struct sk_buff *skb;
+ trace_sk_data_ready(sk);
+
while ((skb = pep_read(sk)) != NULL) {
skb_orphan(skb);
gprs_recv(gp, skb);
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index e595079c2caf..722936f7dd98 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -12,6 +12,7 @@
#include "qrtr.h"
+#include <trace/events/sock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/qrtr.h>
@@ -755,6 +756,8 @@ static void qrtr_ns_worker(struct work_struct *work)
static void qrtr_ns_data_ready(struct sock *sk)
{
+ trace_sk_data_ready(sk);
+
queue_work(qrtr_ns.workqueue, &qrtr_ns.work);
}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index cfbf0e129cba..e53b7f266bd7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -31,6 +31,7 @@
*
*/
#include <linux/kernel.h>
+#include <linux/sched/clock.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 5b426dc3634d..c71b923764fd 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -35,6 +35,7 @@
#include <net/sock.h>
#include <linux/in.h>
#include <linux/export.h>
+#include <linux/sched/clock.h>
#include <linux/time.h>
#include <linux/rds.h>
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 7edf2e69d3fe..014fa24418c1 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -34,6 +34,7 @@
#include <linux/gfp.h>
#include <linux/in.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include "rds.h"
#include "tcp.h"
@@ -234,6 +235,7 @@ void rds_tcp_listen_data_ready(struct sock *sk)
{
void (*ready)(struct sock *sk);
+ trace_sk_data_ready(sk);
rdsdebug("listen data ready sk %p\n", sk);
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index f4ee13da90c7..c00f04a1a534 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include "rds.h"
#include "tcp.h"
@@ -309,6 +310,7 @@ void rds_tcp_data_ready(struct sock *sk)
struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
+ trace_sk_data_ready(sk);
rdsdebug("data ready sk %p\n", sk);
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index f5afc9bcdee6..786dbfdad772 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -75,6 +75,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill;
struct gpio_desc *gpio;
+ const char *name_property;
+ const char *type_property;
const char *type_name;
int ret;
@@ -82,8 +84,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
if (!rfkill)
return -ENOMEM;
- device_property_read_string(&pdev->dev, "name", &rfkill->name);
- device_property_read_string(&pdev->dev, "type", &type_name);
+ if (dev_of_node(&pdev->dev)) {
+ name_property = "label";
+ type_property = "radio-type";
+ } else {
+ name_property = "name";
+ type_property = "type";
+ }
+ device_property_read_string(&pdev->dev, name_property, &rfkill->name);
+ device_property_read_string(&pdev->dev, type_property, &type_name);
if (!rfkill->name)
rfkill->name = dev_name(&pdev->dev);
@@ -157,12 +166,19 @@ static const struct acpi_device_id rfkill_acpi_match[] = {
MODULE_DEVICE_TABLE(acpi, rfkill_acpi_match);
#endif
+static const struct of_device_id rfkill_of_match[] __maybe_unused = {
+ { .compatible = "rfkill-gpio", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, rfkill_of_match);
+
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
.remove = rfkill_gpio_remove,
.driver = {
.name = "rfkill_gpio",
.acpi_match_table = ACPI_PTR(rfkill_acpi_match),
+ .of_match_table = of_match_ptr(rfkill_of_match),
},
};
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 7ae023b37a83..a20986806fea 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -36,6 +36,15 @@ config AF_RXRPC_INJECT_LOSS
Say Y here to inject packet loss by discarding some received and some
transmitted packets.
+config AF_RXRPC_INJECT_RX_DELAY
+ bool "Inject delay into packet reception"
+ depends on SYSCTL
+ help
+ Say Y here to inject a delay into packet reception, allowing an
+ extended RTT time to be modelled. The delay can be configured using
+ /proc/sys/net/rxrpc/rxrpc_inject_rx_delay, setting a number of
+ milliseconds up to 0.5s (note that the granularity is actually in
+ jiffies).
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index ebbd4a1c3f86..102f5cbff91a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -786,7 +786,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
INIT_LIST_HEAD(&rx->sock_calls);
INIT_LIST_HEAD(&rx->to_be_accepted);
INIT_LIST_HEAD(&rx->recvmsg_q);
- rwlock_init(&rx->recvmsg_lock);
+ spin_lock_init(&rx->recvmsg_lock);
rwlock_init(&rx->call_lock);
memset(&rx->srx, 0, sizeof(rx->srx));
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 433060cade03..9e19688b0e06 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -149,7 +149,7 @@ struct rxrpc_sock {
struct list_head sock_calls; /* List of calls owned by this socket */
struct list_head to_be_accepted; /* calls awaiting acceptance */
struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */
- rwlock_t recvmsg_lock; /* Lock for recvmsg_q */
+ spinlock_t recvmsg_lock; /* Lock for recvmsg_q */
struct key *key; /* security for this socket */
struct key *securities; /* list of server security descriptors */
struct rb_root calls; /* User ID -> call mapping */
@@ -284,7 +284,9 @@ struct rxrpc_local {
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
- struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ struct sk_buff_head rx_delay_queue; /* Delay injection queue */
+#endif
struct sk_buff_head rx_queue; /* Received packets */
struct list_head conn_attend_q; /* Conns requiring immediate attention */
struct list_head call_attend_q; /* Calls requiring immediate attention */
@@ -688,9 +690,11 @@ struct rxrpc_call {
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_sack_base; /* Starting slot in SACK table ring */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */
- atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ rxrpc_seq_t ackr_window; /* Base of SACK window */
+ rxrpc_seq_t ackr_wtop; /* Top of SACK window */
+ unsigned int ackr_nr_unacked; /* Number of unacked packets */
atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
struct {
#define RXRPC_SACK_SIZE 256
@@ -1109,6 +1113,9 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+extern unsigned long rxrpc_inject_rx_delay;
+#endif
/*
* net_ns.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3e8689fdc437..0f5a1d77b890 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -195,7 +195,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
- rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 1abdef15debc..e363f21a2014 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -498,9 +498,18 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
rxrpc_propose_ack_rx_idle);
- if (atomic_read(&call->ackr_nr_unacked) > 2)
- rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
- rxrpc_propose_ack_input_data);
+ if (call->ackr_nr_unacked > 2) {
+ if (call->peer->rtt_count < 3)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_rtt);
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_old_rtt);
+ else
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
+ }
/* Make sure the timer is restarted */
if (!__rxrpc_call_is_complete(call)) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f3c9f0201c15..6eaffb0d8fdc 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -167,7 +167,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
- atomic64_set(&call->ackr_window, 0x100000001ULL);
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -560,7 +561,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
/* Make sure we don't get any more notifications */
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -573,7 +574,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (put)
rxrpc_put_call(call, rxrpc_call_put_unnotify);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index f30323de82bd..89ac05a711a4 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -8,11 +8,6 @@
#include <linux/slab.h>
#include "ar-internal.h"
-static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .ref = REFCOUNT_INIT(1),
- .debug_id = UINT_MAX,
-};
-
/*
* Find a service connection under RCU conditions.
*
@@ -132,8 +127,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
refcount_set(&conn->ref, 2);
- conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle,
- rxrpc_bundle_get_service_conn);
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 367927a99881..d68848fce51f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -338,7 +338,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
rxrpc_seq_t window, rxrpc_seq_t wtop)
{
- atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window);
+ call->ackr_window = window;
+ call->ackr_wtop = wtop;
}
/*
@@ -367,9 +368,9 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
rxrpc_serial_t serial = sp->hdr.serial;
- u64 win = atomic64_read(&call->ackr_window);
- rxrpc_seq_t window = lower_32_bits(win);
- rxrpc_seq_t wtop = upper_32_bits(win);
+ unsigned int sack = call->ackr_sack_base;
+ rxrpc_seq_t window = call->ackr_window;
+ rxrpc_seq_t wtop = call->ackr_wtop;
rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
rxrpc_seq_t seq = sp->hdr.seq;
bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
@@ -410,20 +411,23 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Queue the packet. */
if (seq == window) {
- rxrpc_seq_t reset_from;
- bool reset_sack = false;
-
if (sp->hdr.flags & RXRPC_REQUEST_ACK)
ack_reason = RXRPC_ACK_REQUESTED;
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
else
- atomic_inc_return(&call->ackr_nr_unacked);
+ call->ackr_nr_unacked++;
window++;
- if (after(window, wtop))
+ if (after(window, wtop)) {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none);
wtop = window;
+ } else {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+ }
+
rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
@@ -440,43 +444,39 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
__skb_unlink(oos, &call->rx_oos_queue);
last = osp->hdr.flags & RXRPC_LAST_PACKET;
seq = osp->hdr.seq;
- if (!reset_sack) {
- reset_from = seq;
- reset_sack = true;
- }
+ call->ackr_sack_table[sack] = 0;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
window++;
rxrpc_input_queue_data(call, oos, window, wtop,
- rxrpc_receive_queue_oos);
+ rxrpc_receive_queue_oos);
}
spin_unlock(&call->recvmsg_queue.lock);
- if (reset_sack) {
- do {
- call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0;
- } while (reset_from++, before(reset_from, window));
- }
+ call->ackr_sack_base = sack;
} else {
- bool keep = false;
+ unsigned int slot;
ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
- if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) {
- call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1;
- keep = 1;
+ slot = seq - window;
+ sack = (sack + slot) % RXRPC_SACK_SIZE;
+
+ if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
}
+ call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos);
+
if (after(seq + 1, wtop)) {
wtop = seq + 1;
rxrpc_input_update_ack_window(call, window, wtop);
}
- if (!keep) {
- ack_reason = RXRPC_ACK_DUPLICATE;
- goto send_ack;
- }
-
skb_queue_walk(&call->rx_oos_queue, oos) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
@@ -567,8 +567,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_serial_t serial = sp->hdr.serial;
rxrpc_seq_t seq0 = sp->hdr.seq;
- _enter("{%llx,%x},{%u,%x}",
- atomic64_read(&call->ackr_window), call->rx_highest_seq,
+ _enter("{%x,%x,%x},{%u,%x}",
+ call->ackr_window, call->ackr_wtop, call->rx_highest_seq,
skb->len, seq0);
if (__rxrpc_call_is_complete(call))
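
[Editorial note] The change above unpacks the atomic64-encoded {window, wtop} pair into two plain fields (which appears safe now that this state is only manipulated from the I/O thread) and turns the SACK table into a true ring: ackr_sack_base is the slot corresponding to sequence number ackr_window, so accepting the in-order packet advances base and window together instead of re-zeroing a span of the table. A toy model of the ring arithmetic (illustrative types, not the kernel's):

/*
 * Hedged toy model of the rotating SACK ring: base tracks the slot
 * that corresponds to sequence number `window`, so slot lookup for an
 * out-of-order packet is (base + (seq - window)) % SIZE, and consuming
 * the in-order packet advances both window and base.
 */
#include <stdint.h>

#define SACK_SIZE 256

struct sack_ring {
	uint8_t  table[SACK_SIZE];
	uint32_t window;   /* lowest unconsumed sequence number */
	unsigned int base; /* ring slot matching `window` */
};

static void sack_mark_oos(struct sack_ring *r, uint32_t seq)
{
	unsigned int slot = (r->base + (seq - r->window)) % SACK_SIZE;

	r->table[slot] = 1; /* already set means a duplicate packet */
}

static void sack_consume_in_order(struct sack_ring *r)
{
	r->table[r->base] = 0; /* slot is recycled for the new top */
	r->base = (r->base + 1) % SACK_SIZE;
	r->window++;
}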
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 9e9dfb2fc559..4a3a08a0e2cd 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -25,6 +25,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
*/
int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct sk_buff_head *rx_queue;
struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
if (unlikely(!local)) {
@@ -36,7 +37,16 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
skb->mark = RXRPC_SKB_MARK_PACKET;
rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
- skb_queue_tail(&local->rx_queue, skb);
+ rx_queue = &local->rx_queue;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ if (rxrpc_inject_rx_delay ||
+ !skb_queue_empty(&local->rx_delay_queue)) {
+ skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay);
+ rx_queue = &local->rx_delay_queue;
+ }
+#endif
+
+ skb_queue_tail(rx_queue, skb);
rxrpc_wake_up_io_thread(local);
return 0;
}
@@ -407,6 +417,9 @@ int rxrpc_io_thread(void *data)
struct rxrpc_local *local = data;
struct rxrpc_call *call;
struct sk_buff *skb;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ ktime_t now;
+#endif
bool should_stop;
complete(&local->io_thread_ready);
@@ -481,6 +494,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ /* Inject a delay into packets if requested. */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ now = ktime_get_real();
+ while ((skb = skb_peek(&local->rx_delay_queue))) {
+ if (ktime_before(now, skb->tstamp))
+ break;
+ skb = skb_dequeue(&local->rx_delay_queue);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+#endif
+
if (!skb_queue_empty(&local->rx_queue)) {
spin_lock_irq(&local->rx_queue.lock);
skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
@@ -502,6 +526,28 @@ int rxrpc_io_thread(void *data)
if (should_stop)
break;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb = skb_peek(&local->rx_delay_queue);
+ if (skb) {
+ unsigned long timeout;
+ ktime_t tstamp = skb->tstamp;
+ ktime_t now = ktime_get_real();
+ s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now));
+
+ if (delay_ns <= 0) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ timeout = nsecs_to_jiffies(delay_ns);
+ timeout = max(timeout, 1UL);
+ schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+#endif
+
schedule();
}
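
[Editorial note] With CONFIG_AF_RXRPC_INJECT_RX_DELAY, each delayed packet stores its release time in skb->tstamp; the loops above move ripe packets onto the live rx_queue, and the scheduling block lets the I/O thread sleep no longer than the head packet's remaining delay. A minimal sketch of the same drain-or-sleep pattern over a generic timestamped queue (illustrative structures, not sk_buff):

/*
 * Hedged sketch of the drain-or-sleep pattern: move every ripe item
 * from the delay queue to the live queue and tell the caller how long
 * it may sleep before the next item becomes due.
 */
#include <stdint.h>

struct item {
	struct item *next;
	int64_t due_ns;        /* absolute release time */
};

struct queue {
	struct item *head, **tail;
};

static void queue_init(struct queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

/* Returns ns until the next item is due, or -1 if none are pending. */
static int64_t drain_ripe(struct queue *delayq, struct queue *liveq,
			  int64_t now_ns)
{
	struct item *it;

	while ((it = delayq->head) && it->due_ns <= now_ns) {
		delayq->head = it->next;
		if (!delayq->head)
			delayq->tail = &delayq->head;
		it->next = NULL;
		*liveq->tail = it;
		liveq->tail = &it->next;
	}
	return delayq->head ? delayq->head->due_ns - now_ns : -1;
}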
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b8eaca5d9f22..7d910aee4f8c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -108,8 +108,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
local->net = net;
local->rxnet = rxrpc_net(net);
INIT_HLIST_NODE(&local->link);
- init_rwsem(&local->defrag_sem);
init_completion(&local->io_thread_ready);
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb_queue_head_init(&local->rx_delay_queue);
+#endif
skb_queue_head_init(&local->rx_queue);
INIT_LIST_HEAD(&local->conn_attend_q);
INIT_LIST_HEAD(&local->call_attend_q);
@@ -434,6 +436,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ rxrpc_purge_queue(&local->rx_delay_queue);
+#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
}
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 056c428d8bf3..825b81183046 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -53,3 +53,10 @@ unsigned int rxrpc_rx_mtu = 5692;
* sender that we're willing to handle.
*/
unsigned int rxrpc_rx_jumbo_max = 4;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+/*
+ * The delay to inject into packet reception.
+ */
+unsigned long rxrpc_inject_rx_delay;
+#endif
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a9746be29634..6b2022240076 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -83,59 +83,36 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_txbuf *txb)
{
struct rxrpc_ackinfo ackinfo;
- unsigned int qsize;
- rxrpc_seq_t window, wtop, wrap_point, ix, first;
+ unsigned int qsize, sack, wrap, to;
+ rxrpc_seq_t window, wtop;
int rsize;
- u64 wtmp;
u32 mtu, jmax;
u8 *ackp = txb->acks;
- u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8);
- atomic_set(&call->ackr_nr_unacked, 0);
+ call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
- /* Barrier against rxrpc_input_data(). */
-retry:
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
+ window = call->ackr_window;
+ wtop = call->ackr_wtop;
+ sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = 0;
+ txb->ack.nAcks = wtop - window;
if (after(wtop, window)) {
- /* Try to copy the SACK ring locklessly. We can use the copy,
- * only if the now-current top of the window didn't go past the
- * previously read base - otherwise we can't know whether we
- * have old data or new data.
- */
- memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer));
- wrap_point = window + RXRPC_SACK_SIZE - 1;
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
- if (after(wtop, wrap_point)) {
- cond_resched();
- goto retry;
- }
-
- /* The buffer is maintained as a ring with an invariant mapping
- * between bit position and sequence number, so we'll probably
- * need to rotate it.
- */
- txb->ack.nAcks = wtop - window;
- ix = window % RXRPC_SACK_SIZE;
- first = sizeof(sack_buffer) - ix;
+ wrap = RXRPC_SACK_SIZE - sack;
+ to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
- if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks);
+ if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
} else {
- memcpy(txb->acks, sack_buffer + ix, first);
- memcpy(txb->acks + first, sack_buffer,
- txb->ack.nAcks - first);
+ memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
+ memcpy(txb->acks + wrap, call->ackr_sack_table,
+ to - wrap);
}
- ackp += txb->ack.nAcks;
+ ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
} else if (txb->ack.reason == RXRPC_ACK_DELAY) {
@@ -253,12 +230,15 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
- if (ret < 0)
+ if (ret < 0) {
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_ack);
- else
+ } else {
trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_ack);
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = ktime_get_real();
+ }
rxrpc_tx_backoff(call, ret);
if (!__rxrpc_call_is_complete(call)) {
@@ -429,8 +409,6 @@ dont_set_request_ack:
if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
- down_read(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -445,7 +423,6 @@ dont_set_request_ack:
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- up_read(&conn->local->defrag_sem);
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
@@ -506,8 +483,6 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -539,8 +514,6 @@ send_fragmentable:
rxrpc_tx_point_call_data_frag);
}
rxrpc_tx_backoff(call, ret);
-
- up_write(&conn->local->defrag_sem);
goto done;
}
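
[Editorial note] Because the SACK table is now a ring anchored at ackr_sack_base, serializing it into the ACK packet no longer needs the lockless snapshot-and-retry dance being deleted above: it is at most two memcpy() calls, one from the base to the end of the ring and one from slot 0 for the wrapped remainder. A minimal sketch of that copy:

/*
 * Hedged sketch of the two-memcpy ring serialization: copy `count`
 * bytes of a `size`-byte ring starting at `base` into a linear
 * buffer, wrapping at most once.
 */
#include <string.h>

static void ring_copy_out(unsigned char *dst, const unsigned char *ring,
			  unsigned int size, unsigned int base,
			  unsigned int count)
{
	unsigned int first = size - base; /* bytes before the wrap point */

	if (base + count <= size) {
		memcpy(dst, ring + base, count);
	} else {
		memcpy(dst, ring + base, first);
		memcpy(dst + first, ring, count - first);
	}
}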
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 750158a085cd..682636d3b060 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -55,7 +55,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
- u64 wtmp;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -83,7 +82,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
}
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
- wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
@@ -98,7 +96,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
- lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
+ call->ackr_window, call->ackr_wtop - call->ackr_window,
call->rx_serial,
call->cong_cwnd,
timeout);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index dd54ceee7bcc..50d263a6359d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,12 +40,12 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
call->notify_rx(sk, call, call->user_call_ID);
spin_unlock(&call->notify_lock);
} else {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -95,7 +95,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
}
trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
- lower_32_bits(atomic64_read(&call->ackr_window)) - 1,
+ call->ackr_window - 1,
call->rx_pkt_offset, call->rx_pkt_len, ret);
return ret;
}
@@ -175,13 +175,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_len = call->rx_pkt_len;
if (rxrpc_call_has_failed(call)) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ seq = call->ackr_window - 1;
ret = -EIO;
goto done;
}
if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ seq = call->ackr_window - 1;
ret = 1;
goto done;
}
@@ -335,14 +335,14 @@ try_again:
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
*/
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
call_debug_id = call->debug_id;
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
@@ -431,9 +431,9 @@ error_unlock_call:
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index ebe0c75e7b07..944320e65ea8 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
if (skb) {
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_CONSUMED);
}
}
@@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
rxrpc_skb_put_purge);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_CONSUMED);
}
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index cde3224a5cd2..ecaeb4ecfb58 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -17,6 +17,9 @@ static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+static const unsigned long max_500 = 500;
+#endif
/*
* RxRPC operating parameters.
@@ -63,6 +66,19 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra2 = (void *)&max_jiffies,
},
+ /* Values used in milliseconds */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ {
+ .procname = "inject_rx_delay",
+ .data = &rxrpc_inject_rx_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)SYSCTL_LONG_ZERO,
+ .extra2 = (void *)&max_500,
+ },
+#endif
+
/* Non-time values */
{
.procname = "reap_client_conns",
@@ -109,7 +125,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
-
{ }
};
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d2cf2aac3adb..d43be8512386 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -110,12 +110,8 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
- for (;;) {
- spin_lock(&call->tx_lock);
- txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link);
- if (!txb)
- break;
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
hard_ack = smp_load_acquire(&call->acks_hard_ack);
if (before(hard_ack, txb->seq))
break;
@@ -128,15 +124,11 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
- spin_unlock(&call->tx_lock);
-
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
if (after(call->acks_hard_ack, call->tx_bottom + 128))
wake = true;
}
- spin_unlock(&call->tx_lock);
-
if (wake)
wake_up(&call->waitq);
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 777d6b50505c..f5acb535413d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -195,8 +195,14 @@ config NET_SCH_ETF
To compile this code as a module, choose M here: the
module will be called sch_etf.
+config NET_SCH_MQPRIO_LIB
+ tristate
+ help
+ Common library for manipulating mqprio queue configurations.
+
config NET_SCH_TAPRIO
tristate "Time Aware Priority (taprio) Scheduler"
+ select NET_SCH_MQPRIO_LIB
help
Say Y here if you want to use the Time Aware Priority (taprio) packet
scheduling algorithm.
@@ -253,6 +259,7 @@ config NET_SCH_DRR
config NET_SCH_MQPRIO
tristate "Multi-queue priority scheduler (MQPRIO)"
+ select NET_SCH_MQPRIO_LIB
help
Say Y here if you want to use the Multi-queue Priority scheduler.
This scheduler allows QOS to be offloaded on NICs that have support
@@ -337,7 +344,7 @@ config NET_SCH_FQ
Say Y here if you want to use the FQ packet scheduling algorithm.
FQ does flow separation, and is able to respect pacing requirements
- set by TCP stack into sk->sk_pacing_rate (for localy generated
+ set by TCP stack into sk->sk_pacing_rate (for locally generated
traffic)
To compile this driver as a module, choose M here: the module
diff --git a/net/sched/Makefile b/net/sched/Makefile
index dd14ef413fda..7911eec09837 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
obj-$(CONFIG_NET_SCH_ETS) += sch_ets.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_MQPRIO_LIB) += sch_mqprio_lib.o
obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 5b3c0ac495be..cd09ef49df22 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1582,7 +1582,7 @@ errout:
static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
u32 portid, u32 seq, u16 flags, int event, int bind,
- int ref)
+ int ref, struct netlink_ext_ack *extack)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
@@ -1606,7 +1606,12 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
nla_nest_end(skb, nest);
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1625,7 +1630,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
- 0, 1) <= 0) {
+ 0, 1, NULL) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
@@ -1799,7 +1804,7 @@ tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1) <= 0) {
+ if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1886,7 +1891,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
- 0, 2) <= 0) {
+ 0, 2, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
@@ -1965,7 +1970,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
- RTM_NEWACTION, 0, 0) <= 0) {
+ RTM_NEWACTION, 0, 0, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
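
[Editorial note] tca_get_fill() is the first of several fill functions in this patch (tcf_fill_node(), tc_chain_fill_node() and tc_fill_qdisc() follow the same pattern below) that gain an extack argument so a warning produced by a successful operation can be appended to the notification as a TCA_EXT_WARN_MSG string attribute. A rough model of the append-or-trim idea using a hand-rolled TLV buffer (the attribute ID and layout here are illustrative, not netlink's actual wire format):

/*
 * Hedged model of the TCA_EXT_WARN_MSG append: a warning string is
 * attached as one more TLV at the end of an otherwise-complete
 * message; on overflow the caller trims the message back.
 */
#include <stddef.h>
#include <string.h>

enum { DEMO_EXT_WARN_MSG = 1 }; /* illustrative attribute ID */

struct msgbuf {
	unsigned char data[4096];
	size_t len;
};

static int tlv_put_string(struct msgbuf *m, unsigned short type, const char *s)
{
	size_t payload = strlen(s) + 1;
	size_t need = 4 + payload; /* 2-byte type, 2-byte len, data */
	unsigned short tlen = (unsigned short)need;

	if (m->len + need > sizeof(m->data))
		return -1;         /* caller trims the message */
	memcpy(m->data + m->len, &type, sizeof(type));
	memcpy(m->data + m->len + 2, &tlen, sizeof(tlen));
	memcpy(m->data + m->len + 4, s, payload);
	m->len += need;
	return 0;
}

/* Mirrors: if (extack && extack->_msg && nla_put_string(...)) goto trim; */
static int maybe_add_warning(struct msgbuf *m, const char *extack_msg)
{
	if (!extack_msg)
		return 0;
	return tlv_put_string(m, DEMO_EXT_WARN_MSG, extack_msg);
}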
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 0ca2bb8ed026..b126f03c1bb6 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -170,11 +170,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
enum ip_conntrack_dir dir,
+ enum ip_conntrack_info ctinfo,
struct flow_action *action)
{
struct nf_conn_labels *ct_labels;
struct flow_action_entry *entry;
- enum ip_conntrack_info ctinfo;
u32 *act_ct_labels;
entry = tcf_ct_flow_table_flow_action_get_next(action);
@@ -182,8 +182,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
- ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@@ -237,22 +235,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
}
static int tcf_ct_flow_table_fill_actions(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir tdir,
struct nf_flow_rule *flow_rule)
{
struct flow_action *action = &flow_rule->rule->action;
int num_entries = action->num_entries;
struct nf_conn *ct = flow->ct;
+ enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
int i, err;
switch (tdir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
dir = IP_CT_DIR_ORIGINAL;
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ if (ctinfo == IP_CT_ESTABLISHED)
+ set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
break;
case FLOW_OFFLOAD_DIR_REPLY:
dir = IP_CT_DIR_REPLY;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
break;
default:
return -EOPNOTSUPP;
@@ -262,7 +266,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
if (err)
goto err_nat;
- tcf_ct_flow_table_add_action_meta(ct, dir, action);
+ tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
return 0;
err_nat:
@@ -365,7 +369,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
- bool tcp)
+ bool tcp, bool bidirectional)
{
struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
@@ -384,6 +388,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ if (bidirectional)
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
@@ -407,26 +413,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- bool tcp = false;
-
- if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
- !test_bit(IPS_ASSURED_BIT, &ct->status))
- return;
+ bool tcp = false, bidirectional = true;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
- tcp = true;
- if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return;
+
+ tcp = true;
break;
case IPPROTO_UDP:
+ if (!nf_ct_is_confirmed(ct))
+ return;
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ bidirectional = false;
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: {
struct nf_conntrack_tuple *tuple;
- if (ct->status & IPS_NAT_MASK)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->status & IPS_NAT_MASK)
return;
+
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* No support for GRE v1 */
if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@@ -442,7 +456,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
ct->status & IPS_SEQ_ADJUST)
return;
- tcf_ct_flow_table_add(ct_ft, ct, tcp);
+ tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
@@ -621,13 +635,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
ct = flow->ct;
+ if (dir == FLOW_OFFLOAD_DIR_REPLY &&
+ !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
+ /* Only offload reply direction after connection became
+ * assured.
+ */
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+ else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+ /* If flow_table flow has already been updated to the
+ * established state, then don't refresh.
+ */
+ return false;
+ }
+
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
flow_offload_teardown(flow);
return false;
}
- ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
+ if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ else
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
@@ -707,7 +738,7 @@ static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
switch (family) {
case NFPROTO_IPV4:
- len = ntohs(ip_hdr(skb)->tot_len);
+ len = skb_ip_totlen(skb);
break;
case NFPROTO_IPV6:
len = sizeof(struct ipv6hdr)
	+ ntohs(ipv6_hdr(skb)->payload_len);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7284bcea7b0b..8037ec9b1d31 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -29,8 +29,8 @@
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
-#define MIRRED_RECURSION_LIMIT 4
-static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+#define MIRRED_NEST_LIMIT 4
+static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
static bool tcf_mirred_is_act_redirect(int action)
{
@@ -206,12 +206,19 @@ release_idr:
return err;
}
+static bool is_mirred_nested(void)
+{
+ return unlikely(__this_cpu_read(mirred_nest_level) > 1);
+}
+
static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
+ else if (is_mirred_nested())
+ err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -226,7 +233,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
- unsigned int rec_level;
+ unsigned int nest_level;
int retval, err = 0;
bool use_reinsert;
bool want_ingress;
@@ -237,11 +244,11 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
int mac_len;
bool at_nh;
- rec_level = __this_cpu_inc_return(mirred_rec_level);
- if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+ nest_level = __this_cpu_inc_return(mirred_nest_level);
+ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
netdev_name(skb->dev));
- __this_cpu_dec(mirred_rec_level);
+ __this_cpu_dec(mirred_nest_level);
return TC_ACT_SHOT;
}
@@ -310,7 +317,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
err = tcf_mirred_forward(want_ingress, skb);
if (err)
tcf_action_inc_overlimit_qstats(&m->common);
- __this_cpu_dec(mirred_rec_level);
+ __this_cpu_dec(mirred_nest_level);
return TC_ACT_CONSUMED;
}
}
@@ -322,7 +329,7 @@ out:
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
- __this_cpu_dec(mirred_rec_level);
+ __this_cpu_dec(mirred_nest_level);
return retval;
}
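
[Editorial note] The rename from "recursion" to "nesting" matches what the per-CPU counter really guards: mirred can re-enter itself on the same CPU through the ingress path, and once nested it now hands the skb to netif_rx() (deferred) rather than netif_receive_skb() (synchronous), so the stack cannot keep growing. A userspace model of the guard, with C11 thread-local storage standing in for DEFINE_PER_CPU (the delivery helpers are illustrative stubs):

/*
 * Hedged model of the per-CPU nesting guard: _Thread_local stands in
 * for DEFINE_PER_CPU, and nested entries switch to a deferred delivery
 * variant the way nested mirred switches to netif_rx().
 */
#define NEST_LIMIT 4

static _Thread_local unsigned int nest_level;

static int deliver_deferred(void *pkt)    { (void)pkt; return 0; }
static int deliver_synchronous(void *pkt) { (void)pkt; return 0; }

static int forward(void *pkt)
{
	int err;

	if (++nest_level > NEST_LIMIT) {
		nest_level--;
		return -1; /* drop: nested too deeply */
	}

	/* nested entries must not grow the stack any further */
	err = (nest_level > 1) ? deliver_deferred(pkt)
			       : deliver_synchronous(pkt);

	nest_level--;
	return err;
}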
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0378e9f0121..c42fcc47dd6d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -134,6 +134,17 @@ nla_failure:
return -EINVAL;
}
+static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
+{
+ struct tcf_pedit_parms *parms =
+ container_of(head, struct tcf_pedit_parms, rcu);
+
+ kfree(parms->tcfp_keys_ex);
+ kfree(parms->tcfp_keys);
+
+ kfree(parms);
+}
+
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp, u32 flags,
@@ -141,10 +152,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
- struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
- struct tc_pedit_key *keys = NULL;
- struct tcf_pedit_key_ex *keys_ex;
+ struct tcf_pedit_parms *oparms, *nparms;
+ struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tc_pedit *parm;
struct nlattr *pattr;
struct tcf_pedit *p;
@@ -181,18 +191,25 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
- keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
- if (IS_ERR(keys_ex))
- return PTR_ERR(keys_ex);
+ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+ if (!nparms)
+ return -ENOMEM;
+
+ nparms->tcfp_keys_ex =
+ tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+ if (IS_ERR(nparms->tcfp_keys_ex)) {
+ ret = PTR_ERR(nparms->tcfp_keys_ex);
+ goto out_free;
+ }
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_pedit_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_pedit_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
- goto out_free;
+ goto out_free_ex;
}
ret = ACT_P_CREATED;
} else if (err > 0) {
@@ -204,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
} else {
ret = err;
- goto out_free;
+ goto out_free_ex;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
@@ -212,48 +229,50 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
ret = err;
goto out_release;
}
- p = to_pedit(*a);
- spin_lock_bh(&p->tcf_lock);
- if (ret == ACT_P_CREATED ||
- (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
- keys = kmalloc(ksize, GFP_ATOMIC);
- if (!keys) {
- spin_unlock_bh(&p->tcf_lock);
- ret = -ENOMEM;
- goto put_chain;
- }
- kfree(p->tcfp_keys);
- p->tcfp_keys = keys;
- p->tcfp_nkeys = parm->nkeys;
+ nparms->tcfp_off_max_hint = 0;
+ nparms->tcfp_flags = parm->flags;
+ nparms->tcfp_nkeys = parm->nkeys;
+
+ nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL);
+ if (!nparms->tcfp_keys) {
+ ret = -ENOMEM;
+ goto put_chain;
}
- memcpy(p->tcfp_keys, parm->keys, ksize);
- p->tcfp_off_max_hint = 0;
- for (i = 0; i < p->tcfp_nkeys; ++i) {
- u32 cur = p->tcfp_keys[i].off;
+
+ memcpy(nparms->tcfp_keys, parm->keys, ksize);
+
+ for (i = 0; i < nparms->tcfp_nkeys; ++i) {
+ u32 cur = nparms->tcfp_keys[i].off;
/* sanitize the shift value for any later use */
- p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
- p->tcfp_keys[i].shift);
+ nparms->tcfp_keys[i].shift = min_t(size_t,
+ BITS_PER_TYPE(int) - 1,
+ nparms->tcfp_keys[i].shift);
/* The AT option can read a single byte, we can bound the actual
* value with uchar max.
*/
- cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+ cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
/* Each key touches 4 bytes starting from the computed offset */
- p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ nparms->tcfp_off_max_hint =
+ max(nparms->tcfp_off_max_hint, cur + 4);
}
- p->tcfp_flags = parm->flags;
+ p = to_pedit(*a);
+
+ spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparms = rcu_replace_pointer(p->parms, nparms, 1);
+ spin_unlock_bh(&p->tcf_lock);
- kfree(p->tcfp_keys_ex);
- p->tcfp_keys_ex = keys_ex;
+ if (oparms)
+ call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
- spin_unlock_bh(&p->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+
return ret;
put_chain:
@@ -261,19 +280,22 @@ put_chain:
tcf_chain_put_by_act(goto_ch);
out_release:
tcf_idr_release(*a, bind);
+out_free_ex:
+ kfree(nparms->tcfp_keys_ex);
out_free:
- kfree(keys_ex);
+ kfree(nparms);
return ret;
-
}
static void tcf_pedit_cleanup(struct tc_action *a)
{
struct tcf_pedit *p = to_pedit(a);
- struct tc_pedit_key *keys = p->tcfp_keys;
+ struct tcf_pedit_parms *parms;
+
+ parms = rcu_dereference_protected(p->parms, 1);
- kfree(keys);
- kfree(p->tcfp_keys_ex);
+ if (parms)
+ call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
}
static bool offset_valid(struct sk_buff *skb, int offset)
@@ -324,109 +346,107 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
+ enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+ enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
struct tcf_pedit *p = to_pedit(a);
+ struct tcf_pedit_key_ex *tkey_ex;
+ struct tcf_pedit_parms *parms;
+ struct tc_pedit_key *tkey;
u32 max_offset;
int i;
- spin_lock(&p->tcf_lock);
+ parms = rcu_dereference_bh(p->parms);
max_offset = (skb_transport_header_was_set(skb) ?
skb_transport_offset(skb) :
skb_network_offset(skb)) +
- p->tcfp_off_max_hint;
+ parms->tcfp_off_max_hint;
if (skb_ensure_writable(skb, min(skb->len, max_offset)))
- goto unlock;
+ goto done;
tcf_lastuse_update(&p->tcf_tm);
+ tcf_action_update_bstats(&p->common, skb);
- if (p->tcfp_nkeys > 0) {
- struct tc_pedit_key *tkey = p->tcfp_keys;
- struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
- enum pedit_header_type htype =
- TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
- enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
-
- for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
- u32 *ptr, hdata;
- int offset = tkey->off;
- int hoffset;
- u32 val;
- int rc;
-
- if (tkey_ex) {
- htype = tkey_ex->htype;
- cmd = tkey_ex->cmd;
-
- tkey_ex++;
- }
+ tkey = parms->tcfp_keys;
+ tkey_ex = parms->tcfp_keys_ex;
- rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
- if (rc) {
- pr_info("tc action pedit bad header type specified (0x%x)\n",
- htype);
- goto bad;
- }
+ for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
+ int offset = tkey->off;
+ u32 *ptr, hdata;
+ int hoffset;
+ u32 val;
+ int rc;
- if (tkey->offmask) {
- u8 *d, _d;
-
- if (!offset_valid(skb, hoffset + tkey->at)) {
- pr_info("tc action pedit 'at' offset %d out of bounds\n",
- hoffset + tkey->at);
- goto bad;
- }
- d = skb_header_pointer(skb, hoffset + tkey->at,
- sizeof(_d), &_d);
- if (!d)
- goto bad;
- offset += (*d & tkey->offmask) >> tkey->shift;
- }
+ if (tkey_ex) {
+ htype = tkey_ex->htype;
+ cmd = tkey_ex->cmd;
- if (offset % 4) {
- pr_info("tc action pedit offset must be on 32 bit boundaries\n");
- goto bad;
- }
+ tkey_ex++;
+ }
- if (!offset_valid(skb, hoffset + offset)) {
- pr_info("tc action pedit offset %d out of bounds\n",
- hoffset + offset);
- goto bad;
- }
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info("tc action pedit bad header type specified (0x%x)\n",
+ htype);
+ goto bad;
+ }
- ptr = skb_header_pointer(skb, hoffset + offset,
- sizeof(hdata), &hdata);
- if (!ptr)
- goto bad;
- /* just do it, baby */
- switch (cmd) {
- case TCA_PEDIT_KEY_EX_CMD_SET:
- val = tkey->val;
- break;
- case TCA_PEDIT_KEY_EX_CMD_ADD:
- val = (*ptr + tkey->val) & ~tkey->mask;
- break;
- default:
- pr_info("tc action pedit bad command (%d)\n",
- cmd);
+ if (tkey->offmask) {
+ u8 *d, _d;
+
+ if (!offset_valid(skb, hoffset + tkey->at)) {
+ pr_info("tc action pedit 'at' offset %d out of bounds\n",
+ hoffset + tkey->at);
goto bad;
}
+ d = skb_header_pointer(skb, hoffset + tkey->at,
+ sizeof(_d), &_d);
+ if (!d)
+ goto bad;
+ offset += (*d & tkey->offmask) >> tkey->shift;
+ }
- *ptr = ((*ptr & tkey->mask) ^ val);
- if (ptr == &hdata)
- skb_store_bits(skb, hoffset + offset, ptr, 4);
+ if (offset % 4) {
+ pr_info("tc action pedit offset must be on 32 bit boundaries\n");
+ goto bad;
}
- goto done;
- } else {
- WARN(1, "pedit BUG: index %d\n", p->tcf_index);
+ if (!offset_valid(skb, hoffset + offset)) {
+ pr_info("tc action pedit offset %d out of bounds\n",
+ hoffset + offset);
+ goto bad;
+ }
+
+ ptr = skb_header_pointer(skb, hoffset + offset,
+ sizeof(hdata), &hdata);
+ if (!ptr)
+ goto bad;
+ /* just do it, baby */
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ val = tkey->val;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ val = (*ptr + tkey->val) & ~tkey->mask;
+ break;
+ default:
+ pr_info("tc action pedit bad command (%d)\n",
+ cmd);
+ goto bad;
+ }
+
+ *ptr = ((*ptr & tkey->mask) ^ val);
+ if (ptr == &hdata)
+ skb_store_bits(skb, hoffset + offset, ptr, 4);
}
+ goto done;
+
bad:
+ spin_lock(&p->tcf_lock);
p->tcf_qstats.overlimits++;
-done:
- bstats_update(&p->tcf_bstats, skb);
-unlock:
spin_unlock(&p->tcf_lock);
+done:
return p->tcf_action;
}
@@ -445,30 +465,33 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_pedit *p = to_pedit(a);
+ struct tcf_pedit_parms *parms;
struct tc_pedit *opt;
struct tcf_t t;
int s;
- s = struct_size(opt, keys, p->tcfp_nkeys);
+ spin_lock_bh(&p->tcf_lock);
+ parms = rcu_dereference_protected(p->parms, 1);
+ s = struct_size(opt, keys, parms->tcfp_nkeys);
- /* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
- if (unlikely(!opt))
+ if (unlikely(!opt)) {
+ spin_unlock_bh(&p->tcf_lock);
return -ENOBUFS;
+ }
- spin_lock_bh(&p->tcf_lock);
- memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
+ memcpy(opt->keys, parms->tcfp_keys,
+ flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
- opt->nkeys = p->tcfp_nkeys;
- opt->flags = p->tcfp_flags;
+ opt->nkeys = parms->tcfp_nkeys;
+ opt->flags = parms->tcfp_flags;
opt->action = p->tcf_action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
- if (p->tcfp_keys_ex) {
- if (tcf_pedit_key_ex_dump(skb,
- p->tcfp_keys_ex,
- p->tcfp_nkeys))
+ if (parms->tcfp_keys_ex) {
+ if (tcf_pedit_key_ex_dump(skb, parms->tcfp_keys_ex,
+ parms->tcfp_nkeys))
goto nla_put_failure;
if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
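
[Editorial note] The pedit rework gathers all packet-path configuration into one RCU-managed struct tcf_pedit_parms: tcf_pedit_act() takes a lockless rcu_dereference_bh() snapshot instead of holding tcf_lock per packet, while tcf_pedit_init() builds a complete new parms struct, publishes it with rcu_replace_pointer() under the lock, and frees the old one via call_rcu(). A rough model of that swap with C11 atomics standing in for the RCU primitives (note this model frees synchronously, which real RCU must defer until readers are done):

/*
 * Hedged model of the read-copy-update parameter swap: readers take a
 * lockless acquire-load snapshot; the updater publishes a fully built
 * replacement and reclaims the old one.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct parms {
	int nkeys;
	/* ... packet-path configuration ... */
};

struct action {
	_Atomic(struct parms *) parms;
};

/* datapath: lockless snapshot, like rcu_dereference_bh(p->parms) */
static int act(struct action *a)
{
	struct parms *p = atomic_load_explicit(&a->parms, memory_order_acquire);

	return p ? p->nkeys : 0;
}

/* control path: publish new parms, reclaim the old set */
static int update(struct action *a, int nkeys)
{
	struct parms *nparms = calloc(1, sizeof(*nparms));
	struct parms *oparms;

	if (!nparms)
		return -1;
	nparms->nkeys = nkeys;

	oparms = atomic_exchange_explicit(&a->parms, nparms,
					  memory_order_acq_rel);
	free(oparms); /* kernel: call_rcu(&oparms->rcu, cleanup) */
	return 0;
}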
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 668130f08903..5b4a95e8a1ee 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -488,7 +488,8 @@ static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
#endif
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
- u32 seq, u16 flags, int event, bool unicast);
+ u32 seq, u16 flags, int event, bool unicast,
+ struct netlink_ext_ack *extack);
static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
u32 chain_index, bool create,
@@ -521,7 +522,7 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
*/
if (is_first_reference && !by_act)
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
- RTM_NEWCHAIN, false);
+ RTM_NEWCHAIN, false, NULL);
return chain;
@@ -1817,7 +1818,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
u32 portid, u32 seq, u16 flags, int event,
- bool terse_dump, bool rtnl_held)
+ bool terse_dump, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1857,7 +1859,13 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
goto nla_put_failure;
}
+
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto nla_put_failure;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1871,7 +1879,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
u32 parent, void *fh, int event, bool unicast,
- bool rtnl_held)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1883,7 +1891,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, event,
- false, rtnl_held) <= 0) {
+ false, rtnl_held, extack) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1912,7 +1920,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- false, rtnl_held) <= 0) {
+ false, rtnl_held, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
@@ -1938,14 +1946,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct tcf_block *block, struct Qdisc *q,
u32 parent, struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
+ struct tcf_chain *chain, int event,
+ struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
for (tp = tcf_get_next_proto(chain, NULL);
tp; tp = tcf_get_next_proto(chain, tp))
- tfilter_notify(net, oskb, n, tp, block,
- q, parent, NULL, event, false, true);
+ tfilter_notify(net, oskb, n, tp, block, q, parent, NULL,
+ event, false, true, extack);
}
static void tfilter_put(struct tcf_proto *tp, void *fh)
@@ -2156,7 +2165,7 @@ replay:
flags, extack);
if (err == 0) {
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_NEWTFILTER, false, rtnl_held);
+ RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh);
/* q pointer is NULL for shared blocks */
if (q)
@@ -2284,7 +2293,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (prio == 0) {
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, extack);
tcf_chain_flush(chain, rtnl_held);
err = 0;
goto errout;
@@ -2308,7 +2317,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
tcf_proto_put(tp, rtnl_held, NULL);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_DELTFILTER, false, rtnl_held);
+ RTM_DELTFILTER, false, rtnl_held, extack);
err = 0;
goto errout;
}
@@ -2452,7 +2461,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -ENOENT;
} else {
err = tfilter_notify(net, skb, n, tp, block, q, parent,
- fh, RTM_NEWTFILTER, true, rtnl_held);
+ fh, RTM_NEWTFILTER, true, rtnl_held, NULL);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
}
@@ -2490,7 +2499,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, a->terse_dump, true);
+ RTM_NEWTFILTER, a->terse_dump, true, NULL);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -2524,7 +2533,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, false, true) <= 0)
+ RTM_NEWTFILTER, false, true, NULL) <= 0)
goto errout;
cb->args[1] = 1;
}
@@ -2667,7 +2676,8 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct net *net, struct sk_buff *skb,
struct tcf_block *block,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ struct netlink_ext_ack *extack)
{
unsigned char *b = skb_tail_pointer(skb);
const struct tcf_proto_ops *ops;
@@ -2704,7 +2714,12 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
goto nla_put_failure;
}
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -2714,7 +2729,8 @@ nla_put_failure:
}
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
- u32 seq, u16 flags, int event, bool unicast)
+ u32 seq, u16 flags, int event, bool unicast,
+ struct netlink_ext_ack *extack)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct tcf_block *block = chain->block;
@@ -2728,7 +2744,7 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
chain->index, net, skb, block, portid,
- seq, flags, event) <= 0) {
+ seq, flags, event, extack) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2756,7 +2772,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
return -ENOBUFS;
if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
- block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+ block, portid, seq, flags, RTM_DELCHAIN, NULL) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2908,11 +2924,11 @@ replay:
}
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
- RTM_NEWCHAIN, false);
+ RTM_NEWCHAIN, false, extack);
break;
case RTM_DELCHAIN:
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, extack);
/* Flush the chain first as the user requested chain removal. */
tcf_chain_flush(chain, true);
/* In case the chain was successfully deleted, put a reference
@@ -2922,7 +2938,7 @@ replay:
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
- n->nlmsg_flags, n->nlmsg_type, true);
+ n->nlmsg_flags, n->nlmsg_type, true, extack);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
break;
@@ -3022,7 +3038,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
chain->index, net, skb, block,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWCHAIN);
+ RTM_NEWCHAIN, NULL);
if (err <= 0)
break;
index++;
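The cls_api.c hunks above all follow one pattern: thread the caller's extack down into the fill functions so that a non-fatal warning can ride along inside the notification itself, as a TCA_EXT_WARN_MSG string attribute. A minimal sketch of that pattern with hypothetical names (this is a distillation, not the literal patch code):

static int fill_example(struct sk_buff *skb, u32 portid, u32 seq,
			struct netlink_ext_ack *extack)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWTFILTER, 0, 0);
	if (!nlh)
		goto out_nlmsg_trim;

	/* ... regular attributes go here ... */

	/* Piggy-back any extack message on the (successful) reply */
	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

Note that the dump paths pass NULL instead of an extack, so dumps never carry the warning attribute — consistent with the NULL arguments added to tcf_node_dump(), tcf_chain_dump() and tc_dump_chain() above.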
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 72d2c204d5f3..e9780631b5b5 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -902,7 +902,8 @@ static void qdisc_offload_graft_root(struct net_device *dev,
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ struct netlink_ext_ack *extack)
{
struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
struct gnet_stats_queue __percpu *cpu_qstats = NULL;
@@ -970,7 +971,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_finish_copy(&d) < 0)
goto nla_put_failure;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -991,7 +997,8 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
+ struct Qdisc *old, struct Qdisc *new,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1002,12 +1009,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
if (old && !tc_qdisc_dump_ignore(old, false)) {
if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
- 0, RTM_DELQDISC) < 0)
+ 0, RTM_DELQDISC, extack) < 0)
goto err_out;
}
if (new && !tc_qdisc_dump_ignore(new, false)) {
if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
- old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
goto err_out;
}
@@ -1022,10 +1029,11 @@ err_out:
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
+ struct Qdisc *old, struct Qdisc *new,
+ struct netlink_ext_ack *extack)
{
if (new || old)
- qdisc_notify(net, skb, n, clid, old, new);
+ qdisc_notify(net, skb, n, clid, old, new, extack);
if (old)
qdisc_put(old);
@@ -1105,12 +1113,12 @@ skip:
qdisc_refcount_inc(new);
rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
if (new && new->ops->attach)
new->ops->attach(new);
} else {
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
}
if (dev->flags & IFF_UP)
@@ -1141,7 +1149,7 @@ skip:
err = cops->graft(parent, cl, new, &old, extack);
if (err)
return err;
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
}
return 0;
}
@@ -1274,12 +1282,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (err)
goto err_out3;
- if (ops->init) {
- err = ops->init(sch, tca[TCA_OPTIONS], extack);
- if (err != 0)
- goto err_out5;
- }
-
if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB], extack);
if (IS_ERR(stab)) {
@@ -1288,11 +1290,18 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
rcu_assign_pointer(sch->stab, stab);
}
+
+ if (ops->init) {
+ err = ops->init(sch, tca[TCA_OPTIONS], extack);
+ if (err != 0)
+ goto err_out5;
+ }
+
if (tca[TCA_RATE]) {
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {
NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
- goto err_out4;
+ goto err_out5;
}
err = gen_new_estimator(&sch->bstats,
@@ -1303,7 +1312,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
- goto err_out4;
+ goto err_out5;
}
}
@@ -1313,6 +1322,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
err_out5:
+ qdisc_put_stab(rtnl_dereference(sch->stab));
+err_out4:
/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
if (ops->destroy)
ops->destroy(sch);
@@ -1324,16 +1335,6 @@ err_out2:
err_out:
*errp = err;
return NULL;
-
-err_out4:
- /*
- * Any broken qdiscs that would require a ops->reset() here?
- * The qdisc was never in action so it shouldn't be necessary.
- */
- qdisc_put_stab(rtnl_dereference(sch->stab));
- if (ops->destroy)
- ops->destroy(sch);
- goto err_out3;
}
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
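The qdisc_create() reordering above (size table first, then ops->init()) exists so the error labels can unwind strictly in reverse order of setup: estimator and init failures now jump to err_out5, which drops the stab before falling through to ->destroy(). A condensed, paraphrased view of the resulting flow (elided to the relevant lines):

	/* 1. size table first */
	stab = qdisc_get_stab(tca[TCA_STAB], extack);
	rcu_assign_pointer(sch->stab, stab);

	/* 2. qdisc-specific init second */
	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}
	...
err_out5:
	qdisc_put_stab(rtnl_dereference(sch->stab));	/* undo step 1 */
err_out4:
	if (ops->destroy)
		ops->destroy(sch);			/* undo the alloc */
	goto err_out3;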
@@ -1509,7 +1510,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
if (err != 0)
return err;
} else {
- qdisc_notify(net, skb, n, clid, NULL, q);
+ qdisc_notify(net, skb, n, clid, NULL, q, NULL);
}
return 0;
}
@@ -1648,7 +1649,7 @@ replay:
}
err = qdisc_change(q, tca, extack);
if (err == 0)
- qdisc_notify(net, skb, n, clid, NULL, q);
+ qdisc_notify(net, skb, n, clid, NULL, q, extack);
return err;
create_n_graft:
@@ -1715,7 +1716,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC) <= 0)
+ RTM_NEWQDISC, NULL) <= 0)
goto done;
q_idx++;
}
@@ -1737,7 +1738,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC) <= 0)
+ RTM_NEWQDISC, NULL) <= 0)
goto done;
q_idx++;
}
@@ -1810,8 +1811,8 @@ done:
************************************************/
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
- unsigned long cl,
- u32 portid, u32 seq, u16 flags, int event)
+ unsigned long cl, u32 portid, u32 seq, u16 flags,
+ int event, struct netlink_ext_ack *extack)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1846,7 +1847,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
if (gnet_stats_finish_copy(&d) < 0)
goto nla_put_failure;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1857,7 +1863,7 @@ nla_put_failure:
static int tclass_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event)
+ unsigned long cl, int event, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1866,7 +1872,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1893,7 +1899,7 @@ static int tclass_del_notify(struct net *net,
return -ENOBUFS;
if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
- RTM_DELTCLASS) < 0) {
+ RTM_DELTCLASS, extack) < 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2100,7 +2106,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
- err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
+ err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
goto out;
default:
err = -EINVAL;
@@ -2118,7 +2124,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
if (cops->change)
err = cops->change(q, clid, portid, tca, &new_cl, extack);
if (err == 0) {
- tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
+ tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
/* We just create a new class, need to do reverse binding. */
if (cl != new_cl)
tc_bind_tclass(q, portid, clid, new_cl);
@@ -2140,7 +2146,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTCLASS);
+ RTM_NEWTCLASS, NULL);
}
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 3ed0c3342189..7970217b565a 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1209,7 +1209,7 @@ static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
iph_check->daddr != iph->daddr)
continue;
- seglen = ntohs(iph_check->tot_len) -
+ seglen = iph_totlen(skb, iph_check) -
(4 * iph_check->ihl);
} else if (iph_check->version == 6) {
ipv6h = (struct ipv6hdr *)iph;
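The switch from ntohs(iph_check->tot_len) to iph_totlen() accounts for IPv4 BIG TCP, where a GSO TCP packet longer than 64 KiB carries tot_len == 0 and the real length must come from the skb. Roughly what the helper does, paraphrased from include/linux/ip.h of the same era (verify against the tree you build with):

static inline u32 iph_totlen(const struct sk_buff *skb, const struct iphdr *iph)
{
	u32 len = ntohs(iph->tot_len);

	/* Zero tot_len on a GSO TCP skb means "longer than 64 KiB" */
	return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ?
	       len : skb->len - skb_network_offset(skb);
}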
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 4c68abaa289b..48ed87b91086 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -17,6 +17,8 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include "sch_mqprio_lib.h"
+
struct mqprio_sched {
struct Qdisc **qdiscs;
u16 mode;
@@ -27,6 +29,62 @@ struct mqprio_sched {
u64 max_rate[TC_QOPT_MAX_QUEUE];
};
+static int mqprio_enable_offload(struct Qdisc *sch,
+ const struct tc_mqprio_qopt *qopt,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err, i;
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+ return -EINVAL;
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ mqprio.flags = priv->flags;
+ if (priv->flags & TC_MQPRIO_F_MODE)
+ mqprio.mode = priv->mode;
+ if (priv->flags & TC_MQPRIO_F_SHAPER)
+ mqprio.shaper = priv->shaper;
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.min_rate[i] = priv->min_rate[i];
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.max_rate[i] = priv->max_rate[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ if (err)
+ return err;
+
+ priv->hw_offload = mqprio.qopt.hw;
+
+ return 0;
+}
+
+static void mqprio_disable_offload(struct Qdisc *sch)
+{
+ struct tc_mqprio_qopt_offload mqprio = { { 0 } };
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ case TC_MQPRIO_MODE_CHANNEL:
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ break;
+ }
+}
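On the driver side, both helpers funnel into the same ndo_setup_tc() hook; a driver can tell enable from disable by qopt.num_tc, since mqprio_disable_offload() passes a zeroed offload struct. A hedged sketch (the foo_* names are invented, not from the patch):

static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			void *type_data)
{
	struct tc_mqprio_qopt_offload *mqprio = type_data;

	if (type != TC_SETUP_QDISC_MQPRIO)
		return -EOPNOTSUPP;

	if (!mqprio->qopt.num_tc)
		return foo_clear_tcs(dev);	/* disable path */

	/* Enable path: the driver may lower mqprio->qopt.hw if it
	 * supports less than what was requested; mqprio_enable_offload()
	 * reads the result back into priv->hw_offload.
	 */
	return foo_apply_tcs(dev, &mqprio->qopt);
}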
+
static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
@@ -41,37 +99,17 @@ static void mqprio_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
- if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt_offload mqprio = { { 0 } };
-
- switch (priv->mode) {
- case TC_MQPRIO_MODE_DCB:
- case TC_MQPRIO_MODE_CHANNEL:
- dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_MQPRIO,
- &mqprio);
- break;
- default:
- return;
- }
- } else {
+ if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc)
+ mqprio_disable_offload(sch);
+ else
netdev_set_num_tc(dev, 0);
- }
}
-static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ const struct tc_mqprio_caps *caps,
+ struct netlink_ext_ack *extack)
{
- int i, j;
-
- /* Verify num_tc is not out of max range */
- if (qopt->num_tc > TC_MAX_QUEUE)
- return -EINVAL;
-
- /* Verify priority mapping uses valid tcs */
- for (i = 0; i < TC_BITMASK + 1; i++) {
- if (qopt->prio_tc_map[i] >= qopt->num_tc)
- return -EINVAL;
- }
+ int err;
/* Limit qopt->hw to maximum supported offload value. Drivers have
* the option of overriding this later if they don't support a
@@ -80,31 +118,23 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
- /* If hardware offload is requested we will leave it to the device
- * to either populate the queue counts itself or to validate the
- * provided queue counts. If ndo_setup_tc is not present then
- * hardware doesn't support offload and we should return an error.
+ /* If hardware offload is requested, we will leave 3 options to the
+ * device driver:
+ * - populate the queue counts itself (and ignore what was requested)
+ * - validate the provided queue counts by itself (and apply them)
+ * - request queue count validation here (and apply them)
*/
- if (qopt->hw)
- return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
-
- for (i = 0; i < qopt->num_tc; i++) {
- unsigned int last = qopt->offset[i] + qopt->count[i];
-
- /* Verify the queue count is in tx range being equal to the
- * real_num_tx_queues indicates the last queue is in use.
- */
- if (qopt->offset[i] >= dev->real_num_tx_queues ||
- !qopt->count[i] ||
- last > dev->real_num_tx_queues)
- return -EINVAL;
-
- /* Verify that the offset and counts do not overlap */
- for (j = i + 1; j < qopt->num_tc; j++) {
- if (last > qopt->offset[j])
- return -EINVAL;
- }
- }
+ err = mqprio_validate_qopt(dev, qopt,
+ !qopt->hw || caps->validate_queue_counts,
+ false, extack);
+ if (err)
+ return err;
+
+ /* If ndo_setup_tc is not present then hardware doesn't support offload
+ * and we should return an error.
+ */
+ if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+ return -EINVAL;
return 0;
}
@@ -130,6 +160,67 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
+ struct nlattr *opt)
+{
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int i, rem, err;
+
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw)
+ return -EINVAL;
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+
+ return 0;
+}
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -139,9 +230,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
- struct nlattr *tb[TCA_MQPRIO_MAX + 1];
- struct nlattr *attr;
- int rem;
+ struct tc_mqprio_caps caps;
int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
@@ -160,61 +249,18 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
+ qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO,
+ &caps, sizeof(caps));
+
qopt = nla_data(opt);
- if (mqprio_parse_opt(dev, qopt))
+ if (mqprio_parse_opt(dev, qopt, &caps, extack))
return -EINVAL;
len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
if (len > 0) {
- err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
- sizeof(*qopt));
- if (err < 0)
+ err = mqprio_parse_nlattr(sch, qopt, opt);
+ if (err)
return err;
-
- if (!qopt->hw)
- return -EINVAL;
-
- if (tb[TCA_MQPRIO_MODE]) {
- priv->flags |= TC_MQPRIO_F_MODE;
- priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
- }
-
- if (tb[TCA_MQPRIO_SHAPER]) {
- priv->flags |= TC_MQPRIO_F_SHAPER;
- priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
- }
-
- if (tb[TCA_MQPRIO_MIN_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->min_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MIN_RATE;
- }
-
- if (tb[TCA_MQPRIO_MAX_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->max_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MAX_RATE;
- }
}
/* pre-allocate qdisc, attachment can't fail */
@@ -241,36 +287,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
-
- switch (priv->mode) {
- case TC_MQPRIO_MODE_DCB:
- if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
- return -EINVAL;
- break;
- case TC_MQPRIO_MODE_CHANNEL:
- mqprio.flags = priv->flags;
- if (priv->flags & TC_MQPRIO_F_MODE)
- mqprio.mode = priv->mode;
- if (priv->flags & TC_MQPRIO_F_SHAPER)
- mqprio.shaper = priv->shaper;
- if (priv->flags & TC_MQPRIO_F_MIN_RATE)
- for (i = 0; i < mqprio.qopt.num_tc; i++)
- mqprio.min_rate[i] = priv->min_rate[i];
- if (priv->flags & TC_MQPRIO_F_MAX_RATE)
- for (i = 0; i < mqprio.qopt.num_tc; i++)
- mqprio.max_rate[i] = priv->max_rate[i];
- break;
- default:
- return -EINVAL;
- }
- err = dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_MQPRIO,
- &mqprio);
+ err = mqprio_enable_offload(sch, qopt, extack);
if (err)
return err;
-
- priv->hw_offload = mqprio.qopt.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -387,7 +406,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
- unsigned int ntx, tc;
+ unsigned int ntx;
sch->q.qlen = 0;
gnet_stats_basic_sync_init(&sch->bstats);
@@ -411,15 +430,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
spin_unlock_bh(qdisc_lock(qdisc));
}
- opt.num_tc = netdev_get_num_tc(dev);
- memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+ mqprio_qopt_reconstruct(dev, &opt);
opt.hw = priv->hw_offload;
- for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
- opt.count[tc] = dev->tc_to_txq[tc].count;
- opt.offset[tc] = dev->tc_to_txq[tc].offset;
- }
-
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c
new file mode 100644
index 000000000000..c58a533b8ec5
--- /dev/null
+++ b/net/sched/sch_mqprio_lib.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/types.h>
+#include <net/pkt_sched.h>
+
+#include "sch_mqprio_lib.h"
+
+/* Returns true if the intervals [a, b) and [c, d) overlap. */
+static bool intervals_overlap(int a, int b, int c, int d)
+{
+ int left = max(a, c), right = min(b, d);
+
+ return left < right;
+}
+
+static int mqprio_validate_queue_counts(struct net_device *dev,
+ const struct tc_mqprio_qopt *qopt,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack)
+{
+ int i, j;
+
+ for (i = 0; i < qopt->num_tc; i++) {
+ unsigned int last = qopt->offset[i] + qopt->count[i];
+
+ if (!qopt->count[i]) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "No queues for TC %d",
+ i);
+ return -EINVAL;
+ }
+
+		/* Verify the queue range stays within the device's TX queue
+		 * range; a range ending exactly at real_num_tx_queues means
+		 * the last queue is in use.
+ */
+ if (qopt->offset[i] >= dev->real_num_tx_queues ||
+ last > dev->real_num_tx_queues) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Queues %d:%d for TC %d exceed the %d TX queues available",
+ qopt->count[i], qopt->offset[i],
+ i, dev->real_num_tx_queues);
+ return -EINVAL;
+ }
+
+ if (allow_overlapping_txqs)
+ continue;
+
+ /* Verify that the offset and counts do not overlap */
+ for (j = i + 1; j < qopt->num_tc; j++) {
+ if (intervals_overlap(qopt->offset[i], last,
+ qopt->offset[j],
+ qopt->offset[j] +
+ qopt->count[j])) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "TC %d queues %d@%d overlap with TC %d queues %d@%d",
+ i, qopt->count[i], qopt->offset[i],
+ j, qopt->count[j], qopt->offset[j]);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ bool validate_queue_counts,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack)
+{
+ int i, err;
+
+ /* Verify num_tc is not out of max range */
+ if (qopt->num_tc > TC_MAX_QUEUE) {
+ NL_SET_ERR_MSG(extack,
+ "Number of traffic classes is outside valid range");
+ return -EINVAL;
+ }
+
+ /* Verify priority mapping uses valid tcs */
+ for (i = 0; i <= TC_BITMASK; i++) {
+ if (qopt->prio_tc_map[i] >= qopt->num_tc) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid traffic class in priority to traffic class mapping");
+ return -EINVAL;
+ }
+ }
+
+ if (validate_queue_counts) {
+ err = mqprio_validate_queue_counts(dev, qopt,
+ allow_overlapping_txqs,
+ extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mqprio_validate_qopt);
+
+void mqprio_qopt_reconstruct(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+ int tc, num_tc = netdev_get_num_tc(dev);
+
+ qopt->num_tc = num_tc;
+ memcpy(qopt->prio_tc_map, dev->prio_tc_map, sizeof(qopt->prio_tc_map));
+
+ for (tc = 0; tc < num_tc; tc++) {
+ qopt->count[tc] = dev->tc_to_txq[tc].count;
+ qopt->offset[tc] = dev->tc_to_txq[tc].offset;
+ }
+}
+EXPORT_SYMBOL_GPL(mqprio_qopt_reconstruct);
+
+MODULE_LICENSE("GPL");
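A worked example of the overlap check in the new library (numbers invented): TC 0 mapped to 4 queues at offset 0 and TC 1 to 4 queues at offset 2 give the half-open ranges [0, 4) and [2, 6).

/* intervals_overlap(0, 4, 2, 6):
 *	left  = max(0, 2) = 2
 *	right = min(4, 6) = 4
 *	2 < 4 -> overlap, so mqprio_validate_queue_counts() fails with
 *	"TC 0 queues 4@0 overlap with TC 1 queues 4@2" in the extack.
 */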
diff --git a/net/sched/sch_mqprio_lib.h b/net/sched/sch_mqprio_lib.h
new file mode 100644
index 000000000000..63f725ab8761
--- /dev/null
+++ b/net/sched/sch_mqprio_lib.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SCH_MQPRIO_LIB_H
+#define __SCH_MQPRIO_LIB_H
+
+#include <linux/types.h>
+
+struct net_device;
+struct netlink_ext_ack;
+struct tc_mqprio_qopt;
+
+int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ bool validate_queue_counts,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack);
+void mqprio_qopt_reconstruct(struct net_device *dev,
+ struct tc_mqprio_qopt *qopt);
+
+#endif
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c322a61eaeea..9781b47962bb 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -26,7 +26,11 @@
#include <net/sock.h>
#include <net/tcp.h>
+#include "sch_mqprio_lib.h"
+
static LIST_HEAD(taprio_list);
+static struct static_key_false taprio_have_broken_mqprio;
+static struct static_key_false taprio_have_working_mqprio;
#define TAPRIO_ALL_GATES_OPEN -1
@@ -35,15 +39,19 @@ static LIST_HEAD(taprio_list);
#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
- struct list_head list;
-
- /* The instant that this entry "closes" and the next one
- * should open, the qdisc will make some effort so that no
- * packet leaves after this time.
+ /* Durations between this GCL entry and the GCL entry where the
+ * respective traffic class gate closes
*/
- ktime_t close_time;
+ u64 gate_duration[TC_MAX_QUEUE];
+ atomic_t budget[TC_MAX_QUEUE];
+ /* The qdisc makes some effort so that no packet leaves
+ * after this time
+ */
+ ktime_t gate_close_time[TC_MAX_QUEUE];
+ struct list_head list;
+ /* Used to calculate when to advance the schedule */
+ ktime_t end_time;
ktime_t next_txtime;
- atomic_t budget;
int index;
u32 gate_mask;
u32 interval;
@@ -51,10 +59,16 @@ struct sched_entry {
};
struct sched_gate_list {
+ /* Longest non-zero contiguous gate durations per traffic class,
+ * or 0 if a traffic class gate never opens during the schedule.
+ */
+ u64 max_open_gate_duration[TC_MAX_QUEUE];
+ u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
+ u32 max_sdu[TC_MAX_QUEUE]; /* for dump */
struct rcu_head rcu;
struct list_head entries;
size_t num_entries;
- ktime_t cycle_close_time;
+ ktime_t cycle_end_time;
s64 cycle_time;
s64 cycle_time_extension;
s64 base_time;
@@ -67,6 +81,8 @@ struct taprio_sched {
enum tk_offsets tk_offset;
int clockid;
bool offloaded;
+ bool detected_mqprio;
+ bool broken_mqprio;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
*/
@@ -78,8 +94,8 @@ struct taprio_sched {
struct sched_gate_list __rcu *admin_sched;
struct hrtimer advance_timer;
struct list_head taprio_list;
- u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
- u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */
+ int cur_txq[TC_MAX_QUEUE];
+ u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
u32 txtime_delay;
};
@@ -88,6 +104,57 @@ struct __tc_taprio_qopt_offload {
struct tc_taprio_qopt_offload offload;
};
+static void taprio_calculate_gate_durations(struct taprio_sched *q,
+ struct sched_gate_list *sched)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
+ struct sched_entry *entry, *cur;
+ int tc;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ u32 gates_still_open = entry->gate_mask;
+
+ /* For each traffic class, calculate each open gate duration,
+ * starting at this schedule entry and ending at the schedule
+ * entry containing a gate close event for that TC.
+ */
+ cur = entry;
+
+ do {
+ if (!gates_still_open)
+ break;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (!(gates_still_open & BIT(tc)))
+ continue;
+
+ if (cur->gate_mask & BIT(tc))
+ entry->gate_duration[tc] += cur->interval;
+ else
+ gates_still_open &= ~BIT(tc);
+ }
+
+ cur = list_next_entry_circular(cur, &sched->entries, list);
+ } while (cur != entry);
+
+ /* Keep track of the maximum gate duration for each traffic
+ * class, taking care to not confuse a traffic class which is
+ * temporarily closed with one that is always closed.
+ */
+ for (tc = 0; tc < num_tc; tc++)
+ if (entry->gate_duration[tc] &&
+ sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
+ sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
+ }
+}
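To make the circular walk concrete, consider an invented three-entry schedule with two traffic classes, 100 us per entry and cycle_time = 300 us:

/* Invented schedule:
 *	entry 0: gate_mask 0b01 (TC0 open)
 *	entry 1: gate_mask 0b11 (TC0 and TC1 open)
 *	entry 2: gate_mask 0b10 (TC1 open)
 *
 * Walking from each entry until the respective gate closes:
 *	entry 0: gate_duration[0] = 100 + 100 = 200 us (TC0 closes at entry 2)
 *	entry 1: gate_duration[0] = 100 us, gate_duration[1] = 100 + 100 = 200 us
 *	entry 2: gate_duration[1] = 100 us (TC1 closes at entry 0, wrapping)
 *
 * Hence max_open_gate_duration[] = { 200 us, 200 us }; neither equals
 * cycle_time, so both TCs get finite budgets and a dynamic max SDU.
 */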
+
+static bool taprio_entry_allows_tx(ktime_t skb_end_time,
+ struct sched_entry *entry, int tc)
+{
+ return ktime_before(skb_end_time, entry->gate_close_time[tc]);
+}
+
static ktime_t sched_base_time(const struct sched_gate_list *sched)
{
if (!sched)
@@ -180,6 +247,55 @@ static int length_to_duration(struct taprio_sched *q, int len)
return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
}
+static int duration_to_length(struct taprio_sched *q, u64 duration)
+{
+ return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte));
+}
+
+/* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the
+ * q->max_sdu[] requested by the user and the max_sdu dynamically determined by
+ * the maximum open gate durations at the given link speed.
+ */
+static void taprio_update_queue_max_sdu(struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct qdisc_size_table *stab)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
+ u32 max_sdu_from_user;
+ u32 max_sdu_dynamic;
+ u32 max_sdu;
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;
+
+ /* TC gate never closes => keep the queueMaxSDU
+ * selected by the user
+ */
+ if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
+ max_sdu_dynamic = U32_MAX;
+ } else {
+ u32 max_frm_len;
+
+ max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]);
+ if (stab)
+ max_frm_len -= stab->szopts.overhead;
+ max_sdu_dynamic = max_frm_len - dev->hard_header_len;
+ }
+
+ max_sdu = min(max_sdu_dynamic, max_sdu_from_user);
+
+ if (max_sdu != U32_MAX) {
+ sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
+ sched->max_sdu[tc] = max_sdu;
+ } else {
+ sched->max_frm_len[tc] = U32_MAX; /* never oversized */
+ sched->max_sdu[tc] = 0;
+ }
+ }
+}
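A hedged arithmetic example (link speed invented): at 1 Gbit/s, picos_per_byte is 8000, so a 200 us maximum open gate converts back into frame bytes as follows:

/* duration_to_length(q, 200000 ns) = 200000 * PSEC_PER_NSEC / 8000
 *				     = 25000 bytes (max_frm_len)
 * max_sdu_dynamic = 25000 - stab overhead (0 here) - 14 bytes of
 * Ethernet hard_header_len = 24986. If the user requested 1500, the
 * min() keeps 1500 and sched->max_frm_len[tc] becomes 1514.
 */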
+
/* Returns the entry corresponding to next available interval. If
* validate_interval is set, it only validates whether the timestamp occurs
* when the gate corresponding to the skb's traffic class is open.
@@ -413,14 +529,33 @@ done:
return txtime;
}
-static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
- struct Qdisc *child, struct sk_buff **to_free)
+/* Devices with full offload are expected to honor this in hardware */
+static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
+ struct sk_buff *skb)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct sched_gate_list *sched;
int prio = skb->priority;
+ bool exceeds = false;
u8 tc;
+ tc = netdev_get_prio_tc_map(dev, prio);
+
+ rcu_read_lock();
+ sched = rcu_dereference(q->oper_sched);
+ if (sched && skb->len > sched->max_frm_len[tc])
+ exceeds = true;
+ rcu_read_unlock();
+
+ return exceeds;
+}
+
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+ struct Qdisc *child, struct sk_buff **to_free)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+
/* sk_flags are only safe to use on full sockets. */
if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
if (!is_valid_interval(skb, sch))
@@ -431,17 +566,53 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
- /* Devices with full offload are expected to honor this in hardware */
- tc = netdev_get_prio_tc_map(dev, prio);
- if (skb->len > q->max_frm_len[tc])
- return qdisc_drop(skb, sch, to_free);
-
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return qdisc_enqueue(skb, child, to_free);
}
+static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
+ struct Qdisc *child,
+ struct sk_buff **to_free)
+{
+ unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
+ netdev_features_t features = netif_skb_features(skb);
+ struct sk_buff *segs, *nskb;
+ int ret;
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs))
+ return qdisc_drop(skb, sch, to_free);
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ skb_mark_not_on_list(segs);
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ slen += segs->len;
+
+ /* FIXME: we should be segmenting to a smaller size
+ * rather than dropping these
+ */
+ if (taprio_skb_exceeds_queue_max_sdu(sch, segs))
+ ret = qdisc_drop(segs, sch, to_free);
+ else
+ ret = taprio_enqueue_one(segs, sch, child, to_free);
+
+ if (ret != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret))
+ qdisc_qstats_drop(sch);
+ } else {
+ numsegs++;
+ }
+ }
+
+ if (numsegs > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
+ consume_skb(skb);
+
+ return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+}
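The backlog fixup at the end compensates parent qdiscs for the segmentation: they accounted one packet of len bytes at enqueue, but numsegs packets totalling slen bytes actually went in. With invented numbers:

/* One 6000-byte GSO skb segmented into 4 frames totalling 6056 bytes:
 *	qdisc_tree_reduce_backlog(sch, 1 - 4, 6000 - 6056);
 * i.e. "reduce" by -3 packets and -56 bytes, which grows the recorded
 * backlog to match what was really enqueued.
 */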
+
/* Will not be called in the full offload case, since the TX queues are
* attached to the Qdisc created using qdisc_create_dflt()
*/
@@ -458,97 +629,190 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(!child))
return qdisc_drop(skb, sch, to_free);
- /* Large packets might not be transmitted when the transmission duration
- * exceeds any configured interval. Therefore, segment the skb into
- * smaller chunks. Drivers with full offload are expected to handle
- * this in hardware.
- */
- if (skb_is_gso(skb)) {
- unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
- netdev_features_t features = netif_skb_features(skb);
- struct sk_buff *segs, *nskb;
- int ret;
-
- segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
- if (IS_ERR_OR_NULL(segs))
- return qdisc_drop(skb, sch, to_free);
+ if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
+ /* Large packets might not be transmitted when the transmission
+ * duration exceeds any configured interval. Therefore, segment
+ * the skb into smaller chunks. Drivers with full offload are
+ * expected to handle this in hardware.
+ */
+ if (skb_is_gso(skb))
+ return taprio_enqueue_segmented(skb, sch, child,
+ to_free);
- skb_list_walk_safe(segs, segs, nskb) {
- skb_mark_not_on_list(segs);
- qdisc_skb_cb(segs)->pkt_len = segs->len;
- slen += segs->len;
+ return qdisc_drop(skb, sch, to_free);
+ }
- ret = taprio_enqueue_one(segs, sch, child, to_free);
- if (ret != NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(ret))
- qdisc_qstats_drop(sch);
- } else {
- numsegs++;
- }
- }
+ return taprio_enqueue_one(skb, sch, child, to_free);
+}
+
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+ WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented");
+ return NULL;
+}
+
+static void taprio_set_budgets(struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct sched_entry *entry)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
+ int tc, budget;
- if (numsegs > 1)
- qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
- consume_skb(skb);
+ for (tc = 0; tc < num_tc; tc++) {
+ /* Traffic classes which never close have infinite budget */
+ if (entry->gate_duration[tc] == sched->cycle_time)
+ budget = INT_MAX;
+ else
+ budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
+ atomic64_read(&q->picos_per_byte));
- return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+ atomic_set(&entry->budget[tc], budget);
}
+}
- return taprio_enqueue_one(skb, sch, child, to_free);
+/* When an skb is sent, it consumes from the budget of all traffic classes */
+static int taprio_update_budgets(struct sched_entry *entry, size_t len,
+ int tc_consumed, int num_tc)
+{
+ int tc, budget, new_budget = 0;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ budget = atomic_read(&entry->budget[tc]);
+ /* Don't consume from infinite budget */
+ if (budget == INT_MAX) {
+ if (tc == tc_consumed)
+ new_budget = budget;
+ continue;
+ }
+
+ if (tc == tc_consumed)
+ new_budget = atomic_sub_return(len, &entry->budget[tc]);
+ else
+ atomic_sub(len, &entry->budget[tc]);
+ }
+
+ return new_budget;
}
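Budgets are simply the per-TC gate durations converted to bytes at line rate; a hedged example, again at 1 Gbit/s (picos_per_byte = 8000):

/* gate_duration[tc] = 100 us:
 *	budget = 100000 ns * PSEC_PER_NSEC / 8000 = 12500 bytes
 * A gate that never closes (gate_duration[tc] == cycle_time) gets
 * INT_MAX, and taprio_update_budgets() never subtracts from it.
 */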
-/* Will not be called in the full offload case, since the TX queues are
- * attached to the Qdisc created using qdisc_create_dflt()
- */
-static struct sk_buff *taprio_peek(struct Qdisc *sch)
+static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
+ struct sched_entry *entry,
+ u32 gate_mask)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- struct sched_entry *entry;
+ struct Qdisc *child = q->qdiscs[txq];
+ int num_tc = netdev_get_num_tc(dev);
struct sk_buff *skb;
- u32 gate_mask;
- int i;
+ ktime_t guard;
+ int prio;
+ int len;
+ u8 tc;
- rcu_read_lock();
- entry = rcu_dereference(q->current_entry);
- gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
- rcu_read_unlock();
+ if (unlikely(!child))
+ return NULL;
- if (!gate_mask)
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags))
+ goto skip_peek_checks;
+
+ skb = child->ops->peek(child);
+ if (!skb)
return NULL;
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct Qdisc *child = q->qdiscs[i];
- int prio;
- u8 tc;
+ prio = skb->priority;
+ tc = netdev_get_prio_tc_map(dev, prio);
- if (unlikely(!child))
- continue;
+ if (!(gate_mask & BIT(tc)))
+ return NULL;
- skb = child->ops->peek(child);
- if (!skb)
- continue;
+ len = qdisc_pkt_len(skb);
+ guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
- if (TXTIME_ASSIST_IS_ENABLED(q->flags))
- return skb;
+ /* In the case that there's no gate entry, there's no
+ * guard band ...
+ */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ !taprio_entry_allows_tx(guard, entry, tc))
+ return NULL;
+
+ /* ... and no budget. */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ taprio_update_budgets(entry, len, tc, num_tc) < 0)
+ return NULL;
+
+skip_peek_checks:
+ skb = child->ops->dequeue(child);
+ if (unlikely(!skb))
+ return NULL;
+
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
+{
+ int offset = dev->tc_to_txq[tc].offset;
+ int count = dev->tc_to_txq[tc].count;
+
+ (*txq)++;
+ if (*txq == offset + count)
+ *txq = offset;
+}
- prio = skb->priority;
- tc = netdev_get_prio_tc_map(dev, prio);
+/* Prioritize higher traffic classes, and select among TXQs belonging to the
+ * same TC using round robin
+ */
+static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
+ struct sched_entry *entry,
+ u32 gate_mask)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int num_tc = netdev_get_num_tc(dev);
+ struct sk_buff *skb;
+ int tc;
+
+ for (tc = num_tc - 1; tc >= 0; tc--) {
+ int first_txq = q->cur_txq[tc];
if (!(gate_mask & BIT(tc)))
continue;
- return skb;
+ do {
+ skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
+ entry, gate_mask);
+
+ taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
+
+ if (skb)
+ return skb;
+ } while (q->cur_txq[tc] != first_txq);
}
return NULL;
}
-static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
+/* Broken way of prioritizing smaller TXQ indices and ignoring the traffic
+ * class other than to determine whether the gate is open or not
+ */
+static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch,
+ struct sched_entry *entry,
+ u32 gate_mask)
{
- atomic_set(&entry->budget,
- div64_u64((u64)entry->interval * PSEC_PER_NSEC,
- atomic64_read(&q->picos_per_byte)));
+ struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
+ if (skb)
+ return skb;
+ }
+
+ return NULL;
}
/* Will not be called in the full offload case, since the TX queues are
@@ -557,11 +821,9 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
struct sk_buff *skb = NULL;
struct sched_entry *entry;
u32 gate_mask;
- int i;
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
@@ -571,69 +833,23 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
* "AdminGateStates"
*/
gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
-
if (!gate_mask)
goto done;
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct Qdisc *child = q->qdiscs[i];
- ktime_t guard;
- int prio;
- int len;
- u8 tc;
-
- if (unlikely(!child))
- continue;
-
- if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
- skb = child->ops->dequeue(child);
- if (!skb)
- continue;
- goto skb_found;
- }
-
- skb = child->ops->peek(child);
- if (!skb)
- continue;
-
- prio = skb->priority;
- tc = netdev_get_prio_tc_map(dev, prio);
-
- if (!(gate_mask & BIT(tc))) {
- skb = NULL;
- continue;
- }
-
- len = qdisc_pkt_len(skb);
- guard = ktime_add_ns(taprio_get_time(q),
- length_to_duration(q, len));
-
- /* In the case that there's no gate entry, there's no
- * guard band ...
- */
- if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- ktime_after(guard, entry->close_time)) {
- skb = NULL;
- continue;
- }
-
- /* ... and no budget. */
- if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- atomic_sub_return(len, &entry->budget) < 0) {
- skb = NULL;
- continue;
- }
-
- skb = child->ops->dequeue(child);
- if (unlikely(!skb))
- goto done;
-
-skb_found:
- qdisc_bstats_update(sch, skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
-
- goto done;
+ if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
+ !static_branch_likely(&taprio_have_working_mqprio)) {
+ /* Single NIC kind which is broken */
+ skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
+ } else if (static_branch_likely(&taprio_have_working_mqprio) &&
+ !static_branch_unlikely(&taprio_have_broken_mqprio)) {
+ /* Single NIC kind which prioritizes properly */
+ skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
+ } else {
+ /* Mixed NIC kinds present in system, need dynamic testing */
+ if (q->broken_mqprio)
+ skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
+ else
+ skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
}
done:
@@ -648,7 +864,7 @@ static bool should_restart_cycle(const struct sched_gate_list *oper,
if (list_is_last(&entry->list, &oper->entries))
return true;
- if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
+ if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0)
return true;
return false;
@@ -656,7 +872,7 @@ static bool should_restart_cycle(const struct sched_gate_list *oper,
static bool should_change_schedules(const struct sched_gate_list *admin,
const struct sched_gate_list *oper,
- ktime_t close_time)
+ ktime_t end_time)
{
ktime_t next_base_time, extension_time;
@@ -665,18 +881,18 @@ static bool should_change_schedules(const struct sched_gate_list *admin,
next_base_time = sched_base_time(admin);
- /* This is the simple case, the close_time would fall after
+ /* This is the simple case, the end_time would fall after
* the next schedule base_time.
*/
- if (ktime_compare(next_base_time, close_time) <= 0)
+ if (ktime_compare(next_base_time, end_time) <= 0)
return true;
- /* This is the cycle_time_extension case, if the close_time
+ /* This is the cycle_time_extension case, if the end_time
* plus the amount that can be extended would fall after the
* next schedule base_time, we can extend the current schedule
* for that amount.
*/
- extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
+ extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
* how precisely the extension should be made. So after
@@ -692,10 +908,13 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
struct taprio_sched *q = container_of(timer, struct taprio_sched,
advance_timer);
+ struct net_device *dev = qdisc_dev(q->root);
struct sched_gate_list *oper, *admin;
+ int num_tc = netdev_get_num_tc(dev);
struct sched_entry *entry, *next;
struct Qdisc *sch = q->root;
- ktime_t close_time;
+ ktime_t end_time;
+ int tc;
spin_lock(&q->current_entry_lock);
entry = rcu_dereference_protected(q->current_entry,
@@ -714,41 +933,49 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
* entry of all schedules are pre-calculated during the
* schedule initialization.
*/
- if (unlikely(!entry || entry->close_time == oper->base_time)) {
+ if (unlikely(!entry || entry->end_time == oper->base_time)) {
next = list_first_entry(&oper->entries, struct sched_entry,
list);
- close_time = next->close_time;
+ end_time = next->end_time;
goto first_run;
}
if (should_restart_cycle(oper, entry)) {
next = list_first_entry(&oper->entries, struct sched_entry,
list);
- oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
- oper->cycle_time);
+ oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
+ oper->cycle_time);
} else {
next = list_next_entry(entry, list);
}
- close_time = ktime_add_ns(entry->close_time, next->interval);
- close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
+ end_time = ktime_add_ns(entry->end_time, next->interval);
+ end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (next->gate_duration[tc] == oper->cycle_time)
+ next->gate_close_time[tc] = KTIME_MAX;
+ else
+ next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
+ next->gate_duration[tc]);
+ }
- if (should_change_schedules(admin, oper, close_time)) {
+ if (should_change_schedules(admin, oper, end_time)) {
/* Set things so the next time this runs, the new
* schedule runs.
*/
- close_time = sched_base_time(admin);
+ end_time = sched_base_time(admin);
switch_schedules(q, &admin, &oper);
}
- next->close_time = close_time;
- taprio_set_budget(q, next);
+ next->end_time = end_time;
+ taprio_set_budgets(q, oper, next);
first_run:
rcu_assign_pointer(q->current_entry, next);
spin_unlock(&q->current_entry_lock);
- hrtimer_set_expires(&q->advance_timer, close_time);
+ hrtimer_set_expires(&q->advance_timer, end_time);
rcu_read_lock();
__netif_schedule(sch);
@@ -916,6 +1143,8 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
new->cycle_time = cycle;
}
+ taprio_calculate_gate_durations(q, new);
+
return 0;
}
@@ -924,7 +1153,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
struct netlink_ext_ack *extack,
u32 taprio_flags)
{
- int i, j;
+ bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
if (!qopt && !dev->num_tc) {
NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
@@ -937,52 +1166,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
if (dev->num_tc)
return 0;
- /* Verify num_tc is not out of max range */
- if (qopt->num_tc > TC_MAX_QUEUE) {
- NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
- return -EINVAL;
- }
-
/* taprio imposes that traffic classes map 1:n to tx queues */
if (qopt->num_tc > dev->num_tx_queues) {
NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
return -EINVAL;
}
- /* Verify priority mapping uses valid tcs */
- for (i = 0; i <= TC_BITMASK; i++) {
- if (qopt->prio_tc_map[i] >= qopt->num_tc) {
- NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
- return -EINVAL;
- }
- }
-
- for (i = 0; i < qopt->num_tc; i++) {
- unsigned int last = qopt->offset[i] + qopt->count[i];
-
- /* Verify the queue count is in tx range being equal to the
- * real_num_tx_queues indicates the last queue is in use.
- */
- if (qopt->offset[i] >= dev->num_tx_queues ||
- !qopt->count[i] ||
- last > dev->real_num_tx_queues) {
- NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
- return -EINVAL;
- }
-
- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
- continue;
-
- /* Verify that the offset and counts do not overlap */
- for (j = i + 1; j < qopt->num_tc; j++) {
- if (last > qopt->offset[j]) {
- NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
- return -EINVAL;
- }
- }
- }
-
- return 0;
+ /* For some reason, in txtime-assist mode, we allow TXQ ranges for
+ * different TCs to overlap, and just validate the TXQ ranges.
+ */
+ return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
+ extack);
}
static int taprio_get_start_time(struct Qdisc *sch,
@@ -1019,11 +1213,14 @@ static int taprio_get_start_time(struct Qdisc *sch,
return 0;
}
-static void setup_first_close_time(struct taprio_sched *q,
- struct sched_gate_list *sched, ktime_t base)
+static void setup_first_end_time(struct taprio_sched *q,
+ struct sched_gate_list *sched, ktime_t base)
{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
struct sched_entry *first;
ktime_t cycle;
+ int tc;
first = list_first_entry(&sched->entries,
struct sched_entry, list);
@@ -1031,10 +1228,18 @@ static void setup_first_close_time(struct taprio_sched *q,
cycle = sched->cycle_time;
/* FIXME: find a better place to do this */
- sched->cycle_close_time = ktime_add_ns(base, cycle);
+ sched->cycle_end_time = ktime_add_ns(base, cycle);
+
+ first->end_time = ktime_add_ns(base, first->interval);
+ taprio_set_budgets(q, sched, first);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (first->gate_duration[tc] == sched->cycle_time)
+ first->gate_close_time[tc] = KTIME_MAX;
+ else
+ first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
+ }
- first->close_time = ktime_add_ns(base, first->interval);
- taprio_set_budget(q, first);
rcu_assign_pointer(q->current_entry, NULL);
}
@@ -1088,6 +1293,8 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct sched_gate_list *oper, *admin;
+ struct qdisc_size_table *stab;
struct taprio_sched *q;
ASSERT_RTNL();
@@ -1100,6 +1307,17 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
continue;
taprio_set_picos_per_byte(dev, q);
+
+ stab = rtnl_dereference(q->root->stab);
+
+ oper = rtnl_dereference(q->oper_sched);
+ if (oper)
+ taprio_update_queue_max_sdu(q, oper, stab);
+
+ admin = rtnl_dereference(q->admin_sched);
+ if (admin)
+ taprio_update_queue_max_sdu(q, admin, stab);
+
break;
}
@@ -1203,7 +1421,8 @@ static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
static void taprio_sched_to_offload(struct net_device *dev,
struct sched_gate_list *sched,
- struct tc_taprio_qopt_offload *offload)
+ struct tc_taprio_qopt_offload *offload,
+ const struct tc_taprio_caps *caps)
{
struct sched_entry *entry;
int i = 0;
@@ -1217,7 +1436,11 @@ static void taprio_sched_to_offload(struct net_device *dev,
e->command = entry->command;
e->interval = entry->interval;
- e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask);
+ if (caps->gate_mask_per_txq)
+ e->gate_mask = tc_map_to_queue_mask(dev,
+ entry->gate_mask);
+ else
+ e->gate_mask = entry->gate_mask;
i++;
}
@@ -1225,6 +1448,34 @@ static void taprio_sched_to_offload(struct net_device *dev,
offload->num_entries = i;
}
+static void taprio_detect_broken_mqprio(struct taprio_sched *q)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ struct tc_taprio_caps caps;
+
+ qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
+ &caps, sizeof(caps));
+
+ q->broken_mqprio = caps.broken_mqprio;
+ if (q->broken_mqprio)
+ static_branch_inc(&taprio_have_broken_mqprio);
+ else
+ static_branch_inc(&taprio_have_working_mqprio);
+
+ q->detected_mqprio = true;
+}
+
+static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
+{
+ if (!q->detected_mqprio)
+ return;
+
+ if (q->broken_mqprio)
+ static_branch_dec(&taprio_have_broken_mqprio);
+ else
+ static_branch_dec(&taprio_have_working_mqprio);
+}
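Both keys are counters, not booleans: every taprio instance increments exactly one of them at init and decrements the same one on destroy, so the branches in taprio_dequeue() get patched at runtime to whichever NIC flavor the system actually has. A hedged sketch of the idiom (names invented):

static DEFINE_STATIC_KEY_FALSE(have_broken);

static void flavor_attach(bool broken)
{
	if (broken)
		static_branch_inc(&have_broken);  /* count 0->1 enables branch */
}

static void flavor_detach(bool broken)
{
	if (broken)
		static_branch_dec(&have_broken);  /* count 1->0 disables it */
}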
+
static int taprio_enable_offload(struct net_device *dev,
struct taprio_sched *q,
struct sched_gate_list *sched,
@@ -1261,7 +1512,8 @@ static int taprio_enable_offload(struct net_device *dev,
return -ENOMEM;
}
offload->enable = 1;
- taprio_sched_to_offload(dev, sched, offload);
+ mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
+ taprio_sched_to_offload(dev, sched, offload, &caps);
for (tc = 0; tc < TC_MAX_QUEUE; tc++)
offload->max_sdu[tc] = q->max_sdu[tc];
@@ -1452,7 +1704,6 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
struct netlink_ext_ack *extack)
{
struct taprio_sched *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
u32 max_sdu[TC_QOPT_MAX_QUEUE];
unsigned long seen_tcs = 0;
struct nlattr *n;
@@ -1466,18 +1717,14 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
continue;
- err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack);
+ err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
+ extack);
if (err)
goto out;
}
- for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
q->max_sdu[tc] = max_sdu[tc];
- if (max_sdu[tc])
- q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
- else
- q->max_frm_len[tc] = U32_MAX; /* never oversized */
- }
out:
return err;
@@ -1533,6 +1780,7 @@ static int taprio_new_flags(const struct nlattr *attr, u32 old,
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
struct sched_gate_list *oper, *admin, *new_admin;
struct taprio_sched *q = qdisc_priv(sch);
@@ -1600,15 +1848,18 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto free_sched;
taprio_set_picos_per_byte(dev, q);
+ taprio_update_queue_max_sdu(q, new_admin, stab);
if (mqprio) {
err = netdev_set_num_tc(dev, mqprio->num_tc);
if (err)
goto free_sched;
- for (i = 0; i < mqprio->num_tc; i++)
+ for (i = 0; i < mqprio->num_tc; i++) {
netdev_set_tc_queue(dev, i,
mqprio->count[i],
mqprio->offset[i]);
+ q->cur_txq[i] = mqprio->offset[i];
+ }
/* Always use supplied priority mappings */
for (i = 0; i <= TC_BITMASK; i++)
@@ -1663,7 +1914,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
} else {
- setup_first_close_time(q, new_admin, start);
+ setup_first_end_time(q, new_admin, start);
/* Protects against advance_sched() */
spin_lock_irqsave(&q->current_entry_lock, flags);
@@ -1683,6 +1934,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
new_admin = NULL;
err = 0;
+ if (!stab)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Size table not specified, frame length estimations may be inaccurate");
+
unlock:
spin_unlock_bh(qdisc_lock(sch));
@@ -1743,6 +1998,8 @@ static void taprio_destroy(struct Qdisc *sch)
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
+
+ taprio_cleanup_broken_mqprio(q);
}
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1807,6 +2064,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
q->qdiscs[i] = qdisc;
}
+ taprio_detect_broken_mqprio(q);
+
return taprio_change(sch, opt, extack);
}
@@ -1947,7 +2206,8 @@ error_nest:
return -1;
}
-static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
+static int taprio_dump_tc_entries(struct sk_buff *skb,
+ struct sched_gate_list *sched)
{
struct nlattr *n;
int tc;
@@ -1961,7 +2221,7 @@ static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
- q->max_sdu[tc]))
+ sched->max_sdu[tc]))
goto nla_put_failure;
nla_nest_end(skb, n);
@@ -1981,18 +2241,11 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct sched_gate_list *oper, *admin;
struct tc_mqprio_qopt opt = { 0 };
struct nlattr *nest, *sched_nest;
- unsigned int i;
oper = rtnl_dereference(q->oper_sched);
admin = rtnl_dereference(q->admin_sched);
- opt.num_tc = netdev_get_num_tc(dev);
- memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
-
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- opt.count[i] = dev->tc_to_txq[i].count;
- opt.offset[i] = dev->tc_to_txq[i].offset;
- }
+ mqprio_qopt_reconstruct(dev, &opt);
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!nest)
@@ -2012,7 +2265,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
- if (taprio_dump_tc_entries(q, skb))
+ if (oper && taprio_dump_tc_entries(skb, oper))
goto options_error;
if (oper && dump_schedule(skb, oper))
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 84021a6c4f9d..b91616f819de 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -59,6 +59,7 @@
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/busy_poll.h>
+#include <trace/events/sock.h>
#include <linux/socket.h> /* for sa_family_t */
#include <linux/export.h>
@@ -8321,7 +8322,7 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
int low, high, remaining, index;
unsigned int rover;
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
remaining = (high - low) + 1;
rover = get_random_u32_below(remaining) + low;
@@ -9244,6 +9245,8 @@ void sctp_data_ready(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
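
Two independent changes land in the sctp hunks: sctp_data_ready() gains the new sk_data_ready tracepoint, and port selection switches to inet_sk_get_local_port_range(), which consults the per-socket range (the IP_LOCAL_PORT_RANGE socket option) before falling back to the per-netns sysctl. The randomized probe start visible in the context lines is worth spelling out; a standalone sketch, with rand() as a non-cryptographic stand-in for get_random_u32_below():

    #include <stdlib.h>

    /* Pick a uniform starting port in [low, high] (assumes low <= high);
     * the caller then probes sequentially and wraps around the range. */
    static unsigned int pick_rover(unsigned int low, unsigned int high)
    {
        unsigned int remaining = (high - low) + 1;

        return (unsigned int)rand() % remaining + low;
    }
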
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e12d4fa5aece..b163266e581a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -27,6 +27,7 @@
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
#include <linux/ctype.h>
+#include <linux/splice.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -501,7 +502,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
return -EINVAL;
/* protect against parallel smcr_link_reg_buf() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -509,7 +510,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
if (rc)
break;
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
return rc;
}
@@ -518,15 +519,30 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
struct smc_link_group *lgr = link->lgr;
+ bool do_slow = false;
int i, rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
+
+ down_read(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_active(&lgr->lnk[i]))
+ continue;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ up_read(&lgr->llc_conf_mutex);
+ goto slow_path;
+ }
+ }
+ /* MR already registered on each active link */
+ goto fast_path;
+slow_path:
+ do_slow = true;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_buf()
*/
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -534,7 +550,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
goto out;
}
-
+fast_path:
/* exchange confirm_rkey msg with peer */
rc = smc_llc_do_confirm_rkey(link, rmb_desc);
if (rc) {
@@ -543,7 +559,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
}
rmb_desc->is_conf_rkey = true;
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
@@ -3382,12 +3398,14 @@ static int __init smc_init(void)
if (rc)
goto out_pernet_subsys;
- smc_ism_init();
+ rc = smc_ism_init();
+ if (rc)
+ goto out_pernet_subsys_stat;
smc_clc_init();
rc = smc_nl_init();
if (rc)
- goto out_pernet_subsys_stat;
+ goto out_ism;
rc = smc_pnet_init();
if (rc)
@@ -3480,6 +3498,8 @@ out_pnet:
smc_pnet_exit();
out_nl:
smc_nl_exit();
+out_ism:
+ smc_ism_exit();
out_pernet_subsys_stat:
unregister_pernet_subsys(&smc_net_stat_ops);
out_pernet_subsys:
@@ -3495,6 +3515,7 @@ static void __exit smc_exit(void)
sock_unregister(PF_SMC);
smc_core_exit();
smc_ib_unregister_client();
+ smc_ism_exit();
destroy_workqueue(smc_close_wq);
destroy_workqueue(smc_tcp_ls_wq);
destroy_workqueue(smc_hs_wq);
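
The af_smc.c hunks convert llc_conf_mutex to an rw_semaphore so that smcr_lgr_reg_rmbs() can first take it shared to check whether every active link already has the buffer registered, upgrading to the exclusive path only when registration work remains. A pthread sketch of that shape; note that pthread_rwlock_unlock() releases either mode, whereas a kernel rwsem needs the matching up_read()/up_write(), which is exactly why the patch tracks do_slow:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_rwlock_t conf_lock = PTHREAD_RWLOCK_INITIALIZER;

    static int reg_rmbs(bool (*all_registered)(void),
                        int (*do_register)(void),
                        int (*confirm_rkey)(void))
    {
        int rc = 0;

        pthread_rwlock_rdlock(&conf_lock);
        if (!all_registered()) {
            /* slow path: drop shared mode, retake exclusive, register */
            pthread_rwlock_unlock(&conf_lock);
            pthread_rwlock_wrlock(&conf_lock);
            rc = do_register();
            if (rc)
                goto out;
        }
        /* fast and slow paths both exchange the confirm_rkey message */
        rc = confirm_rkey();
    out:
        pthread_rwlock_unlock(&conf_lock);
        return rc;
    }
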
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index dfb9797f7bc6..b9b8b07aa702 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -813,6 +813,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
+ struct smcd_dev *smcd;
int len, i, plen, rc;
int reason_code = 0;
struct kvec vec[8];
@@ -868,7 +869,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
if (smcd_indicated(ini->smc_type_v1)) {
/* add SMC-D specifics */
if (ini->ism_dev[0]) {
- pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
+ smcd = ini->ism_dev[0];
+ pclc_smcd->ism.gid =
+ htonll(smcd->ops->get_local_gid(smcd));
pclc_smcd->ism.chid =
htons(smc_ism_get_chid(ini->ism_dev[0]));
}
@@ -914,8 +917,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
plen += sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
+ smcd = ini->ism_dev[i];
gidchids[i - 1].gid =
- htonll(ini->ism_dev[i]->local_gid);
+ htonll(smcd->ops->get_local_gid(smcd));
gidchids[i - 1].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
}
@@ -1000,7 +1004,8 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
clc->hdr.typev1 = SMC_TYPE_D;
- clc->d0.gid = conn->lgr->smcd->local_gid;
+ clc->d0.gid =
+ conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
clc->d0.dmbe_size = conn->rmbe_size_short;
clc->d0.dmbe_idx = 0;
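
The smc_clc.c hunks stop reading a GID cached in struct smcd_dev and instead ask the owning driver through smcd->ops->get_local_gid(). A tiny sketch of the accessor indirection, with illustrative types:

    #include <stdint.h>

    struct demo_dev;

    struct demo_ops {
        uint64_t (*get_local_gid)(struct demo_dev *dev);
    };

    struct demo_dev {
        const struct demo_ops *ops;
    };

    /* was: dev->local_gid; the value now lives with the driver */
    static inline uint64_t demo_local_gid(struct demo_dev *dev)
    {
        return dev->ops->get_local_gid(dev);
    }
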
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c305d8dd23f8..b330a1fa453e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -500,6 +500,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
struct netlink_callback *cb)
{
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
+ struct smcd_dev *smcd = lgr->smcd;
struct nlattr *attrs;
void *nlh;
@@ -515,8 +516,9 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
goto errattr;
- if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
- SMC_NLA_LGR_D_PAD))
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
+ smcd->ops->get_local_gid(smcd),
+ SMC_NLA_LGR_D_PAD))
goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
SMC_NLA_LGR_D_PAD))
@@ -820,6 +822,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
struct list_head *lgr_list;
+ struct smcd_dev *smcd;
struct smc_link *lnk;
spinlock_t *lgr_lock;
u8 link_idx;
@@ -851,8 +854,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
- mutex_init(&lgr->sndbufs_lock);
- mutex_init(&lgr->rmbs_lock);
+ init_rwsem(&lgr->sndbufs_lock);
+ init_rwsem(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
@@ -866,7 +869,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
- get_device(&ini->ism_dev[ini->ism_selected]->dev);
+ smcd = ini->ism_dev[ini->ism_selected];
+ get_device(smcd->ops->get_dev(smcd));
lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
lgr->smcd = ini->ism_dev[ini->ism_selected];
lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
@@ -1094,7 +1098,7 @@ err_out:
static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link_group *lgr)
{
- struct mutex *lock; /* lock buffer list */
+ struct rw_semaphore *lock; /* lock buffer list */
int rc;
if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
@@ -1102,10 +1106,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
smc_llc_do_delete_rkey(lgr, buf_desc);
buf_desc->is_conf_rkey = false;
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
@@ -1114,9 +1118,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
/* buf registration failed, reuse not possible */
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
- mutex_lock(lock);
+ down_write(lock);
list_del(&buf_desc->list);
- mutex_unlock(lock);
+ up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
} else {
@@ -1220,15 +1224,16 @@ static void smcr_buf_unmap_lgr(struct smc_link *lnk)
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
smcr_buf_unmap_link(buf_desc, true, lnk);
- mutex_unlock(&lgr->rmbs_lock);
- mutex_lock(&lgr->sndbufs_lock);
+ up_write(&lgr->rmbs_lock);
+
+ down_write(&lgr->sndbufs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
list)
smcr_buf_unmap_link(buf_desc, false, lnk);
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
}
}
@@ -1373,19 +1378,19 @@ static void smc_lgr_free(struct smc_link_group *lgr)
int i;
if (!lgr->is_smcd) {
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_UNUSED)
smcr_link_clear(&lgr->lnk[i], false);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
smc_llc_lgr_clear(lgr);
}
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- put_device(&lgr->smcd->dev);
+ put_device(lgr->smcd->ops->get_dev(lgr->smcd));
}
smc_lgr_put(lgr); /* theoretically last lgr_put */
}
@@ -1692,12 +1697,12 @@ static void smcr_link_down(struct smc_link *lnk)
} else {
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
wait_event_timeout(lgr->llc_flow_waiter,
(list_empty(&lgr->list) ||
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
}
if (!list_empty(&lgr->list)) {
smc_llc_send_delete_link(to_lnk, del_link_id,
@@ -1757,9 +1762,9 @@ static void smc_link_down_work(struct work_struct *work)
if (list_empty(&lgr->list))
return;
wake_up_all(&lgr->llc_msg_waiter);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
smcr_link_down(link);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
@@ -1986,19 +1991,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- struct mutex *lock,
+ struct rw_semaphore *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- mutex_lock(lock);
+ down_read(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- mutex_unlock(lock);
+ up_read(lock);
return buf_slot;
}
}
- mutex_unlock(lock);
+ up_read(lock);
return NULL;
}
@@ -2107,13 +2112,13 @@ int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
return 0;
}
-static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
struct list_head *lst, bool is_rmb)
{
struct smc_buf_desc *buf_desc, *bf;
int rc = 0;
- mutex_lock(lock);
+ down_write(lock);
list_for_each_entry_safe(buf_desc, bf, lst, list) {
if (!buf_desc->used)
continue;
@@ -2122,7 +2127,7 @@ static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
goto out;
}
out:
- mutex_unlock(lock);
+ up_write(lock);
return rc;
}
@@ -2155,37 +2160,37 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
int i, rc = 0;
/* reg all RMBs for a new link */
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
return rc;
/* reg all vzalloced sndbufs for a new link */
- mutex_lock(&lgr->sndbufs_lock);
+ down_write(&lgr->sndbufs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
if (!buf_desc->used || !buf_desc->is_vm)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
@@ -2243,7 +2248,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
int i, rc = 0, cnt = 0;
/* protect against parallel link reconfiguration */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
@@ -2256,7 +2261,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
cnt++;
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
if (!rc && !cnt)
rc = -EINVAL;
return rc;
@@ -2305,8 +2310,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- struct mutex *lock; /* lock buffer list */
int sk_buf_size;
if (is_rmb)
@@ -2354,9 +2359,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
buf_desc->used = 1;
- mutex_lock(lock);
+ down_write(lock);
list_add(&buf_desc->list, buf_list);
- mutex_unlock(lock);
+ up_write(lock);
break; /* found */
}
@@ -2430,9 +2435,9 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
if (rc) {
- mutex_lock(&smc->conn.lgr->sndbufs_lock);
+ down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
- mutex_unlock(&smc->conn.lgr->sndbufs_lock);
+ up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
}
@@ -2595,6 +2600,7 @@ static int smc_core_reboot_event(struct notifier_block *this,
{
smc_lgrs_shutdown();
smc_ib_unregister_client();
+ smc_ism_exit();
return 0;
}
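
Most of the smc_core.c conversion is a mechanical mutex to rw_semaphore swap, but smc_buf_get_slot() is the interesting case: it can now run under down_read() because it never mutates the list. A slot is claimed by cmpxchg() on its used flag, so concurrent shared-mode lookups race safely while list add/del still takes the lock exclusive. A C11-atomics sketch of the claim:

    #include <stdatomic.h>
    #include <stddef.h>

    struct demo_slot {
        atomic_int used;
        struct demo_slot *next;
    };

    /* Called with the list lock held shared: readers never modify the
     * list, and the compare-and-swap arbitrates between them. */
    static struct demo_slot *demo_get_slot(struct demo_slot *head)
    {
        struct demo_slot *s;

        for (s = head; s; s = s->next) {
            int expect = 0;

            if (atomic_compare_exchange_strong(&s->used, &expect, 1))
                return s;
        }
        return NULL;
    }
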
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd8e232..08b457c2d294 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -252,9 +252,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
- struct mutex sndbufs_lock; /* protects tx buffers */
+ struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
- struct mutex rmbs_lock; /* protects rx buffers */
+ struct rw_semaphore rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -298,7 +298,7 @@ struct smc_link_group {
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
- struct mutex llc_conf_mutex;
+ struct rw_semaphore llc_conf_mutex;
/* protects lgr reconfig. */
struct work_struct llc_add_link_work;
struct work_struct llc_del_link_work;
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 80ea7d954ece..7ff2152971a5 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -167,12 +167,13 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo;
+ struct smcd_dev *smcd = conn->lgr->smcd;
memset(&dinfo, 0, sizeof(dinfo));
dinfo.linkid = *((u32 *)conn->lgr->id);
dinfo.peer_gid = conn->lgr->peer_gid;
- dinfo.my_gid = conn->lgr->smcd->local_gid;
+ dinfo.my_gid = smcd->ops->get_local_gid(smcd);
dinfo.token = conn->rmb_desc->token;
dinfo.peer_token = conn->peer_token;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 911fe08bc54b..3b0b7710c6b0 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -17,6 +17,7 @@
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_netlink.h"
+#include "linux/ism.h"
struct smcd_dev_list smcd_dev_list = {
.list = LIST_HEAD_INIT(smcd_dev_list.list),
@@ -26,6 +27,22 @@ struct smcd_dev_list smcd_dev_list = {
static bool smc_ism_v2_capable;
static u8 smc_ism_v2_system_eid[SMC_MAX_EID_LEN];
+#if IS_ENABLED(CONFIG_ISM)
+static void smcd_register_dev(struct ism_dev *ism);
+static void smcd_unregister_dev(struct ism_dev *ism);
+static void smcd_handle_event(struct ism_dev *ism, struct ism_event *event);
+static void smcd_handle_irq(struct ism_dev *ism, unsigned int dmbno,
+ u16 dmbemask);
+
+static struct ism_client smc_ism_client = {
+ .name = "SMC-D",
+ .add = smcd_register_dev,
+ .remove = smcd_unregister_dev,
+ .handle_event = smcd_handle_event,
+ .handle_irq = smcd_handle_irq,
+};
+#endif
+
/* Test if an ISM communication is possible - same CPC */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
{
@@ -183,6 +200,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
+#if IS_ENABLED(CONFIG_ISM)
struct smcd_dmb dmb;
int rc;
@@ -191,7 +209,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid;
- rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
+ rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
@@ -200,6 +218,9 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb_desc->len = dmb.dmb_len;
}
return rc;
+#else
+ return 0;
+#endif
}
static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
@@ -210,9 +231,11 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
struct smc_pci_dev smc_pci_dev;
struct nlattr *port_attrs;
struct nlattr *attrs;
+ struct ism_dev *ism;
int use_cnt = 0;
void *nlh;
+ ism = smcd->priv;
nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&smc_gen_nl_family, NLM_F_MULTI,
SMC_NETLINK_GET_DEV_SMCD);
@@ -227,7 +250,7 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, use_cnt > 0))
goto errattr;
memset(&smc_pci_dev, 0, sizeof(smc_pci_dev));
- smc_set_pci_values(to_pci_dev(smcd->dev.parent), &smc_pci_dev);
+ smc_set_pci_values(to_pci_dev(ism->dev.parent), &smc_pci_dev);
if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid))
goto errattr;
if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid))
@@ -293,10 +316,11 @@ int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+#if IS_ENABLED(CONFIG_ISM)
struct smc_ism_event_work {
struct work_struct work;
struct smcd_dev *smcd;
- struct smcd_event event;
+ struct ism_event event;
};
#define ISM_EVENT_REQUEST 0x0001
@@ -336,24 +360,6 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
}
}
-int smc_ism_signal_shutdown(struct smc_link_group *lgr)
-{
- int rc;
- union smcd_sw_event_info ev_info;
-
- if (lgr->peer_shutdown)
- return 0;
-
- memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
- ev_info.vlan_id = lgr->vlan_id;
- ev_info.code = ISM_EVENT_REQUEST;
- rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
- ISM_EVENT_REQUEST_IR,
- ISM_EVENT_CODE_SHUTDOWN,
- ev_info.info);
- return rc;
-}
-
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
@@ -373,44 +379,25 @@ static void smc_ism_event_work(struct work_struct *work)
kfree(wrk);
}
-static void smcd_release(struct device *dev)
-{
- struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
-
- kfree(smcd->conn);
- kfree(smcd);
-}
-
-struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
- const struct smcd_ops *ops, int max_dmbs)
+static struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+ const struct smcd_ops *ops, int max_dmbs)
{
struct smcd_dev *smcd;
- smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+ smcd = devm_kzalloc(parent, sizeof(*smcd), GFP_KERNEL);
if (!smcd)
return NULL;
- smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
- GFP_KERNEL);
- if (!smcd->conn) {
- kfree(smcd);
+ smcd->conn = devm_kcalloc(parent, max_dmbs,
+ sizeof(struct smc_connection *), GFP_KERNEL);
+ if (!smcd->conn)
return NULL;
- }
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
- if (!smcd->event_wq) {
- kfree(smcd->conn);
- kfree(smcd);
+ if (!smcd->event_wq)
return NULL;
- }
- smcd->dev.parent = parent;
- smcd->dev.release = smcd_release;
- device_initialize(&smcd->dev);
- dev_set_name(&smcd->dev, name);
smcd->ops = ops;
- if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
- smc_pnetid_by_table_smcd(smcd);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
@@ -419,11 +406,23 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
init_waitqueue_head(&smcd->lgrs_deleted);
return smcd;
}
-EXPORT_SYMBOL_GPL(smcd_alloc_dev);
-int smcd_register_dev(struct smcd_dev *smcd)
+static void smcd_register_dev(struct ism_dev *ism)
{
- int rc;
+ const struct smcd_ops *ops = ism_get_smcd_ops();
+ struct smcd_dev *smcd;
+
+ if (!ops)
+ return;
+
+ smcd = smcd_alloc_dev(&ism->pdev->dev, dev_name(&ism->pdev->dev), ops,
+ ISM_NR_DMBS);
+ if (!smcd)
+ return;
+ smcd->priv = ism;
+ ism_set_priv(ism, &smc_ism_client, smcd);
+ if (smc_pnetid_by_dev_port(&ism->pdev->dev, 0, smcd->pnetid))
+ smc_pnetid_by_table_smcd(smcd);
mutex_lock(&smcd_dev_list.mutex);
if (list_empty(&smcd_dev_list.list)) {
@@ -444,43 +443,28 @@ int smcd_register_dev(struct smcd_dev *smcd)
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
- dev_name(&smcd->dev), smcd->pnetid,
+ dev_name(&ism->dev), smcd->pnetid,
smcd->pnetid_by_user ? " (user defined)" : "");
- rc = device_add(&smcd->dev);
- if (rc) {
- mutex_lock(&smcd_dev_list.mutex);
- list_del(&smcd->list);
- mutex_unlock(&smcd_dev_list.mutex);
- }
-
- return rc;
+ return;
}
-EXPORT_SYMBOL_GPL(smcd_register_dev);
-void smcd_unregister_dev(struct smcd_dev *smcd)
+static void smcd_unregister_dev(struct ism_dev *ism)
{
+ struct smcd_dev *smcd = ism_get_priv(ism, &smc_ism_client);
+
pr_warn_ratelimited("smc: removing smcd device %s\n",
- dev_name(&smcd->dev));
+ dev_name(&ism->dev));
+ smcd->going_away = 1;
+ smc_smcd_terminate_all(smcd);
mutex_lock(&smcd_dev_list.mutex);
list_del_init(&smcd->list);
mutex_unlock(&smcd_dev_list.mutex);
- smcd->going_away = 1;
- smc_smcd_terminate_all(smcd);
destroy_workqueue(smcd->event_wq);
-
- device_del(&smcd->dev);
-}
-EXPORT_SYMBOL_GPL(smcd_unregister_dev);
-
-void smcd_free_dev(struct smcd_dev *smcd)
-{
- put_device(&smcd->dev);
}
-EXPORT_SYMBOL_GPL(smcd_free_dev);
/* SMCD Device event handler. Called from ISM device interrupt handler.
- * Parameters are smcd device pointer,
+ * Parameters are ism device pointer,
* - event->type (0 --> DMB, 1 --> GID),
* - event->code (event code),
* - event->tok (either DMB token when event type 0, or GID when event type 1)
@@ -490,8 +474,9 @@ EXPORT_SYMBOL_GPL(smcd_free_dev);
* Context:
* - Function called in IRQ context from ISM device driver event handler.
*/
-void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
+static void smcd_handle_event(struct ism_dev *ism, struct ism_event *event)
{
+ struct smcd_dev *smcd = ism_get_priv(ism, &smc_ism_client);
struct smc_ism_event_work *wrk;
if (smcd->going_away)
@@ -505,17 +490,18 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
wrk->event = *event;
queue_work(smcd->event_wq, &wrk->work);
}
-EXPORT_SYMBOL_GPL(smcd_handle_event);
/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
- * Parameters are smcd device pointer, DMB number, and the DMBE bitmask.
+ * Parameters are the ism device pointer, DMB number, and the DMBE bitmask.
* Find the connection and schedule the tasklet for this connection.
*
* Context:
* - Function called in IRQ context from ISM device driver IRQ handler.
*/
-void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask)
+static void smcd_handle_irq(struct ism_dev *ism, unsigned int dmbno,
+ u16 dmbemask)
{
+ struct smcd_dev *smcd = ism_get_priv(ism, &smc_ism_client);
struct smc_connection *conn = NULL;
unsigned long flags;
@@ -525,10 +511,44 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask)
tasklet_schedule(&conn->rx_tsklet);
spin_unlock_irqrestore(&smcd->lock, flags);
}
-EXPORT_SYMBOL_GPL(smcd_handle_irq);
+#endif
+
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+ int rc = 0;
+#if IS_ENABLED(CONFIG_ISM)
+ union smcd_sw_event_info ev_info;
+
+ if (lgr->peer_shutdown)
+ return 0;
+
+ memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+ ev_info.vlan_id = lgr->vlan_id;
+ ev_info.code = ISM_EVENT_REQUEST;
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ ISM_EVENT_REQUEST_IR,
+ ISM_EVENT_CODE_SHUTDOWN,
+ ev_info.info);
+#endif
+ return rc;
+}
-void __init smc_ism_init(void)
+int smc_ism_init(void)
{
+ int rc = 0;
+
+#if IS_ENABLED(CONFIG_ISM)
smc_ism_v2_capable = false;
memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN);
+
+ rc = ism_register_client(&smc_ism_client);
+#endif
+ return rc;
+}
+
+void smc_ism_exit(void)
+{
+#if IS_ENABLED(CONFIG_ISM)
+ ism_unregister_client(&smc_ism_client);
+#endif
}
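
smc_ism.c inverts the old relationship with the ISM driver: instead of the driver calling exported smcd_* entry points, SMC registers an ism_client callback table at init time and the driver invokes it for device add/remove, events, and IRQs; devm_* allocations tied to the parent device replace the manual smcd_release() path. A self-contained sketch of the registration shape, where the demo_* names are illustrative rather than the linux/ism.h API:

    #include <stdio.h>

    struct demo_client {
        const char *name;
        void (*add)(void *dev);
        void (*remove)(void *dev);
    };

    static struct demo_client *registered;

    /* The real ism_register_client() also attaches the client to
     * already-present devices; this stub only records the table. */
    static int demo_register_client(struct demo_client *c)
    {
        registered = c;
        return 0;
    }

    static void smcd_add(void *dev)    { printf("smcd add %p\n", dev); }
    static void smcd_remove(void *dev) { printf("smcd remove %p\n", dev); }

    static struct demo_client smc_ism_client = {
        .name = "SMC-D",
        .add = smcd_add,
        .remove = smcd_remove,
    };

    int main(void)
    {
        int dev;

        demo_register_client(&smc_ism_client);
        registered->add(&dev);    /* driver-side hotplug notification */
        registered->remove(&dev);
        return 0;
    }
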
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index d6b2db604fe8..832b2f42d79f 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -42,7 +42,8 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
-void smc_ism_init(void);
+int smc_ism_init(void);
+void smc_ism_exit(void);
int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 524649d0ab65..a0840b8c935b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -608,7 +608,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
prim_lnk_idx = link->link_idx;
lnk_idx = link_new->link_idx;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
ext->num_rkeys = lgr->conns_num;
if (!ext->num_rkeys)
goto out;
@@ -628,7 +628,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
}
len += i * sizeof(ext->rt[0]);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return len;
}
@@ -889,7 +889,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -916,7 +916,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
break;
} while (num_rkeys_send || num_rkeys_recv);
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -999,14 +999,14 @@ static void smc_llc_save_add_link_rkeys(struct smc_link *link,
ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
SMC_WR_TX_SIZE);
max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < max; i++) {
smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
ext->rt[i].rmb_key,
ext->rt[i].rmb_vaddr_new,
ext->rt[i].rmb_key_new);
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
}
static void smc_llc_save_add_link_info(struct smc_link *link,
@@ -1202,12 +1202,12 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
if (smc_llc_is_local_add_link(&qentry->msg))
smc_llc_cli_add_link_invite(qentry->link, qentry);
else
smc_llc_cli_add_link(qentry->link, qentry);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_llc_active_link_count(struct smc_link_group *lgr)
@@ -1313,7 +1313,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -1338,7 +1338,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
} while (num_rkeys_send || num_rkeys_recv);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -1509,13 +1509,13 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */
smc_llc_delete_asym_link(lgr);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -1582,7 +1582,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
goto out;
}
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
/* delete single link */
for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
@@ -1616,7 +1616,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
}
out_unlock:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
out:
kfree(qentry);
}
@@ -1652,7 +1652,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
int active_links;
int i;
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
lnk = qentry->link;
del_llc = &qentry->msg.delete_link;
@@ -1708,7 +1708,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
smc_llc_add_link_local(lnk);
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -2126,7 +2126,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
spin_lock_init(&lgr->llc_flow_lock);
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
- mutex_init(&lgr->llc_conf_mutex);
+ init_rwsem(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 25fb2fd186e2..11775401df68 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -103,7 +103,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
struct smc_pnetentry *pnetelem, *tmp_pe;
struct smc_pnettable *pnettable;
struct smc_ib_device *ibdev;
- struct smcd_dev *smcd_dev;
+ struct smcd_dev *smcd;
struct smc_net *sn;
int rc = -ENOENT;
int ibport;
@@ -162,16 +162,17 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
mutex_unlock(&smc_ib_devices.mutex);
/* remove smcd devices */
mutex_lock(&smcd_dev_list.mutex);
- list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
- if (smcd_dev->pnetid_by_user &&
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ if (smcd->pnetid_by_user &&
(!pnet_name ||
- smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+ smc_pnet_match(pnet_name, smcd->pnetid))) {
pr_warn_ratelimited("smc: smcd device %s "
"erased user defined pnetid "
- "%.16s\n", dev_name(&smcd_dev->dev),
- smcd_dev->pnetid);
- memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
- smcd_dev->pnetid_by_user = false;
+ "%.16s\n",
+ dev_name(smcd->ops->get_dev(smcd)),
+ smcd->pnetid);
+ memset(smcd->pnetid, 0, SMC_MAX_PNETID_LEN);
+ smcd->pnetid_by_user = false;
rc = 0;
}
}
@@ -331,8 +332,8 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
- if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
- IB_DEVICE_NAME_MAX - 1))
+ if (!strncmp(dev_name(smcd_dev->ops->get_dev(smcd_dev)),
+ smcd_name, IB_DEVICE_NAME_MAX - 1))
goto out;
}
smcd_dev = NULL;
@@ -411,7 +412,8 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
struct smc_ib_device *ib_dev;
bool smcddev_applied = true;
bool ibdev_applied = true;
- struct smcd_dev *smcd_dev;
+ struct smcd_dev *smcd;
+ struct device *dev;
bool new_ibdev;
/* try to apply the pnetid to active devices */
@@ -425,14 +427,16 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
ib_port,
ib_dev->pnetid[ib_port - 1]);
}
- smcd_dev = smc_pnet_find_smcd(ib_name);
- if (smcd_dev) {
- smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
- if (smcddev_applied)
+ smcd = smc_pnet_find_smcd(ib_name);
+ if (smcd) {
+ smcddev_applied = smc_pnet_apply_smcd(smcd, pnet_name);
+ if (smcddev_applied) {
+ dev = smcd->ops->get_dev(smcd);
pr_warn_ratelimited("smc: smcd device %s "
"applied user defined pnetid "
- "%.16s\n", dev_name(&smcd_dev->dev),
- smcd_dev->pnetid);
+ "%.16s\n", dev_name(dev),
+ smcd->pnetid);
+ }
}
/* Apply fails when a device has a hardware-defined pnetid set, do not
* add a pnet table entry in that case.
@@ -1181,7 +1185,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
*/
int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
{
- const char *ib_name = dev_name(&smcddev->dev);
+ const char *ib_name = dev_name(smcddev->ops->get_dev(smcddev));
struct smc_pnettable *pnettable;
struct smc_pnetentry *tmp_pe;
struct smc_net *sn;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 17c5aee7ee4f..4380d32f5a5f 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -13,8 +13,10 @@
#include <linux/net.h>
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
+#include <linux/splice.h>
#include <net/sock.h>
+#include <trace/events/sock.h>
#include "smc.h"
#include "smc_core.h"
@@ -31,6 +33,8 @@ static void smc_rx_wake_up(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
/* derived from sock_def_readable() */
/* called already in smc_listen_work() */
rcu_read_lock();
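
smc_rx_wake_up() gets the same treatment as every other *_data_ready() callback touched in this series: trace_sk_data_ready() fires unconditionally at the top of the callback, before any locking or wakeup logic, so each receive-side wakeup is observable. A sketch of that placement; printf stands in for the tracepoint:

    #include <stdio.h>

    struct demo_sock { int id; };

    static void demo_trace_sk_data_ready(struct demo_sock *sk)
    {
        printf("sk_data_ready: sk=%d\n", sk->id);
    }

    static void demo_data_ready(struct demo_sock *sk)
    {
        demo_trace_sk_data_ready(sk);    /* hook first, before locks */

        /* ... existing wakeup logic (rcu_read_lock, wake sleepers) ... */
    }
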
diff --git a/net/socket.c b/net/socket.c
index 888cd618a968..77626e4d9690 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@
#include <net/busy_poll.h>
#include <linux/errqueue.h>
#include <linux/ptp_clock_kernel.h>
+#include <trace/events/sock.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
@@ -709,12 +710,22 @@ INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
size_t));
INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
size_t));
+
+static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
+ int flags)
+{
+ trace_sock_send_length(sk, ret, 0);
+}
+
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
+
+ if (trace_sock_send_length_enabled())
+ call_trace_sock_send_length(sock->sk, ret, 0);
return ret;
}
@@ -989,12 +1000,21 @@ INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
size_t, int));
INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
size_t, int));
+
+static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
+{
+ trace_sock_recv_length(sk, ret, flags);
+}
+
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
- inet_recvmsg, sock, msg, msg_data_left(msg),
- flags);
+ int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ inet_recvmsg, sock, msg,
+ msg_data_left(msg), flags);
+ if (trace_sock_recv_length_enabled())
+ call_trace_sock_recv_length(sock->sk, ret, flags);
+ return ret;
}
/**
@@ -1044,6 +1064,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
{
struct socket *sock;
int flags;
+ int ret;
sock = file->private_data;
@@ -1051,7 +1072,11 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
flags |= more;
- return kernel_sendpage(sock, page, offset, size, flags);
+ ret = kernel_sendpage(sock, page, offset, size, flags);
+
+ if (trace_sock_send_length_enabled())
+ call_trace_sock_send_length(sock->sk, ret, 0);
+ return ret;
}
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
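
The socket.c hunks gate each new tracepoint behind trace_*_enabled() and push the actual call into a noinline helper, so the hot sendmsg/recvmsg paths stay compact and the common case pays only a statically patched branch. A userspace sketch; a plain bool stands in for the kernel's static key:

    #include <stdbool.h>
    #include <stdio.h>

    static bool trace_enabled;    /* kernel: a static_branch key */

    __attribute__((noinline))
    static void call_trace_send_length(int ret)
    {
        printf("sock_send_length: ret=%d\n", ret);
    }

    static inline int demo_sendmsg_nosec(int ret_from_protocol)
    {
        if (trace_enabled)    /* rarely true: slow path stays out of line */
            call_trace_send_length(ret_from_protocol);
        return ret_from_protocol;
    }
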
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 815baf308236..99eafe87b1d5 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -55,6 +55,7 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/events/sock.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
@@ -310,6 +311,8 @@ static void svc_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+ trace_sk_data_ready(sk);
+
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
@@ -687,6 +690,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+ trace_sk_data_ready(sk);
+
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index aaa5b2741b79..adcbedc244d6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,7 @@
#include <linux/uio.h>
#include <linux/sched/mm.h>
+#include <trace/events/sock.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
@@ -1378,6 +1379,8 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
+ trace_sk_data_ready(sk);
+
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
struct sock_xprt *transport = container_of(xprt,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b35c8701876a..07c9bf5f7f5c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -37,6 +37,7 @@
#include <linux/rhashtable.h>
#include <linux/sched/signal.h>
+#include <trace/events/sock.h>
#include "core.h"
#include "name_table.h"
@@ -2130,6 +2131,8 @@ static void tipc_data_ready(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 69c88cc03887..8ee0c07d00e9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -43,6 +43,7 @@
#include "bearer.h"
#include <net/sock.h>
#include <linux/module.h>
+#include <trace/events/sock.h>
/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
@@ -439,6 +440,8 @@ static void tipc_conn_data_ready(struct sock *sk)
{
struct tipc_conn *con;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
con = sk->sk_user_data;
if (connected(con)) {
@@ -496,6 +499,8 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
{
struct tipc_topsrv *srv;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
srv = sk->sk_user_data;
if (srv)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a83d2b4275fa..6d0a534b7baa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -43,6 +43,7 @@
#include <net/strparser.h>
#include <net/tls.h>
+#include <trace/events/sock.h>
#include "tls.h"
@@ -2284,6 +2285,8 @@ static void tls_data_ready(struct sock *sk)
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
+ trace_sk_data_ready(sk);
+
tls_strp_data_ready(&ctx->strp);
psock = sk_psock_get(sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f0c2293f1d3b..0be25e712c28 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -112,6 +112,7 @@
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
+#include <linux/splice.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>
@@ -807,23 +808,23 @@ static int unix_count_nr_fds(struct sock *sk)
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
+ unsigned char s_state;
struct unix_sock *u;
- int nr_fds;
+ int nr_fds = 0;
if (sk) {
+ s_state = READ_ONCE(sk->sk_state);
u = unix_sk(sk);
- if (sock->type == SOCK_DGRAM) {
- nr_fds = atomic_read(&u->scm_stat.nr_fds);
- goto out_print;
- }
- unix_state_lock(sk);
- if (sk->sk_state != TCP_LISTEN)
+ /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
+ * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
+ * SOCK_DGRAM is ordinary. So, no lock is needed.
+ */
+ if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
nr_fds = atomic_read(&u->scm_stat.nr_fds);
- else
+ else if (s_state == TCP_LISTEN)
nr_fds = unix_count_nr_fds(sk);
- unix_state_unlock(sk);
-out_print:
+
seq_printf(m, "scm_fds: %u\n", nr_fds);
}
}
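
unix_show_fdinfo() now avoids unix_state_lock() entirely: it snapshots sk_state once with READ_ONCE() and relies on stream/seqpacket sockets never leaving TCP_ESTABLISHED or TCP_LISTEN once reached. A sketch of the single-snapshot read; C11 atomics stand in for READ_ONCE():

    #include <stdatomic.h>

    enum demo_state { DS_NONE, DS_ESTABLISHED, DS_LISTEN };

    struct demo_sock {
        _Atomic int state;    /* written once per stable transition */
        _Atomic int nr_fds;
    };

    static int demo_fdinfo_nr_fds(struct demo_sock *sk, int is_dgram)
    {
        int s = atomic_load(&sk->state);    /* one snapshot, no lock */

        if (is_dgram || s == DS_ESTABLISHED)
            return atomic_load(&sk->nr_fds);
        if (s == DS_LISTEN)
            return 0;    /* the real code walks the accept queue here */
        return 0;
    }
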
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index d593d5b6d4b1..19aea7cba26e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1861,8 +1861,9 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
written = transport->stream_enqueue(vsk,
msg, len - total_written);
}
+
if (written < 0) {
- err = -ENOMEM;
+ err = written;
goto out_err;
}
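
The af_vsock.c change is one line but worth noting: the transport's negative return code is now propagated instead of being flattened to -ENOMEM, so callers can tell, say, a message-size failure from allocation pressure. A trivial sketch with a hypothetical enqueue hook:

    #include <errno.h>

    /* hypothetical transport hook for this sketch */
    static long demo_enqueue(long room, long want)
    {
        return want <= room ? want : -EMSGSIZE;
    }

    static long demo_sendmsg(long room, long want)
    {
        long written = demo_enqueue(room, want);

        if (written < 0)
            return written;    /* was: return -ENOMEM regardless */
        return written;
    }
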
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ad64f403536a..28b5a8e8e094 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -42,8 +42,7 @@ struct virtio_vsock {
bool tx_run;
struct work_struct send_pkt_work;
- spinlock_t send_pkt_list_lock;
- struct list_head send_pkt_list;
+ struct sk_buff_head send_pkt_queue;
atomic_t queued_replies;
@@ -101,41 +100,31 @@ virtio_transport_send_pkt_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
for (;;) {
- struct virtio_vsock_pkt *pkt;
struct scatterlist hdr, buf, *sgs[2];
int ret, in_sg = 0, out_sg = 0;
+ struct sk_buff *skb;
bool reply;
- spin_lock_bh(&vsock->send_pkt_list_lock);
- if (list_empty(&vsock->send_pkt_list)) {
- spin_unlock_bh(&vsock->send_pkt_list_lock);
+ skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
+ if (!skb)
break;
- }
-
- pkt = list_first_entry(&vsock->send_pkt_list,
- struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
- spin_unlock_bh(&vsock->send_pkt_list_lock);
- virtio_transport_deliver_tap_pkt(pkt);
+ virtio_transport_deliver_tap_pkt(skb);
+ reply = virtio_vsock_skb_reply(skb);
- reply = pkt->reply;
-
- sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
sgs[out_sg++] = &hdr;
- if (pkt->buf) {
- sg_init_one(&buf, pkt->buf, pkt->len);
+ if (skb->len > 0) {
+ sg_init_one(&buf, skb->data, skb->len);
sgs[out_sg++] = &buf;
}
- ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
/* Usually this means that there is no more space available in
* the vq
*/
if (ret < 0) {
- spin_lock_bh(&vsock->send_pkt_list_lock);
- list_add(&pkt->list, &vsock->send_pkt_list);
- spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
break;
}
@@ -164,32 +153,32 @@ out:
}
static int
-virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
+virtio_transport_send_pkt(struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr;
struct virtio_vsock *vsock;
- int len = pkt->len;
+ int len = skb->len;
+
+ hdr = virtio_vsock_hdr(skb);
rcu_read_lock();
vsock = rcu_dereference(the_virtio_vsock);
if (!vsock) {
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
len = -ENODEV;
goto out_rcu;
}
- if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
- virtio_transport_free_pkt(pkt);
+ if (le64_to_cpu(hdr->dst_cid) == vsock->guest_cid) {
+ kfree_skb(skb);
len = -ENODEV;
goto out_rcu;
}
- if (pkt->reply)
+ if (virtio_vsock_skb_reply(skb))
atomic_inc(&vsock->queued_replies);
- spin_lock_bh(&vsock->send_pkt_list_lock);
- list_add_tail(&pkt->list, &vsock->send_pkt_list);
- spin_unlock_bh(&vsock->send_pkt_list_lock);
-
+ virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
out_rcu:
@@ -201,9 +190,7 @@ static int
virtio_transport_cancel_pkt(struct vsock_sock *vsk)
{
struct virtio_vsock *vsock;
- struct virtio_vsock_pkt *pkt, *n;
int cnt = 0, ret;
- LIST_HEAD(freeme);
rcu_read_lock();
vsock = rcu_dereference(the_virtio_vsock);
@@ -212,20 +199,7 @@ virtio_transport_cancel_pkt(struct vsock_sock *vsk)
goto out_rcu;
}
- spin_lock_bh(&vsock->send_pkt_list_lock);
- list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
- if (pkt->vsk != vsk)
- continue;
- list_move(&pkt->list, &freeme);
- }
- spin_unlock_bh(&vsock->send_pkt_list_lock);
-
- list_for_each_entry_safe(pkt, n, &freeme, list) {
- if (pkt->reply)
- cnt++;
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
+ cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);
if (cnt) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
@@ -246,38 +220,28 @@ out_rcu:
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
- int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
- struct virtio_vsock_pkt *pkt;
- struct scatterlist hdr, buf, *sgs[2];
+ int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM;
+ struct scatterlist pkt, *p;
struct virtqueue *vq;
+ struct sk_buff *skb;
int ret;
vq = vsock->vqs[VSOCK_VQ_RX];
do {
- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt)
+ skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL);
+ if (!skb)
break;
- pkt->buf = kmalloc(buf_len, GFP_KERNEL);
- if (!pkt->buf) {
- virtio_transport_free_pkt(pkt);
+ memset(skb->head, 0, VIRTIO_VSOCK_SKB_HEADROOM);
+ sg_init_one(&pkt, virtio_vsock_hdr(skb), total_len);
+ p = &pkt;
+ ret = virtqueue_add_sgs(vq, &p, 0, 1, skb, GFP_KERNEL);
+ if (ret < 0) {
+ kfree_skb(skb);
break;
}
- pkt->buf_len = buf_len;
- pkt->len = buf_len;
-
- sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
- sgs[0] = &hdr;
-
- sg_init_one(&buf, pkt->buf, buf_len);
- sgs[1] = &buf;
- ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
- if (ret) {
- virtio_transport_free_pkt(pkt);
- break;
- }
vsock->rx_buf_nr++;
} while (vq->num_free);
if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
@@ -299,12 +263,12 @@ static void virtio_transport_tx_work(struct work_struct *work)
goto out;
do {
- struct virtio_vsock_pkt *pkt;
+ struct sk_buff *skb;
unsigned int len;
virtqueue_disable_cb(vq);
- while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
- virtio_transport_free_pkt(pkt);
+ while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
+ consume_skb(skb);
added = true;
}
} while (!virtqueue_enable_cb(vq));
@@ -529,7 +493,7 @@ static void virtio_transport_rx_work(struct work_struct *work)
do {
virtqueue_disable_cb(vq);
for (;;) {
- struct virtio_vsock_pkt *pkt;
+ struct sk_buff *skb;
unsigned int len;
if (!virtio_transport_more_replies(vsock)) {
@@ -540,23 +504,22 @@ static void virtio_transport_rx_work(struct work_struct *work)
goto out;
}
- pkt = virtqueue_get_buf(vq, &len);
- if (!pkt) {
+ skb = virtqueue_get_buf(vq, &len);
+ if (!skb)
break;
- }
vsock->rx_buf_nr--;
/* Drop short/long packets */
- if (unlikely(len < sizeof(pkt->hdr) ||
- len > sizeof(pkt->hdr) + pkt->len)) {
- virtio_transport_free_pkt(pkt);
+ if (unlikely(len < sizeof(struct virtio_vsock_hdr) ||
+ len > virtio_vsock_skb_len(skb))) {
+ kfree_skb(skb);
continue;
}
- pkt->len = len - sizeof(pkt->hdr);
- virtio_transport_deliver_tap_pkt(pkt);
- virtio_transport_recv_pkt(&virtio_transport, pkt);
+ virtio_vsock_skb_rx_put(skb);
+ virtio_transport_deliver_tap_pkt(skb);
+ virtio_transport_recv_pkt(&virtio_transport, skb);
}
} while (!virtqueue_enable_cb(vq));
@@ -610,7 +573,7 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
{
struct virtio_device *vdev = vsock->vdev;
- struct virtio_vsock_pkt *pkt;
+ struct sk_buff *skb;
/* Reset all connected sockets when the VQs disappear */
vsock_for_each_connected_socket(&virtio_transport.transport,
@@ -637,23 +600,16 @@ static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
virtio_reset_device(vdev);
mutex_lock(&vsock->rx_lock);
- while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
- virtio_transport_free_pkt(pkt);
+ while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
+ kfree_skb(skb);
mutex_unlock(&vsock->rx_lock);
mutex_lock(&vsock->tx_lock);
- while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
- virtio_transport_free_pkt(pkt);
+ while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
+ kfree_skb(skb);
mutex_unlock(&vsock->tx_lock);
- spin_lock_bh(&vsock->send_pkt_list_lock);
- while (!list_empty(&vsock->send_pkt_list)) {
- pkt = list_first_entry(&vsock->send_pkt_list,
- struct virtio_vsock_pkt, list);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
- spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
@@ -690,8 +646,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
mutex_init(&vsock->event_lock);
- spin_lock_init(&vsock->send_pkt_list_lock);
- INIT_LIST_HEAD(&vsock->send_pkt_list);
+ skb_queue_head_init(&vsock->send_pkt_queue);
INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
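
The virtio_transport.c rewrite folds the old pkt plus buf pair into one sk_buff: the virtio_vsock header lives in fixed headroom, skb->data and skb->len track only unread payload, and skb_pull() replaces the pkt->off bookkeeping. A plain-pointer sketch of that layout; the 44-byte header size is an assumption standing in for sizeof(struct virtio_vsock_hdr):

    #include <stdlib.h>
    #include <stdint.h>

    #define DEMO_HDR_ROOM 44    /* assumed header size, see above */

    struct demo_skb {
        uint8_t *head;    /* allocation start: header stored here */
        uint8_t *data;    /* payload cursor = head + DEMO_HDR_ROOM */
        size_t len;       /* remaining (unread) payload bytes */
    };

    static struct demo_skb *demo_alloc(size_t payload)
    {
        struct demo_skb *skb = malloc(sizeof(*skb));

        if (!skb)
            return NULL;
        skb->head = calloc(1, DEMO_HDR_ROOM + payload);
        if (!skb->head) {
            free(skb);
            return NULL;
        }
        skb->data = skb->head + DEMO_HDR_ROOM;
        skb->len = payload;
        return skb;
    }

    /* kernel: virtio_vsock_hdr(skb) returns the headroom as the header */
    static void *demo_hdr(struct demo_skb *skb)
    {
        return skb->head;
    }

    /* kernel: skb_pull() after a copy-out advances data and shrinks len */
    static void demo_pull(struct demo_skb *skb, size_t n)
    {
        skb->data += n;
        skb->len -= n;
    }
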
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a9980e9b9304..a1581c77cf84 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -37,53 +37,56 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
return container_of(t, struct virtio_transport, transport);
}
-static struct virtio_vsock_pkt *
-virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
+/* Returns a new packet on success, otherwise returns NULL.
+ *
+ * On failure, callers only see the NULL; no error code is propagated.
+ */
+static struct sk_buff *
+virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
size_t len,
u32 src_cid,
u32 src_port,
u32 dst_cid,
u32 dst_port)
{
- struct virtio_vsock_pkt *pkt;
+ const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
+ struct virtio_vsock_hdr *hdr;
+ struct sk_buff *skb;
+ void *payload;
int err;
- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt)
+ skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
+ if (!skb)
return NULL;
- pkt->hdr.type = cpu_to_le16(info->type);
- pkt->hdr.op = cpu_to_le16(info->op);
- pkt->hdr.src_cid = cpu_to_le64(src_cid);
- pkt->hdr.dst_cid = cpu_to_le64(dst_cid);
- pkt->hdr.src_port = cpu_to_le32(src_port);
- pkt->hdr.dst_port = cpu_to_le32(dst_port);
- pkt->hdr.flags = cpu_to_le32(info->flags);
- pkt->len = len;
- pkt->hdr.len = cpu_to_le32(len);
- pkt->reply = info->reply;
- pkt->vsk = info->vsk;
+ hdr = virtio_vsock_hdr(skb);
+ hdr->type = cpu_to_le16(info->type);
+ hdr->op = cpu_to_le16(info->op);
+ hdr->src_cid = cpu_to_le64(src_cid);
+ hdr->dst_cid = cpu_to_le64(dst_cid);
+ hdr->src_port = cpu_to_le32(src_port);
+ hdr->dst_port = cpu_to_le32(dst_port);
+ hdr->flags = cpu_to_le32(info->flags);
+ hdr->len = cpu_to_le32(len);
if (info->msg && len > 0) {
- pkt->buf = kmalloc(len, GFP_KERNEL);
- if (!pkt->buf)
- goto out_pkt;
-
- pkt->buf_len = len;
-
- err = memcpy_from_msg(pkt->buf, info->msg, len);
+ payload = skb_put(skb, len);
+ err = memcpy_from_msg(payload, info->msg, len);
if (err)
goto out;
if (msg_data_left(info->msg) == 0 &&
info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
if (info->msg->msg_flags & MSG_EOR)
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
}
}
+ if (info->reply)
+ virtio_vsock_skb_set_reply(skb);
+
trace_virtio_transport_alloc_pkt(src_cid, src_port,
dst_cid, dst_port,
len,
@@ -91,19 +94,18 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
info->op,
info->flags);
- return pkt;
+ return skb;
out:
- kfree(pkt->buf);
-out_pkt:
- kfree(pkt);
+ kfree_skb(skb);
return NULL;
}
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
- struct virtio_vsock_pkt *pkt = opaque;
+ struct virtio_vsock_hdr *pkt_hdr;
+ struct sk_buff *pkt = opaque;
struct af_vsockmon_hdr *hdr;
struct sk_buff *skb;
size_t payload_len;
@@ -113,10 +115,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
* the payload length from the header and the buffer pointer taking
* care of the offset in the original packet.
*/
- payload_len = le32_to_cpu(pkt->hdr.len);
- payload_buf = pkt->buf + pkt->off;
+ pkt_hdr = virtio_vsock_hdr(pkt);
+ payload_len = pkt->len;
+ payload_buf = pkt->data;
- skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
+ skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
GFP_ATOMIC);
if (!skb)
return NULL;
@@ -124,16 +127,16 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
hdr = skb_put(skb, sizeof(*hdr));
/* pkt->hdr is little-endian so no need to byteswap here */
- hdr->src_cid = pkt->hdr.src_cid;
- hdr->src_port = pkt->hdr.src_port;
- hdr->dst_cid = pkt->hdr.dst_cid;
- hdr->dst_port = pkt->hdr.dst_port;
+ hdr->src_cid = pkt_hdr->src_cid;
+ hdr->src_port = pkt_hdr->src_port;
+ hdr->dst_cid = pkt_hdr->dst_cid;
+ hdr->dst_port = pkt_hdr->dst_port;
hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
- hdr->len = cpu_to_le16(sizeof(pkt->hdr));
+ hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
memset(hdr->reserved, 0, sizeof(hdr->reserved));
- switch (le16_to_cpu(pkt->hdr.op)) {
+ switch (le16_to_cpu(pkt_hdr->op)) {
case VIRTIO_VSOCK_OP_REQUEST:
case VIRTIO_VSOCK_OP_RESPONSE:
hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
@@ -154,7 +157,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
break;
}
- skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
+ skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
if (payload_len) {
skb_put_data(skb, payload_buf, payload_len);
@@ -163,13 +166,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
return skb;
}
-void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
+void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
{
- if (pkt->tap_delivered)
+ if (virtio_vsock_skb_tap_delivered(skb))
return;
- vsock_deliver_tap(virtio_transport_build_skb, pkt);
- pkt->tap_delivered = true;
+ vsock_deliver_tap(virtio_transport_build_skb, skb);
+ virtio_vsock_skb_set_tap_delivered(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
@@ -192,8 +195,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
u32 src_cid, src_port, dst_cid, dst_port;
const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
- struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
+ struct sk_buff *skb;
info->type = virtio_transport_get_type(sk_vsock(vsk));
@@ -224,42 +227,47 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
return pkt_len;
- pkt = virtio_transport_alloc_pkt(info, pkt_len,
+ skb = virtio_transport_alloc_skb(info, pkt_len,
src_cid, src_port,
dst_cid, dst_port);
- if (!pkt) {
+ if (!skb) {
virtio_transport_put_credit(vvs, pkt_len);
return -ENOMEM;
}
- virtio_transport_inc_tx_pkt(vvs, pkt);
+ virtio_transport_inc_tx_pkt(vvs, skb);
- return t_ops->send_pkt(pkt);
+ return t_ops->send_pkt(skb);
}
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
- if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
+ if (vvs->rx_bytes + skb->len > vvs->buf_alloc)
return false;
- vvs->rx_bytes += pkt->len;
+ vvs->rx_bytes += skb->len;
return true;
}
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
- vvs->rx_bytes -= pkt->len;
- vvs->fwd_cnt += pkt->len;
+ int len;
+
+ len = skb_headroom(skb) - sizeof(struct virtio_vsock_hdr) - skb->len;
+ vvs->rx_bytes -= len;
+ vvs->fwd_cnt += len;
}
-void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+
spin_lock_bh(&vvs->rx_lock);
vvs->last_fwd_cnt = vvs->fwd_cnt;
- pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
- pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
+ hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
+ hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
@@ -303,29 +311,29 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0, off;
+ struct sk_buff *skb, *tmp;
int err = -EFAULT;
spin_lock_bh(&vvs->rx_lock);
- list_for_each_entry(pkt, &vvs->rx_queue, list) {
- off = pkt->off;
+ skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
+ off = 0;
if (total == len)
break;
- while (total < len && off < pkt->len) {
+ while (total < len && off < skb->len) {
bytes = len - total;
- if (bytes > pkt->len - off)
- bytes = pkt->len - off;
+ if (bytes > skb->len - off)
+ bytes = skb->len - off;
/* sk_lock is held by caller so no one else can dequeue.
* Unlock rx_lock since memcpy_to_msg() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, pkt->buf + off, bytes);
+ err = memcpy_to_msg(msg, skb->data + off, bytes);
if (err)
goto out;
@@ -352,37 +360,38 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0;
- u32 free_space;
+ struct sk_buff *skb;
int err = -EFAULT;
+ u32 free_space;
spin_lock_bh(&vvs->rx_lock);
- while (total < len && !list_empty(&vvs->rx_queue)) {
- pkt = list_first_entry(&vvs->rx_queue,
- struct virtio_vsock_pkt, list);
+ while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
+ skb = __skb_dequeue(&vvs->rx_queue);
bytes = len - total;
- if (bytes > pkt->len - pkt->off)
- bytes = pkt->len - pkt->off;
+ if (bytes > skb->len)
+ bytes = skb->len;
/* sk_lock is held by caller so no one else can dequeue.
* Unlock rx_lock since memcpy_to_msg() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
+ err = memcpy_to_msg(msg, skb->data, bytes);
if (err)
goto out;
spin_lock_bh(&vvs->rx_lock);
total += bytes;
- pkt->off += bytes;
- if (pkt->off == pkt->len) {
- virtio_transport_dec_rx_pkt(vvs, pkt);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
+ skb_pull(skb, bytes);
+
+ if (skb->len == 0) {
+ virtio_transport_dec_rx_pkt(vvs, skb);
+ consume_skb(skb);
+ } else {
+ __skb_queue_head(&vvs->rx_queue, skb);
}
}
@@ -414,10 +423,10 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
int flags)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt;
int dequeued_len = 0;
size_t user_buf_len = msg_data_left(msg);
bool msg_ready = false;
+ struct sk_buff *skb;
spin_lock_bh(&vvs->rx_lock);
@@ -427,13 +436,18 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
}
while (!msg_ready) {
- pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
+ struct virtio_vsock_hdr *hdr;
+
+ skb = __skb_dequeue(&vvs->rx_queue);
+ if (!skb)
+ break;
+ hdr = virtio_vsock_hdr(skb);
if (dequeued_len >= 0) {
size_t pkt_len;
size_t bytes_to_copy;
- pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
+ pkt_len = (size_t)le32_to_cpu(hdr->len);
bytes_to_copy = min(user_buf_len, pkt_len);
if (bytes_to_copy) {
@@ -444,7 +458,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
+ err = memcpy_to_msg(msg, skb->data, bytes_to_copy);
if (err) {
/* Copying the message failed. The remaining
* fragments will be freed without copying.
@@ -452,6 +466,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
dequeued_len = err;
} else {
user_buf_len -= bytes_to_copy;
+ skb_pull(skb, bytes_to_copy);
}
spin_lock_bh(&vvs->rx_lock);
@@ -461,17 +476,16 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
dequeued_len += pkt_len;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
msg_ready = true;
vvs->msg_count--;
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
msg->msg_flags |= MSG_EOR;
}
- virtio_transport_dec_rx_pkt(vvs, pkt);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_dec_rx_pkt(vvs, skb);
+ kfree_skb(skb);
}
spin_unlock_bh(&vvs->rx_lock);
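The EOM/EOR distinction is user-visible: EOM bounds what a single recvmsg() returns, while EOR additionally reflects the sender's MSG_EOR back to the receiver. A hypothetical userspace fragment for an AF_VSOCK SOCK_SEQPACKET socket (error handling elided):

char buf[4096];
struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
ssize_t n;

n = recvmsg(fd, &msg, 0);		/* returns at most one message */
if (n >= 0 && (msg.msg_flags & MSG_EOR))
	; /* the sender passed MSG_EOR for this record */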
@@ -609,7 +623,7 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk,
spin_lock_init(&vvs->rx_lock);
spin_lock_init(&vvs->tx_lock);
- INIT_LIST_HEAD(&vvs->rx_queue);
+ skb_queue_head_init(&vvs->rx_queue);
return 0;
}
@@ -806,16 +820,16 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
static int virtio_transport_reset(struct vsock_sock *vsk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
- .reply = !!pkt,
+ .reply = !!skb,
.vsk = vsk,
};
/* Send RST only if the original pkt is not a RST pkt */
- if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
+ if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
return 0;
return virtio_transport_send_pkt_info(vsk, &info);
@@ -825,29 +839,30 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
* attempt was made to connect to a socket that does not exist.
*/
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
- struct virtio_vsock_pkt *reply;
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
- .type = le16_to_cpu(pkt->hdr.type),
+ .type = le16_to_cpu(hdr->type),
.reply = true,
};
+ struct sk_buff *reply;
/* Send RST only if the original pkt is not a RST pkt */
- if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
+ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
return 0;
- reply = virtio_transport_alloc_pkt(&info, 0,
- le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port),
- le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
+ reply = virtio_transport_alloc_skb(&info, 0,
+ le64_to_cpu(hdr->dst_cid),
+ le32_to_cpu(hdr->dst_port),
+ le64_to_cpu(hdr->src_cid),
+ le32_to_cpu(hdr->src_port));
if (!reply)
return -ENOMEM;
if (!t) {
- virtio_transport_free_pkt(reply);
+ kfree_skb(reply);
return -ENOTCONN;
}
@@ -858,16 +873,11 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt, *tmp;
/* We don't need to take rx_lock, as the socket is closing and we are
* removing it.
*/
- list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
-
+ __skb_queue_purge(&vvs->rx_queue);
vsock_remove_sock(vsk);
}
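__skb_queue_purge() is the unlocked variant, consistent with the comment above about the closing socket having no other users; for reference, its definition in include/linux/skbuff.h is essentially:

static inline void __skb_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(list)) != NULL)
		kfree_skb(skb);
}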
@@ -981,13 +991,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_release);
static int
virtio_transport_recv_connecting(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
- int err;
int skerr;
+ int err;
- switch (le16_to_cpu(pkt->hdr.op)) {
+ switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RESPONSE:
sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
@@ -1008,7 +1019,7 @@ virtio_transport_recv_connecting(struct sock *sk,
return 0;
destroy:
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset(vsk, skb);
sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk_error_report(sk);
@@ -1017,34 +1028,37 @@ destroy:
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
struct virtio_vsock_sock *vvs = vsk->trans;
bool can_enqueue, free_pkt = false;
+ struct virtio_vsock_hdr *hdr;
+ u32 len;
- pkt->len = le32_to_cpu(pkt->hdr.len);
- pkt->off = 0;
+ hdr = virtio_vsock_hdr(skb);
+ len = le32_to_cpu(hdr->len);
spin_lock_bh(&vvs->rx_lock);
- can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
+ can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb);
if (!can_enqueue) {
free_pkt = true;
goto out;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
/* Try to copy small packets into the buffer of last packet queued,
* to avoid wasting memory queueing the entire buffer with a small
* payload.
*/
- if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
- struct virtio_vsock_pkt *last_pkt;
+ if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
+ struct virtio_vsock_hdr *last_hdr;
+ struct sk_buff *last_skb;
- last_pkt = list_last_entry(&vvs->rx_queue,
- struct virtio_vsock_pkt, list);
+ last_skb = skb_peek_tail(&vvs->rx_queue);
+ last_hdr = virtio_vsock_hdr(last_skb);
/* If there is space in the last packet queued, we copy the
* new packet in its buffer. We avoid this if the last packet
@@ -1052,35 +1066,35 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
* delimiter of SEQPACKET message, so 'pkt' is the first packet
* of a new message.
*/
- if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
- !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
- memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
- pkt->len);
- last_pkt->len += pkt->len;
+ if (skb->len < skb_tailroom(last_skb) &&
+ !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
+ memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
free_pkt = true;
- last_pkt->hdr.flags |= pkt->hdr.flags;
+ last_hdr->flags |= hdr->flags;
+ last_hdr->len = cpu_to_le32(last_skb->len);
goto out;
}
}
- list_add_tail(&pkt->list, &vvs->rx_queue);
+ __skb_queue_tail(&vvs->rx_queue, skb);
out:
spin_unlock_bh(&vvs->rx_lock);
if (free_pkt)
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
}
static int
virtio_transport_recv_connected(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
int err = 0;
- switch (le16_to_cpu(pkt->hdr.op)) {
+ switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RW:
- virtio_transport_recv_enqueue(vsk, pkt);
+ virtio_transport_recv_enqueue(vsk, skb);
vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
@@ -1090,18 +1104,17 @@ virtio_transport_recv_connected(struct sock *sk,
sk->sk_write_space(sk);
break;
case VIRTIO_VSOCK_OP_SHUTDOWN:
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
vsk->peer_shutdown |= RCV_SHUTDOWN;
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
vsock_stream_has_data(vsk) <= 0 &&
!sock_flag(sk, SOCK_DONE)) {
(void)virtio_transport_reset(vsk, NULL);
-
virtio_transport_do_close(vsk, true);
}
- if (le32_to_cpu(pkt->hdr.flags))
+ if (le32_to_cpu(hdr->flags))
sk->sk_state_change(sk);
break;
case VIRTIO_VSOCK_OP_RST:
@@ -1112,28 +1125,30 @@ virtio_transport_recv_connected(struct sock *sk,
break;
}
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
return err;
}
static void
virtio_transport_recv_disconnecting(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
- if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
+ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
virtio_transport_do_close(vsk, true);
}
static int
virtio_transport_send_response(struct vsock_sock *vsk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RESPONSE,
- .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
- .remote_port = le32_to_cpu(pkt->hdr.src_port),
+ .remote_cid = le64_to_cpu(hdr->src_cid),
+ .remote_port = le32_to_cpu(hdr->src_port),
.reply = true,
.vsk = vsk,
};
@@ -1142,8 +1157,9 @@ virtio_transport_send_response(struct vsock_sock *vsk,
}
static bool virtio_transport_space_update(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
struct virtio_vsock_sock *vvs = vsk->trans;
bool space_available;
@@ -1158,8 +1174,8 @@ static bool virtio_transport_space_update(struct sock *sk,
/* buf_alloc and fwd_cnt are always included in the hdr */
spin_lock_bh(&vvs->tx_lock);
- vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
- vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
+ vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
+ vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
space_available = virtio_transport_has_space(vsk);
spin_unlock_bh(&vvs->tx_lock);
return space_available;
@@ -1167,27 +1183,28 @@ static bool virtio_transport_space_update(struct sock *sk,
/* Handle server socket */
static int
-virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
+virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
struct virtio_transport *t)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
struct vsock_sock *vchild;
struct sock *child;
int ret;
- if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
- virtio_transport_reset_no_sock(t, pkt);
+ if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
+ virtio_transport_reset_no_sock(t, skb);
return -EINVAL;
}
if (sk_acceptq_is_full(sk)) {
- virtio_transport_reset_no_sock(t, pkt);
+ virtio_transport_reset_no_sock(t, skb);
return -ENOMEM;
}
child = vsock_create_connected(sk);
if (!child) {
- virtio_transport_reset_no_sock(t, pkt);
+ virtio_transport_reset_no_sock(t, skb);
return -ENOMEM;
}
@@ -1198,10 +1215,10 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
child->sk_state = TCP_ESTABLISHED;
vchild = vsock_sk(child);
- vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port));
- vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
+ vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
+ le32_to_cpu(hdr->dst_port));
+ vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
+ le32_to_cpu(hdr->src_port));
ret = vsock_assign_transport(vchild, vsk);
/* Transport assigned (looking at remote_addr) must be the same
@@ -1209,17 +1226,17 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
*/
if (ret || vchild->transport != &t->transport) {
release_sock(child);
- virtio_transport_reset_no_sock(t, pkt);
+ virtio_transport_reset_no_sock(t, skb);
sock_put(child);
return ret;
}
- if (virtio_transport_space_update(child, pkt))
+ if (virtio_transport_space_update(child, skb))
child->sk_write_space(child);
vsock_insert_connected(vchild);
vsock_enqueue_accept(sk, child);
- virtio_transport_send_response(vchild, pkt);
+ virtio_transport_send_response(vchild, skb);
release_sock(child);
@@ -1237,29 +1254,30 @@ static bool virtio_transport_valid_type(u16 type)
* lock.
*/
void virtio_transport_recv_pkt(struct virtio_transport *t,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct sockaddr_vm src, dst;
struct vsock_sock *vsk;
struct sock *sk;
bool space_available;
- vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
- vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port));
+ vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
+ le32_to_cpu(hdr->src_port));
+ vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
+ le32_to_cpu(hdr->dst_port));
trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
dst.svm_cid, dst.svm_port,
- le32_to_cpu(pkt->hdr.len),
- le16_to_cpu(pkt->hdr.type),
- le16_to_cpu(pkt->hdr.op),
- le32_to_cpu(pkt->hdr.flags),
- le32_to_cpu(pkt->hdr.buf_alloc),
- le32_to_cpu(pkt->hdr.fwd_cnt));
-
- if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ le32_to_cpu(hdr->len),
+ le16_to_cpu(hdr->type),
+ le16_to_cpu(hdr->op),
+ le32_to_cpu(hdr->flags),
+ le32_to_cpu(hdr->buf_alloc),
+ le32_to_cpu(hdr->fwd_cnt));
+
+ if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
+ (void)virtio_transport_reset_no_sock(t, skb);
goto free_pkt;
}
@@ -1270,13 +1288,13 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
if (!sk) {
sk = vsock_find_bound_socket(&dst);
if (!sk) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ (void)virtio_transport_reset_no_sock(t, skb);
goto free_pkt;
}
}
- if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
+ (void)virtio_transport_reset_no_sock(t, skb);
sock_put(sk);
goto free_pkt;
}
@@ -1287,13 +1305,13 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
/* Check if sk has been closed before lock_sock */
if (sock_flag(sk, SOCK_DONE)) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ (void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
goto free_pkt;
}
- space_available = virtio_transport_space_update(sk, pkt);
+ space_available = virtio_transport_space_update(sk, skb);
/* Update CID in case it has changed after a transport reset event */
if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
@@ -1304,23 +1322,23 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
switch (sk->sk_state) {
case TCP_LISTEN:
- virtio_transport_recv_listen(sk, pkt, t);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_recv_listen(sk, skb, t);
+ kfree_skb(skb);
break;
case TCP_SYN_SENT:
- virtio_transport_recv_connecting(sk, pkt);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_recv_connecting(sk, skb);
+ kfree_skb(skb);
break;
case TCP_ESTABLISHED:
- virtio_transport_recv_connected(sk, pkt);
+ virtio_transport_recv_connected(sk, skb);
break;
case TCP_CLOSING:
- virtio_transport_recv_disconnecting(sk, pkt);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_recv_disconnecting(sk, skb);
+ kfree_skb(skb);
break;
default:
- (void)virtio_transport_reset_no_sock(t, pkt);
- virtio_transport_free_pkt(pkt);
+ (void)virtio_transport_reset_no_sock(t, skb);
+ kfree_skb(skb);
break;
}
@@ -1333,16 +1351,42 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
return;
free_pkt:
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
-void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
+/* Remove skbs found in a queue whose vsk matches.
+ *
+ * Each skb is freed.
+ *
+ * Returns the count of skbs that were reply packets.
+ */
+int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
- kvfree(pkt->buf);
- kfree(pkt);
+ struct sk_buff_head freeme;
+ struct sk_buff *skb, *tmp;
+ int cnt = 0;
+
+ skb_queue_head_init(&freeme);
+
+ spin_lock_bh(&queue->lock);
+ skb_queue_walk_safe(queue, skb, tmp) {
+ if (vsock_sk(skb->sk) != vsk)
+ continue;
+
+ __skb_unlink(skb, queue);
+ __skb_queue_tail(&freeme, skb);
+
+ if (virtio_vsock_skb_reply(skb))
+ cnt++;
+ }
+ spin_unlock_bh(&queue->lock);
+
+ __skb_queue_purge(&freeme);
+
+ return cnt;
}
-EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
+EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 169a8cf65b39..671e03240fc5 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -16,7 +16,7 @@ struct vsock_loopback {
struct workqueue_struct *workqueue;
spinlock_t pkt_list_lock; /* protects pkt_list */
- struct list_head pkt_list;
+ struct sk_buff_head pkt_queue;
struct work_struct pkt_work;
};
@@ -27,13 +27,13 @@ static u32 vsock_loopback_get_local_cid(void)
return VMADDR_CID_LOCAL;
}
-static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
+static int vsock_loopback_send_pkt(struct sk_buff *skb)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
- int len = pkt->len;
+ int len = skb->len;
spin_lock_bh(&vsock->pkt_list_lock);
- list_add_tail(&pkt->list, &vsock->pkt_list);
+ skb_queue_tail(&vsock->pkt_queue, skb);
spin_unlock_bh(&vsock->pkt_list_lock);
queue_work(vsock->workqueue, &vsock->pkt_work);
@@ -44,21 +44,8 @@ static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
- struct virtio_vsock_pkt *pkt, *n;
- LIST_HEAD(freeme);
- spin_lock_bh(&vsock->pkt_list_lock);
- list_for_each_entry_safe(pkt, n, &vsock->pkt_list, list) {
- if (pkt->vsk != vsk)
- continue;
- list_move(&pkt->list, &freeme);
- }
- spin_unlock_bh(&vsock->pkt_list_lock);
-
- list_for_each_entry_safe(pkt, n, &freeme, list) {
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
+ virtio_transport_purge_skbs(vsk, &vsock->pkt_queue);
return 0;
}
@@ -121,20 +108,18 @@ static void vsock_loopback_work(struct work_struct *work)
{
struct vsock_loopback *vsock =
container_of(work, struct vsock_loopback, pkt_work);
- LIST_HEAD(pkts);
+ struct sk_buff_head pkts;
+ struct sk_buff *skb;
+
+ skb_queue_head_init(&pkts);
spin_lock_bh(&vsock->pkt_list_lock);
- list_splice_init(&vsock->pkt_list, &pkts);
+ skb_queue_splice_init(&vsock->pkt_queue, &pkts);
spin_unlock_bh(&vsock->pkt_list_lock);
- while (!list_empty(&pkts)) {
- struct virtio_vsock_pkt *pkt;
-
- pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
-
- virtio_transport_deliver_tap_pkt(pkt);
- virtio_transport_recv_pkt(&loopback_transport, pkt);
+ while ((skb = __skb_dequeue(&pkts))) {
+ virtio_transport_deliver_tap_pkt(skb);
+ virtio_transport_recv_pkt(&loopback_transport, skb);
}
}
@@ -148,7 +133,7 @@ static int __init vsock_loopback_init(void)
return -ENOMEM;
spin_lock_init(&vsock->pkt_list_lock);
- INIT_LIST_HEAD(&vsock->pkt_list);
+ skb_queue_head_init(&vsock->pkt_queue);
INIT_WORK(&vsock->pkt_work, vsock_loopback_work);
ret = vsock_core_register(&loopback_transport.transport,
@@ -166,19 +151,13 @@ out_wq:
static void __exit vsock_loopback_exit(void)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
- struct virtio_vsock_pkt *pkt;
vsock_core_unregister(&loopback_transport.transport);
flush_work(&vsock->pkt_work);
spin_lock_bh(&vsock->pkt_list_lock);
- while (!list_empty(&vsock->pkt_list)) {
- pkt = list_first_entry(&vsock->pkt_list,
- struct virtio_vsock_pkt, list);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
+ virtio_vsock_skb_queue_purge(&vsock->pkt_queue);
spin_unlock_bh(&vsock->pkt_list_lock);
destroy_workqueue(vsock->workqueue);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index af85d8909935..7c61752f6d83 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -278,8 +278,8 @@ struct cfg80211_event {
};
struct cfg80211_cached_keys {
- struct key_params params[CFG80211_MAX_WEP_KEYS];
- u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104];
+ struct key_params params[4];
+ u8 data[4][WLAN_KEY_LEN_WEP104];
int def;
};
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index edd062f104f4..e6fdb0b8187d 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -45,8 +45,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
cfg80211_hold_bss(bss_from_pub(bss));
wdev->u.ibss.current_bss = bss_from_pub(bss);
- if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
- cfg80211_upload_connect_keys(wdev);
+ cfg80211_upload_connect_keys(wdev);
nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid,
GFP_KERNEL);
@@ -294,7 +293,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
+ for (i = 0; i < 4; i++)
ck->params[i].key = ck->data[i];
}
err = __cfg80211_join_ibss(rdev, wdev->netdev,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 33a82ecab9d5..64cf6110ce9d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -883,7 +883,7 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
},
[NL80211_REKEY_DATA_KCK] = {
.type = NLA_BINARY,
- .len = NL80211_KCK_EXT_LEN
+ .len = NL80211_KCK_EXT_LEN_32
},
[NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN(NL80211_REPLAY_CTR_LEN),
[NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
@@ -13809,7 +13809,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
return -ERANGE;
if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN &&
!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
- nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN))
+ nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN) &&
+ !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KCK_32 &&
+ nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN_32))
return -ERANGE;
rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
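Spelled out as a predicate (a hypothetical helper, not part of the patch), the accepted KCK lengths after this hunk are 16 bytes (NL80211_KCK_LEN) always, 24 bytes (NL80211_KCK_EXT_LEN) with WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK, and 32 bytes (NL80211_KCK_EXT_LEN_32) with WIPHY_FLAG_SUPPORTS_EXT_KCK_32:

static bool example_kck_len_ok(const struct wiphy *wiphy, unsigned int len)
{
	return len == NL80211_KCK_LEN ||
	       (len == NL80211_KCK_EXT_LEN &&
		(wiphy->flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK)) ||
	       (len == NL80211_KCK_EXT_LEN_32 &&
		(wiphy->flags & WIPHY_FLAG_SUPPORTS_EXT_KCK_32));
}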
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4f3f31244e8b..af65196c916e 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -737,51 +737,9 @@ static bool valid_country(const u8 *data, unsigned int size,
}
#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
-static struct key *builtin_regdb_keys;
-
-static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
-{
- const u8 *end = p + buflen;
- size_t plen;
- key_ref_t key;
-
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
- "asymmetric", NULL, p, plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA |
- KEY_ALLOC_BUILT_IN |
- KEY_ALLOC_BYPASS_RESTRICTION);
- if (IS_ERR(key)) {
- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- } else {
- pr_notice("Loaded X.509 cert '%s'\n",
- key_ref_to_ptr(key)->description);
- key_ref_put(key);
- }
- p += plen;
- }
-
- return;
+#include <keys/asymmetric-type.h>
-dodgy_cert:
- pr_err("Problem parsing in-kernel X.509 certificate list\n");
-}
+static struct key *builtin_regdb_keys;
static int __init load_builtin_regdb_keys(void)
{
@@ -797,11 +755,15 @@ static int __init load_builtin_regdb_keys(void)
pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
- load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+ x509_load_certificate_list(shipped_regdb_certs,
+ shipped_regdb_certs_len,
+ builtin_regdb_keys);
#endif
#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
- load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+ x509_load_certificate_list(extra_regdb_certs,
+ extra_regdb_certs_len,
+ builtin_regdb_keys);
#endif
return 0;
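The open-coded DER walker is replaced by the common certificate loader that the new <keys/asymmetric-type.h> include pulls in; assuming the in-tree prototype, its signature is:

int x509_load_certificate_list(const u8 cert_list[],
			       const unsigned long list_size,
			       const struct key *keyring);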
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 4b5b6ee0fe01..123248b2c0be 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -855,8 +855,7 @@ void __cfg80211_connect_result(struct net_device *dev,
ETH_ALEN);
}
- if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
- cfg80211_upload_connect_keys(wdev);
+ cfg80211_upload_connect_keys(wdev);
rcu_read_lock();
for_each_valid_link(cr, link) {
@@ -1462,9 +1461,6 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
connect->crypto.ciphers_pairwise[0] = cipher;
}
}
-
- connect->crypto.wep_keys = connkeys->params;
- connect->crypto.wep_tx_key = connkeys->def;
} else {
if (WARN_ON(connkeys))
return -EINVAL;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 8f403f9fe816..38d3b434c18c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -934,7 +934,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
if (!wdev->connect_keys)
return;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) {
+ for (i = 0; i < 4; i++) {
if (!wdev->connect_keys->params[i].cipher)
continue;
if (rdev_add_key(rdev, dev, -1, i, false, NULL,
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 8a24dfca75af..e3acfac7430a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -439,7 +439,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
GFP_KERNEL);
if (!wdev->wext.keys)
return -ENOMEM;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
+ for (i = 0; i < 4; i++)
wdev->wext.keys->params[i].key =
wdev->wext.keys->data[i];
}
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index fe8765c4075d..13a72b17248e 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -636,7 +636,15 @@ void wireless_send_event(struct net_device * dev,
}
EXPORT_SYMBOL(wireless_send_event);
+#ifdef CONFIG_CFG80211_WEXT
+static void wireless_warn_cfg80211_wext(void)
+{
+ char name[sizeof(current->comm)];
+ pr_warn_ratelimited("warning: `%s' uses wireless extensions that are deprecated for modern drivers; use nl80211\n",
+ get_task_comm(name, current));
+}
+#endif
/* IW handlers */
@@ -652,8 +660,12 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
if (dev->ieee80211_ptr &&
dev->ieee80211_ptr->wiphy &&
dev->ieee80211_ptr->wiphy->wext &&
- dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
+ dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) {
+ wireless_warn_cfg80211_wext();
+ if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ return NULL;
return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev);
+ }
#endif
/* not found */
@@ -690,8 +702,12 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
const struct iw_handler_def *handlers = NULL;
#ifdef CONFIG_CFG80211_WEXT
- if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy)
+ if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) {
+ wireless_warn_cfg80211_wext();
+ if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ return NULL;
handlers = dev->ieee80211_ptr->wiphy->wext;
+ }
#endif
#ifdef CONFIG_WIRELESS_EXT
if (dev->wireless_handlers)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 191c6d98c700..f231207ca210 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -47,7 +47,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
+ for (i = 0; i < 4; i++)
ck->params[i].key = ck->data[i];
}
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 74a54295c164..872b80188e83 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -6,6 +6,7 @@
#include <net/espintcp.h>
#include <linux/skmsg.h>
#include <net/inet_common.h>
+#include <trace/events/sock.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -397,6 +398,8 @@ static void espintcp_data_ready(struct sock *sk)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
+ trace_sk_data_ready(sk);
+
strp_data_ready(&ctx->strp);
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 4aff76c6f12e..95f1436bf6a2 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -309,7 +309,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
else
xso->type = XFRM_DEV_OFFLOAD_CRYPTO;
- err = dev->xfrmdev_ops->xdo_dev_state_add(x);
+ err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack);
if (err) {
xso->dev = NULL;
xso->dir = 0;
@@ -326,7 +326,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
*/
WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
if (err != -EOPNOTSUPP || is_packet_offload) {
- NL_SET_ERR_MSG(extack, "Device failed to offload this state");
+ NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state");
return err;
}
}
@@ -383,14 +383,14 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
return -EINVAL;
}
- err = dev->xfrmdev_ops->xdo_dev_policy_add(xp);
+ err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack);
if (err) {
xdo->dev = NULL;
xdo->real_dev = NULL;
xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
xdo->dir = 0;
netdev_put(dev, &xdo->dev_tracker);
- NL_SET_ERR_MSG(extack, "Device failed to offload this policy");
+ NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy");
return err;
}
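NL_SET_ERR_MSG_WEAK() avoids clobbering a more specific message the driver may already have filled in through the extack now passed to the xdo_dev_state_add() and xdo_dev_policy_add() callbacks; per include/linux/netlink.h it is roughly:

#define NL_SET_ERR_MSG_WEAK(extack, msg)	\
do {						\
	if ((extack) && !(extack)->_msg)	\
		NL_SET_ERR_MSG((extack), msg);	\
} while (0)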
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 00afe831c71c..2ab3e09e2227 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1274,7 +1274,7 @@ found:
xso->real_dev = xdo->real_dev;
netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
GFP_ATOMIC);
- error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
+ error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
if (error) {
xso->dir = 0;
netdev_put(xso->dev, &xso->dev_tracker);