summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c15
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/fragmentation.c8
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/main.c5
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c129
-rw-r--r--net/batman-adv/multicast.h30
-rw-r--r--net/batman-adv/multicast_forw.c1178
-rw-r--r--net/batman-adv/netlink.c2
-rw-r--r--net/batman-adv/originator.c28
-rw-r--r--net/batman-adv/originator.h3
-rw-r--r--net/batman-adv/routing.c70
-rw-r--r--net/batman-adv/routing.h11
-rw-r--r--net/batman-adv/soft-interface.c18
-rw-r--r--net/batman-adv/types.h70
-rw-r--r--net/bpf/test_run.c2
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c109
-rw-r--r--net/core/dev.h3
-rw-r--r--net/core/dev_ioctl.c7
-rw-r--r--net/core/filter.c15
-rw-r--r--net/core/link_watch.c15
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/net_namespace.c45
-rw-r--r--net/core/netdev-genl-gen.c110
-rw-r--r--net/core/netdev-genl-gen.h16
-rw-r--r--net/core/netdev-genl.c344
-rw-r--r--net/core/page_pool.c78
-rw-r--r--net/core/page_pool_priv.h12
-rw-r--r--net/core/page_pool_user.c410
-rw-r--r--net/core/rtnetlink.c10
-rw-r--r--net/devlink/core.c4
-rw-r--r--net/devlink/dev.c24
-rw-r--r--net/devlink/devl_internal.h21
-rw-r--r--net/devlink/health.c3
-rw-r--r--net/devlink/netlink.c45
-rw-r--r--net/devlink/netlink_gen.c4
-rw-r--r--net/devlink/netlink_gen.h5
-rw-r--r--net/devlink/region.c3
-rw-r--r--net/dsa/tag_rtl4_a.c5
-rw-r--r--net/ethtool/common.c6
-rw-r--r--net/ethtool/ioctl.c3
-rw-r--r--net/hsr/hsr_device.c67
-rw-r--r--net/ipv4/inet_diag.c86
-rw-r--r--net/ipv4/syncookies.c215
-rw-r--r--net/ipv4/tcp.c94
-rw-r--r--net/ipv4/tcp_ao.c16
-rw-r--r--net/ipv4/tcp_input.c29
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv6/syncookies.c108
-rw-r--r--net/ipv6/tcp_ipv6.c9
-rw-r--r--net/mac80211/scan.c48
-rw-r--r--net/mptcp/pm_userspace.c8
-rw-r--r--net/mptcp/protocol.h9
-rw-r--r--net/mptcp/sockopt.c2
-rw-r--r--net/ncsi/internal.h7
-rw-r--r--net/ncsi/ncsi-cmd.c3
-rw-r--r--net/ncsi/ncsi-manage.c29
-rw-r--r--net/ncsi/ncsi-netlink.c4
-rw-r--r--net/ncsi/ncsi-pkt.h17
-rw-r--r--net/ncsi/ncsi-rsp.c67
-rw-r--r--net/netfilter/nf_synproxy_core.c4
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/packet/af_packet.c20
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/sched/act_api.c57
-rw-r--r--net/sched/cls_u32.c36
-rw-r--r--net/sched/sch_cbs.c4
-rw-r--r--net/sched/sch_generic.c5
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_clc.c15
-rw-r--r--net/smc/smc_clc.h3
-rw-r--r--net/smc/smc_sysctl.c24
-rw-r--r--net/smc/smc_sysctl.h2
-rw-r--r--net/smc/smc_tx.c30
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/wireless/nl80211.c1
-rw-r--r--net/xdp/xdp_umem.c11
-rw-r--r--net/xdp/xsk.c56
-rw-r--r--net/xdp/xsk_buff_pool.c2
-rw-r--r--net/xdp/xsk_queue.h19
86 files changed, 3543 insertions, 471 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2a7f1b15714a..407b2335f091 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
struct ethtool_ts_info *info)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
- struct phy_device *phydev = vlan->real_dev->phydev;
-
- if (phy_has_tsinfo(phydev)) {
- return phy_ts_info(phydev, info);
- } else if (ops->get_ts_info) {
- return ops->get_ts_info(vlan->real_dev, info);
- } else {
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
- }
-
- return 0;
+ return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
}
static void vlan_dev_get_stats64(struct net_device *dev,
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 3bd0760c76a2..b51d8b071b56 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -20,6 +20,7 @@ batman-adv-y += hash.o
batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o
batman-adv-y += netlink.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 37ce6cfb3520..5f46ca3d4bb8 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -20,7 +20,6 @@
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -31,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index c120c7c6d25f..757c084ac2d1 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -25,7 +25,6 @@
#include "hard-interface.h"
#include "originator.h"
-#include "routing.h"
#include "send.h"
/**
@@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_orig_node *orig_node_dst;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
u16 total_size;
bool ret = false;
packet = (struct batadv_frag_packet *)skb->data;
- orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
- if (!orig_node_dst)
- goto out;
- neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
+ neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if);
if (!neigh_node)
goto out;
@@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
}
out:
- batadv_orig_node_put(orig_node_dst);
batadv_neigh_node_put(neigh_node);
return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index d26124bc27e1..0ddd8b4b3f4c 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -18,7 +18,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -29,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/udp.h>
#include <net/sock.h>
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index e8a449915566..5fc754b0b3f7 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -6,6 +6,7 @@
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
@@ -20,7 +21,6 @@
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -33,6 +33,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
@@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void)
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
+ /* multicast packet */
+ batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet;
/* unicast packets ... */
/* unicast with 4 addresses packet */
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 10007c5894a1..870dcd7f1786 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.3"
+#define BATADV_SOURCE_VERSION "2024.0"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 315394f12c55..d982daea8329 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -25,7 +25,6 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -36,6 +35,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Checks if all active hard interfaces have an MTU of at least 1280 bytes
+ * (IPv6 minimum MTU).
+ *
+ * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise.
+ */
+static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv)
+{
+ const struct batadv_hard_iface *hard_iface;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ continue;
+
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ continue;
+
+ if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) {
+ rcu_read_unlock();
+ return BATADV_NO_FLAGS;
+ }
+ }
+ rcu_read_unlock();
+
+ return BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
+}
+
+/**
* batadv_mcast_mla_flags_get() - get the new multicast flags
* @bat_priv: the bat priv with all the soft interface information
*
@@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv)
mla_flags.enabled = 1;
mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv,
bridge);
+ mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv);
if (!bridge)
return mla_flags;
@@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
bool old_enabled = bat_priv->mcast.mla_flags.enabled;
u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags;
- char str_old_flags[] = "[.... . ]";
+ char str_old_flags[] = "[.... . .]";
- sprintf(str_old_flags, "[%c%c%c%s%s]",
+ sprintf(str_old_flags, "[%c%c%c%s%s%c]",
(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ (old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
- "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n",
+ "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n",
old_enabled ? str_old_flags : "<undefined>",
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ (flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
}
/**
@@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_forw_mode_by_count() - get forwarding mode by count
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to check
+ * @vid: the vlan identifier
+ * @is_routable: stores whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ *
+ * For a multicast packet with multiple destination originators, checks which
+ * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a
+ * complete batman-adv multicast header.
+ *
+ * Return:
+ * BATADV_FORW_MCAST: If all nodes have multicast packet routing
+ * capabilities and an MTU >= 1280 on all hard interfaces (including us)
+ * and the encapsulated multicast packet with all destination addresses
+ * would still fit into a 1280 byte batman-adv multicast packet
+ * (excluding the outer ethernet frame) and we could successfully push
+ * the full batman-adv multicast packet header.
+ * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv
+ * multicast packet and the amount of batman-adv unicast packets needed
+ * is smaller than or equal to the configured multicast fanout.
+ * BATADV_FORW_BCAST: Otherwise.
+ */
+static enum batadv_forw_mode
+batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ int is_routable, int count)
+{
+ unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count);
+ u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags;
+
+ if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) &&
+ own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA &&
+ skb->len + mcast_hdrlen <= IPV6_MIN_MTU &&
+ batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count))
+ return BATADV_FORW_MCAST;
+
+ if (count <= atomic_read(&bat_priv->multicast_fanout))
+ return BATADV_FORW_UCASTS;
+
+ return BATADV_FORW_BCAST;
+}
+
+/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to check
+ * @vid: the vlan identifier
* @is_routable: stores whether the destination is routable
*
* Return: The forwarding mode as enum batadv_forw_mode.
*/
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
int ret, tt_count, ip_count, unsnoop_count, total_count;
bool is_unsnoopable = false;
@@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
else if (unsnoop_count)
return BATADV_FORW_BCAST;
- if (total_count <= atomic_read(&bat_priv->multicast_fanout))
- return BATADV_FORW_UCASTS;
-
- return BATADV_FORW_BCAST;
+ return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable,
+ total_count);
}
/**
@@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has
+ * toggled then this method updates the counter accordingly.
+ */
+static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 mcast_flags)
+{
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
+ /* switched from flag set to unset */
+ if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) &&
+ orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)
+ atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa);
+ /* switched from flag unset to set */
+ else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA &&
+ !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA))
+ atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa);
+}
+
+/**
* batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV
* @enabled: whether the originator has multicast TVLV support enabled
* @tvlv_value: tvlv buffer containing the multicast flags
@@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags);
orig->mcast_flags = mcast_flags;
spin_unlock_bh(&orig->mcast_handler_lock);
@@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
NULL, NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+ batadv_tvlv_handler_register(bat_priv, NULL, NULL,
+ batadv_mcast_forw_tracker_tvlv_handler,
+ BATADV_TVLV_MCAST_TRACKER, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
batadv_mcast_start_timer(bat_priv);
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index a9770d8d6d36..d97ee51d26f2 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -11,6 +11,7 @@
#include <linux/netlink.h>
#include <linux/skbuff.h>
+#include <linux/types.h>
/**
* enum batadv_forw_mode - the way a packet should be forwarded as
@@ -28,6 +29,12 @@ enum batadv_forw_mode {
*/
BATADV_FORW_UCASTS,
+ /**
+ * @BATADV_FORW_MCAST: forward the packet to some nodes via a
+ * batman-adv multicast packet
+ */
+ BATADV_FORW_MCAST,
+
/** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
};
@@ -36,7 +43,7 @@ enum batadv_forw_mode {
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable);
+ unsigned short vid, int *is_routable);
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, int is_routable);
@@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
+/* multicast_forw.c */
+
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests);
+
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count);
+
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
#else
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
return BATADV_FORW_BCAST;
}
@@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node)
{
}
+static inline int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
#endif /* CONFIG_BATMAN_ADV_MCAST */
#endif /* _NET_BATMAN_ADV_MULTICAST_H_ */
diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c
new file mode 100644
index 000000000000..fafd6ba8c056
--- /dev/null
+++ b/net/batman-adv/multicast_forw.c
@@ -0,0 +1,1178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) B.A.T.M.A.N. contributors:
+ *
+ * Linus Lüssing
+ */
+
+#include "multicast.h"
+#include "main.h"
+
+#include <linux/bug.h>
+#include <linux/build_bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/gfp.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ipv6.h>
+#include <linux/limits.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
+
+#include "bridge_loop_avoidance.h"
+#include "originator.h"
+#include "send.h"
+#include "translation-table.h"
+
+#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \
+ for (; num_dests; num_dests--, (dest) += ETH_ALEN)
+
+#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \
+ for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN)
+
+/**
+ * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes
+ * @skb: the skb to push onto
+ * @size: the amount of bytes to push
+ * @len: stores the total amount of bytes pushed
+ *
+ * Performs an skb_push() onto the given skb and adds the amount of pushed bytes
+ * to the given len pointer.
+ *
+ * Return: the return value of the skb_push() call.
+ */
+static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size,
+ unsigned short *len)
+{
+ *len += size;
+ return skb_push(skb, size);
+}
+
+/**
+ * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front
+ * @skb: the skb to push onto
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes two padding bytes to the front of the given skb.
+ *
+ * Return: On success a pointer to the first byte of the two pushed padding
+ * bytes within the skb. NULL otherwise.
+ */
+static char *
+batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+ char *padding;
+
+ if (skb_headroom(skb) < pad_len)
+ return NULL;
+
+ padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len);
+ memset(padding, 0, pad_len);
+
+ return padding;
+}
+
+/**
+ * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary
+ * @skb: the skb to potentially push the padding onto
+ * @count: the (estimated) number of originators the multicast packet needs to
+ * be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the number of destination entries is even then this adds two
+ * padding bytes to the end of the tracker TVLV.
+ *
+ * Return: true on success or if no padding is needed, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count,
+ unsigned short *tvlv_len)
+{
+ if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len))
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node
+ * @node: the hlist node to get the orig_node from
+ * @entry_offset: the offset of the hlist node within the orig_node struct
+ *
+ * Return: The orig_node containing the hlist node on success, NULL on error.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_orig_entry(struct hlist_node *node,
+ size_t entry_offset)
+{
+ /* sanity check */
+ switch (entry_offset) {
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node):
+ break;
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ return (struct batadv_orig_node *)((void *)node - entry_offset);
+}
+
+/**
+ * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination address onto
+ * @vid: the vlan identifier
+ * @orig_node: the originator node to get the MAC address from
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the orig_node is a BLA backbone gateway, if there is not enough skb
+ * headroom available or if num_dests is already at its maximum (65535) then
+ * neither the skb nor num_dests is changed. Otherwise the originator's MAC
+ * address is pushed onto the given skb and num_dests incremented by one.
+ *
+ * Return: true if the orig_node is a backbone gateway or if an orig address
+ * was pushed successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ struct batadv_orig_node *orig_node,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests)
+ != sizeof(__be16));
+
+ /* Avoid sending to other BLA gateways - they already got the frame from
+ * the LAN side we share with them.
+ * TODO: Refactor to take BLA into account earlier in mode check.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
+ return true;
+
+ if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX)
+ return false;
+
+ batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len);
+ ether_addr_copy(skb->data, orig_node->orig);
+ (*num_dests)++;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @head: the list to gather originators from
+ * @entry_offset: offset of an hlist node in an orig_node structure
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators in the given list onto the given
+ * skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct hlist_head *head,
+ size_t entry_offset,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_node *node;
+ struct batadv_orig_node *orig_node;
+
+ rcu_read_lock();
+ __hlist_for_each_rcu(node, head) {
+ orig_node = batadv_mcast_forw_orig_entry(node, entry_offset);
+ if (!orig_node ||
+ !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node,
+ num_dests, tvlv_len)) {
+ rcu_read_unlock();
+ return false;
+ }
+ }
+ rcu_read_unlock();
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_tt() - push originators with interest through TT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the translation table onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ struct batadv_tt_global_entry *tt_global;
+ const u8 *addr = eth_hdr(skb)->h_dest;
+
+ /* assume success; only a failed destination push below clears this */
+ int ret = true;
+
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+ if (!tt_global)
+ goto out;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) {
+ if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid,
+ orig_entry->orig_node,
+ num_dests, tvlv_len)) {
+ ret = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ batadv_tt_global_entry_put(tt_global);
+
+out:
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_want_all() - push originators with want-all flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_ipv4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_ipv6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all-rtr flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_rtr4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_rtr6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_scrape() - remove bytes within skb data
+ * @skb: the skb to remove bytes from
+ * @offset: the offset from the skb data from which to scrape
+ * @len: the amount of bytes to scrape starting from the offset
+ *
+ * Scrapes/removes len bytes from the given skb at the given offset from the
+ * skb data.
+ *
+ * Caller needs to ensure that the region from the skb data's start up to
+ * and including the to-be-removed bytes is linearized.
+ */
+static void batadv_mcast_forw_scrape(struct sk_buff *skb,
+ unsigned short offset,
+ unsigned short len)
+{
+ char *to, *from;
+
+ SKB_LINEAR_ASSERT(skb);
+
+ to = skb_pull(skb, len);
+ from = to - len;
+
+ memmove(to, from, offset);
+}
+
+/**
+ * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Remove two padding bytes from the end of the multicast tracker TVLV,
+ * from before the payload data.
+ *
+ * Caller needs to ensure that the TVLV bytes are linearized.
+ */
+static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+
+ batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len);
+ *tvlv_len -= pad_len;
+}
+
+/**
+ * batadv_mcast_forw_push_insert_padding() - insert TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Inserts two padding bytes at the end of the multicast tracker TVLV,
+ * before the payload data in the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ unsigned short offset = *tvlv_len;
+ char *to, *from = skb->data;
+
+ to = batadv_mcast_forw_push_padding(skb, tvlv_len);
+ if (!to)
+ return false;
+
+ memmove(to, from, offset);
+ memset(to + offset, 0, *tvlv_len - offset);
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @count: the estimated number of originators the multicast packet needs to
+ *  be sent to, overwritten with @num_dests_pushed on return
+ * @num_dests_pushed: the number of originators that were actually added to the
+ *  multicast packet's tracker TVLV
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Adjusts the padding in the multicast packet's tracker TVLV depending on the
+ * initially estimated amount of destinations versus the amount of destinations
+ * that were actually added to the tracker TVLV.
+ *
+ * If the initial estimate was correct or at least the oddness was the same then
+ * no padding adjustment is performed.
+ * If the initially estimated number was even, so padding was initially added,
+ * but it turned out to be odd then padding is removed.
+ * If the initially estimated number was odd, so no padding was initially added,
+ * but it turned out to be even then padding is added.
+ *
+ * Return: true if no padding adjustment is needed or the adjustment was
+ * successful, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count,
+				      unsigned short num_dests_pushed,
+				      unsigned short *tvlv_len)
+{
+	/* matches the function's return type (was declared as int) */
+	bool ret = true;
+
+	if (likely((num_dests_pushed % 2) == (*count % 2)))
+		goto out;
+
+	/* estimated even number of destinations, but turned out to be odd
+	 * -> remove padding
+	 */
+	if (!(*count % 2) && (num_dests_pushed % 2))
+		batadv_mcast_forw_push_scrape_padding(skb, tvlv_len);
+	/* estimated odd number of destinations, but turned out to be even
+	 * -> add padding
+	 */
+	else if ((*count % 2) && (!(num_dests_pushed % 2)))
+		ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len);
+
+out:
+	/* from here on the caller works with the real destination count */
+	*count = num_dests_pushed;
+	return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_dests() - push originator addresses onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: estimated number of originators on entry, replaced by the number of
+ *  originators that were actually pushed on success
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes the estimated padding first, then the MAC addresses collected via the
+ * translation table, the want-all lists and - for routable destinations - the
+ * want-all-rtr lists, and finally corrects the padding for the number of
+ * addresses that actually got pushed.
+ *
+ * Return: -ENOMEM if any of the push steps fails. Otherwise, on success 0.
+ */
+static int
+batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			     unsigned short vid, int is_routable, int *count,
+			     unsigned short *tvlv_len)
+{
+	unsigned short pushed = 0;
+
+	if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len))
+		return -ENOMEM;
+
+	if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &pushed, tvlv_len))
+		return -ENOMEM;
+
+	if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &pushed,
+					     tvlv_len))
+		return -ENOMEM;
+
+	if (is_routable) {
+		if (!batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid,
+						     &pushed, tvlv_len))
+			return -ENOMEM;
+	}
+
+	if (!batadv_mcast_forw_push_adjust_padding(skb, count, pushed,
+						   tvlv_len))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header
+ * @skb: the skb to push the tracker TVLV onto
+ * @num_dests: the number of destination addresses to set in the header
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes the multicast tracker header followed by the generic TVLV header onto
+ * the given skb. The destination MAC addresses themselves are expected to have
+ * been pushed already and are accounted for through @tvlv_len.
+ *
+ * @num_dests ends up in the tracker header, while the generic TVLV header's
+ * length covers the tracker header plus everything pushed before it.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available or the
+ * resulting TVLV would not fit a 16 bit length. Otherwise, on success 0.
+ */
+static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests,
+					  unsigned short *tvlv_len)
+{
+	struct batadv_tvlv_mcast_tracker *tracker;
+	struct batadv_tvlv_hdr *hdr;
+	unsigned int value_len;
+
+	if (skb_headroom(skb) < sizeof(*tracker) + sizeof(*hdr))
+		return -ENOMEM;
+
+	/* value part of the TVLV: tracker header + already pushed bytes */
+	value_len = sizeof(*tracker) + *tvlv_len;
+	if (value_len + sizeof(*hdr) > U16_MAX)
+		return -ENOMEM;
+
+	batadv_mcast_forw_skb_push(skb, sizeof(*tracker), tvlv_len);
+	tracker = (struct batadv_tvlv_mcast_tracker *)skb->data;
+	tracker->num_dests = htons(num_dests);
+
+	/* the network header marks the start of the tracker TVLV value */
+	skb_reset_network_header(skb);
+
+	batadv_mcast_forw_skb_push(skb, sizeof(*hdr), tvlv_len);
+	hdr = (struct batadv_tvlv_hdr *)skb->data;
+	hdr->type = BATADV_TVLV_MCAST_TRACKER;
+	hdr->version = 1;
+	hdr->len = htons(value_len);
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the tracker TVLV onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * First pushes the collected destination MAC addresses, then the tracker and
+ * generic TVLV headers in front of them.
+ *
+ * Return: a negative error code if one of the two push steps fails.
+ * Otherwise, on success 0.
+ */
+static int
+batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			     unsigned short vid, int is_routable, int count,
+			     unsigned short *tvlv_len)
+{
+	int err;
+
+	/* count is updated to the number of actually pushed destinations */
+	err = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable,
+					   &count, tvlv_len);
+	if (err >= 0)
+		err = batadv_mcast_forw_push_tracker(skb, count, tvlv_len);
+
+	return err < 0 ? err : 0;
+}
+
+/**
+ * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb
+ * @skb: the skb to push the header onto
+ * @tvlv_len: the total TVLV length value to set in the header
+ *
+ * Prepends a batman-adv multicast packet header to the given skb and fills in
+ * its version, TTL, packet type, reserved and total TVLV length fields.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len)
+{
+	struct batadv_mcast_packet *hdr;
+
+	if (skb_headroom(skb) < sizeof(*hdr))
+		return -ENOMEM;
+
+	/* skb_push() returns the new start of the skb data */
+	hdr = skb_push(skb, sizeof(*hdr));
+
+	hdr->packet_type = BATADV_MCAST;
+	hdr->version = BATADV_COMPAT_VERSION;
+	hdr->ttl = BATADV_TTL;
+	hdr->reserved = 0;
+	hdr->tvlv_len = htons(tvlv_len);
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV
+ * @bat_priv: the bat priv with all the soft interface information
+ * @comp_neigh: next hop neighbor to scrub+collect destinations for
+ * @dest: start MAC entry in original skb's tracker TVLV
+ * @next_dest: start MAC entry in to be sent skb's tracker TVLV
+ * @num_dests: number of remaining destination MAC entries to iterate over
+ *
+ * This sorts destination entries into either the original batman-adv
+ * multicast packet or the skb (copy) that is going to be sent to comp_neigh
+ * next.
+ *
+ * In preparation for the next, to be (unicast) transmitted batman-adv multicast
+ * packet skb to be sent to the given neighbor node, tries to collect all
+ * originator MAC addresses that have the given neighbor node as their next hop
+ * in the to be transmitted skb (copy), which next_dest points into. That is we
+ * zero all destination entries in next_dest which do not have comp_neigh as
+ * their next hop. And zero all destination entries in the original skb that
+ * would have comp_neigh as their next hop (to avoid redundant transmissions and
+ * duplicated payload later).
+ */
+static void
+batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv,
+ struct batadv_neigh_node *comp_neigh, u8 *dest,
+ u8 *next_dest, u16 num_dests)
+{
+ struct batadv_neigh_node *next_neigh;
+
+ /* skip first entry, this is what we are comparing with; it is zeroed
+ * in the original tracker only and left untouched in the copy
+ */
+ eth_zero_addr(dest);
+ dest += ETH_ALEN;
+ next_dest += ETH_ALEN;
+ num_dests--;
+
+ /* NOTE(review): the iterator macro is defined outside this chunk;
+ * presumably it walks dest and next_dest in lockstep, one MAC entry at
+ * a time, for the remaining num_dests entries - confirm
+ */
+ batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) {
+ if (is_zero_ether_addr(next_dest))
+ continue;
+
+ /* sanity check, we expect unicast destinations */
+ if (is_multicast_ether_addr(next_dest)) {
+ eth_zero_addr(dest);
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL);
+ /* no next hop found: drop the entry from the copy only, the
+ * original tracker keeps it
+ */
+ if (!next_neigh) {
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ /* next hop differs from comp_neigh: not a destination for the
+ * copy, the original tracker keeps it
+ */
+ if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) {
+ eth_zero_addr(next_dest);
+ batadv_neigh_node_put(next_neigh);
+ continue;
+ }
+
+ /* found an entry for our next packet to transmit, so remove it
+ * from the original packet
+ */
+ eth_zero_addr(dest);
+ batadv_neigh_node_put(next_neigh);
+ }
+}
+
+/**
+ * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination
+ * @slot: the to be filled zero-MAC destination entry in a tracker TVLV
+ * @num_dests_slot: remaining entries in tracker TVLV from/including slot
+ *
+ * Scans the destination entries behind the given slot for the first one with
+ * a non-zero MAC address. If there is one, its address is copied into the slot
+ * and the entry it came from is zeroed.
+ *
+ * Return: true if slot was swapped/filled successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot)
+{
+	u16 remaining;
+	u8 *candidate;
+
+	/* sanity check, should not happen */
+	if (num_dests_slot == 0)
+		return false;
+
+	/* candidates start right behind the slot itself */
+	candidate = slot + ETH_ALEN;
+	remaining = num_dests_slot - 1;
+
+	batadv_mcast_forw_tracker_for_each_dest(candidate, remaining) {
+		if (!is_zero_ether_addr(candidate)) {
+			ether_addr_copy(slot, candidate);
+			eth_zero_addr(candidate);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV
+ * @skb: the batman-adv multicast packet to compact destinations in
+ *
+ * Compacts the originator destination MAC addresses in the multicast tracker
+ * TVLV of the given multicast packet. This is done by moving all non-zero
+ * MAC addresses in direction of the skb head and all zero MAC addresses in skb
+ * tail direction, within the multicast tracker TVLV.
+ *
+ * Return: The number of consecutive zero MAC address destinations which are
+ * now at the end of the multicast tracker TVLV.
+ */
+static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ u16 num_dests_slot;
+ u8 *slot;
+
+ /* the tracker header is read from the skb's network header position */
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests_slot = ntohs(mcast_tracker->num_dests);
+
+ /* destination entries follow directly behind the tracker header */
+ slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ /* NOTE(review): the iterator macro is defined outside this chunk;
+ * the return value below relies on it counting num_dests_slot down
+ * by one per visited entry - confirm
+ */
+ batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) {
+ /* find an empty slot */
+ if (!is_zero_ether_addr(slot))
+ continue;
+
+ if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot))
+ /* could not find a filler, so we successfully packed
+ * and can stop - and must not reduce num_dests_slot!
+ */
+ break;
+ }
+
+ /* num_dests_slot is now the amount of reduced, zeroed
+ * destinations at the end of the tracker TVLV
+ */
+ return num_dests_slot;
+}
+
+/**
+ * batadv_mcast_forw_shrink_align_offset() - get new alignment offset
+ * @num_dests_old: the old, to be updated amount of destination nodes
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * Determines by how many bytes the TVLV padding has to change once
+ * @num_dests_reduce destinations were taken out of a tracker TVLV which held
+ * @num_dests_old destinations before.
+ *
+ * Return:
+ *	0: If no change to padding is needed.
+ *	2: If padding needs to be removed.
+ *	-2: If padding needs to be added.
+ */
+static short
+batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old,
+				      unsigned int num_dests_reduce)
+{
+	bool parity_kept = (num_dests_reduce % 2) == 0;
+	bool was_even = (num_dests_old % 2) == 0;
+
+	/* even amount of removed destinations -> no alignment change */
+	if (parity_kept)
+		return 0;
+
+	/* even to odd -> remove padding, odd to even -> add padding */
+	return was_even ? 2 : -2;
+}
+
+/**
+ * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers
+ * @skb: the batman-adv multicast packet to update headers of
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * Adjusts every field of a batman-adv multicast packet that depends on the
+ * number of destinations in the multicast tracker TVLV:
+ *
+ * The num_dests field of the multicast tracker TVLV.
+ * The TVLV length field of the according generic TVLV header.
+ * The batman-adv multicast packet's total TVLV length field.
+ *
+ * Return: The offset in skb's tail direction at which the new batman-adv
+ * multicast packet header needs to start.
+ */
+static unsigned int
+batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb,
+					unsigned int num_dests_reduce)
+{
+	struct batadv_tvlv_mcast_tracker *tracker;
+	struct batadv_mcast_packet *packet;
+	struct batadv_tvlv_hdr *hdr;
+	unsigned char *net_hdr;
+	unsigned int shrink_by;
+	u16 old_dests;
+	short align;
+
+	net_hdr = skb_network_header(skb);
+	tracker = (struct batadv_tvlv_mcast_tracker *)net_hdr;
+	/* the generic TVLV header sits right in front of the tracker */
+	hdr = (struct batadv_tvlv_hdr *)(net_hdr - sizeof(*hdr));
+	packet = (struct batadv_mcast_packet *)skb->data;
+
+	old_dests = ntohs(tracker->num_dests);
+	align = batadv_mcast_forw_shrink_align_offset(old_dests,
+						      num_dests_reduce);
+
+	/* removed MAC entries plus/minus the change in padding */
+	shrink_by = ETH_ALEN * num_dests_reduce + align;
+
+	/* update tracker header */
+	tracker->num_dests = htons(old_dests - num_dests_reduce);
+
+	/* update tracker's tvlv header's length field */
+	hdr->len = htons(ntohs(hdr->len) - shrink_by);
+
+	/* update multicast packet header's tvlv length field */
+	packet->tvlv_len = htons(ntohs(packet->tvlv_len) - shrink_by);
+
+	return shrink_by;
+}
+
+/**
+ * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset
+ * @skb: the batman-adv multicast packet to move headers for
+ * @offset: a non-negative offset to move headers by, towards the skb tail
+ *
+ * Shifts everything from the skb data's start up to the last destination
+ * entry counted by the tracker TVLV's num_dests field - that is the
+ * batman-adv multicast packet header, any TVLVs in between and the multicast
+ * tracker TVLV - by the given offset in direction towards the tail.
+ */
+static void
+batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset)
+{
+	struct batadv_tvlv_mcast_tracker *tracker;
+	unsigned int hdrs_len;
+
+	tracker = (struct batadv_tvlv_mcast_tracker *)skb_network_header(skb);
+
+	/* bytes in front of the tracker + tracker header + its entries */
+	hdrs_len = skb_network_offset(skb);
+	hdrs_len += sizeof(*tracker);
+	hdrs_len += ntohs(tracker->num_dests) * ETH_ALEN;
+
+	batadv_mcast_forw_scrape(skb, hdrs_len, offset);
+}
+
+/**
+ * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv
+ * @skb: the batman-adv multicast packet to (potentially) shrink
+ *
+ * Packs the destinations of the given batman-adv multicast packet's tracker
+ * TVLV so that all zero MAC addresses (00:00:00:00:00:00) end up at its end.
+ * If there are any, the affected header fields are updated and the headers are
+ * moved over the freed space, so that the packet stays a valid batman-adv
+ * multicast packet.
+ */
+static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb)
+{
+	unsigned int shrink_by;
+	u16 num_removed;
+
+	num_removed = batadv_mcast_forw_shrink_pack_dests(skb);
+	if (num_removed) {
+		shrink_by = batadv_mcast_forw_shrink_update_headers(skb,
+								    num_removed);
+		batadv_mcast_forw_shrink_move_headers(skb, shrink_by);
+	}
+}
+
+/**
+ * batadv_mcast_forw_packet() - forward a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received or locally generated batman-adv multicast packet
+ * @local_xmit: indicates that the packet was locally generated and not received
+ *
+ * Parses the tracker TVLV of a batman-adv multicast packet and forwards the
+ * packet as indicated in this TVLV: for every next hop neighbor one copy of
+ * the packet is sent, with its tracker TVLV reduced to the destinations that
+ * are reached via this neighbor. Handled destinations are zeroed in the given
+ * skb's tracker TVLV.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure (-EINVAL for a tracker TVLV that does not fit in between the
+ * network and transport header, -ENOMEM if the skb could not be copied, or the
+ * error of a failed transmission). NET_RX_SUCCESS if the received packet is
+ * supposed to be decapsulated and forwarded to the own soft interface,
+ * NET_RX_DROP otherwise.
+ */
+static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv,
+				    struct sk_buff *skb, bool local_xmit)
+{
+	struct batadv_tvlv_mcast_tracker *mcast_tracker;
+	struct batadv_neigh_node *neigh_node;
+	unsigned long offset, num_dests_end;
+	struct sk_buff *nexthop_skb;
+	unsigned char *skb_net_hdr;
+	bool local_recv = false;
+	unsigned int tvlv_len;
+	bool xmitted = false;
+	u8 *dest, *next_dest;
+	u16 num_dests;
+	int ret;
+
+	/* (at least) TVLV part needs to be linearized */
+	SKB_LINEAR_ASSERT(skb);
+
+	/* check if num_dests is within skb length: the whole field needs to
+	 * fit, so compare against where it ends and not just where it starts
+	 * (sizeof does not evaluate the still unset pointer)
+	 */
+	num_dests_end = offsetof(struct batadv_tvlv_mcast_tracker, num_dests) +
+			sizeof(mcast_tracker->num_dests);
+	if (num_dests_end > skb_network_header_len(skb))
+		return -EINVAL;
+
+	skb_net_hdr = skb_network_header(skb);
+	mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+	num_dests = ntohs(mcast_tracker->num_dests);
+
+	dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+	/* check if full tracker tvlv is within skb length */
+	tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests;
+	if (tvlv_len > skb_network_header_len(skb))
+		return -EINVAL;
+
+	/* invalidate checksum: */
+	skb->ip_summed = CHECKSUM_NONE;
+
+	batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) {
+		/* already handled (or scrubbed for an earlier next hop) */
+		if (is_zero_ether_addr(dest))
+			continue;
+
+		/* only unicast originator addresses supported */
+		if (is_multicast_ether_addr(dest)) {
+			eth_zero_addr(dest);
+			continue;
+		}
+
+		/* we are a destination ourselves: remember to decapsulate */
+		if (batadv_is_my_mac(bat_priv, dest)) {
+			eth_zero_addr(dest);
+			local_recv = true;
+			continue;
+		}
+
+		neigh_node = batadv_orig_to_router(bat_priv, dest, NULL);
+		if (!neigh_node) {
+			eth_zero_addr(dest);
+			continue;
+		}
+
+		nexthop_skb = skb_copy(skb, GFP_ATOMIC);
+		if (!nexthop_skb) {
+			batadv_neigh_node_put(neigh_node);
+			return -ENOMEM;
+		}
+
+		/* same position in the copy's tracker TVLV as dest has in the
+		 * original one
+		 */
+		offset = dest - skb->data;
+		next_dest = nexthop_skb->data + offset;
+
+		/* num_dests is the number of entries left from dest onwards */
+		batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest,
+					      next_dest, num_dests);
+		batadv_mcast_forw_shrink_tracker(nexthop_skb);
+
+		batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX);
+		batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES,
+				   nexthop_skb->len + ETH_HLEN);
+		xmitted = true;
+		ret = batadv_send_unicast_skb(nexthop_skb, neigh_node);
+
+		batadv_neigh_node_put(neigh_node);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	if (xmitted) {
+		if (local_xmit) {
+			batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL);
+			batadv_add_counter(bat_priv,
+					   BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+					   skb->len -
+					   skb_transport_offset(skb));
+		} else {
+			batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD);
+			batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES,
+					   skb->len + ETH_HLEN);
+		}
+	}
+
+	if (local_recv)
+		return NET_RX_SUCCESS;
+	else
+		return NET_RX_DROP;
+}
+
+/**
+ * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received batman-adv multicast packet
+ *
+ * Hands the incoming batman-adv multicast packet to
+ * batadv_mcast_forw_packet(), marked as not locally generated, which parses
+ * the tracker TVLV and forwards the packet as indicated in it.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure. NET_RX_SUCCESS if the received packet is supposed to be
+ * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise.
+ */
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+					   struct sk_buff *skb)
+{
+	/* packet was received, not locally generated */
+	const bool local_xmit = false;
+
+	return batadv_mcast_forw_packet(bat_priv, skb, local_xmit);
+}
+
+/**
+ * batadv_mcast_forw_packet_hdrlen() - multicast packet header length
+ * @num_dests: number of destination nodes
+ *
+ * Sums up the sizes of the batman-adv multicast packet header, the generic
+ * TVLV header, the multicast tracker header, the destination entries and the
+ * tracker TVLV padding for the given number of destination nodes (excluding
+ * the outer ethernet frame).
+ *
+ * Return: The calculated total batman-adv multicast packet header length.
+ */
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests)
+{
+	unsigned int hdrlen;
+
+	hdrlen = sizeof(struct batadv_mcast_packet);
+	hdrlen += sizeof(struct batadv_tvlv_hdr);
+	hdrlen += sizeof(struct batadv_tvlv_mcast_tracker);
+	hdrlen += num_dests * ETH_ALEN;
+
+	/* If the number of destination entries is even then we need to add
+	 * two byte padding to the tracker TVLV.
+	 */
+	if (num_dests % 2 == 0)
+		hdrlen += 2;
+
+	return hdrlen;
+}
+
+/**
+ * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet
+ * @bat_priv: the bat priv with all the soft interface information (not used
+ *  by this function)
+ * @skb: the multicast packet to send
+ *
+ * Makes sure the skb has at least as much headroom as its length is short of
+ * 1298 bytes (minimum IPv6 MTU + vlan ethernet header size), expanding the
+ * skb head if necessary.
+ *
+ * Return: -EINVAL if the given skb's length is too large or -ENOMEM on memory
+ * allocation failure. Otherwise, on success, zero is returned.
+ */
+static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv,
+					 struct sk_buff *skb)
+{
+	int needed = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len;
+
+	/* TODO: Could be tightened to actual number of destination nodes?
+	 * But it's tricky, number of destinations might have increased since
+	 * we last checked.
+	 */
+	if (needed < 0) {
+		/* batadv_mcast_forw_mode_check_count() should ensure we do not
+		 * end up here
+		 */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	/* enough headroom already, nothing to expand */
+	if (skb_headroom(skb) >= needed)
+		return 0;
+
+	if (pskb_expand_head(skb, needed, 0, GFP_ATOMIC) < 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ *
+ * Wraps the given multicast packet into a batman-adv multicast packet: after
+ * ensuring sufficient headroom, a multicast tracker TVLV with the destination
+ * originator addresses of all nodes that signaled interest - either via the
+ * translation table or the according want-all flags - is pushed, followed by
+ * the batman-adv multicast packet header. On failure, any TVLV bytes accounted
+ * for so far are pulled off the skb again.
+ *
+ * Return: true on success, false otherwise.
+ */
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			    unsigned short vid, int is_routable, int count)
+{
+	unsigned short pushed_len = 0;
+
+	if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0)
+		goto undo;
+
+	/* the payload starts where the skb data currently starts */
+	skb_reset_transport_header(skb);
+
+	if (batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable,
+					 count, &pushed_len) < 0)
+		goto undo;
+
+	if (batadv_mcast_forw_push_hdr(skb, pushed_len) < 0)
+		goto undo;
+
+	return true;
+
+undo:
+	if (pushed_len)
+		skb_pull(skb, pushed_len);
+
+	return false;
+}
+
+/**
+ * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ *
+ * Forwards a locally prepared batman-adv multicast packet via
+ * batadv_mcast_forw_packet() and releases the skb afterwards in either case.
+ *
+ * Return: NET_XMIT_DROP if forwarding failed with a negative error code.
+ * NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+			     struct sk_buff *skb)
+{
+	if (batadv_mcast_forw_packet(bat_priv, skb, true) >= 0) {
+		consume_skb(skb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0c64d81a7761..1f7ed9d4f6fd 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -7,6 +7,7 @@
#include "netlink.h"
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bug.h>
@@ -20,7 +21,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
-#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 34903df4fe93..71c143d4b6d0 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_orig_to_router() - get next hop neighbor to an orig address
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_addr: the originator MAC address to search the best next hop router for
+ * @if_outgoing: the interface where the payload packet has been received or
+ * the OGM should be sent to
+ *
+ * Return: A neighbor node which is the best router towards the given originator
+ * address.
+ */
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+		      struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_neigh_node *router = NULL;
+	struct batadv_orig_node *orig;
+
+	/* an address without an originator entry yields NULL */
+	orig = batadv_orig_hash_find(bat_priv, orig_addr);
+	if (orig) {
+		router = batadv_find_router(bat_priv, orig, if_outgoing);
+		batadv_orig_node_put(orig);
+	}
+
+	return router;
+}
+
+/**
* batadv_orig_ifinfo_get() - find the ifinfo from an orig_node
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
@@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
#ifdef CONFIG_BATMAN_ADV_MCAST
orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4;
orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
+ orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index ea3d69e4e670..db0c55128170 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing);
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 163cd43c4821..f1061985149f 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1270,3 +1270,73 @@ out:
batadv_orig_node_put(orig_node);
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * batadv_recv_mcast_packet() - process received batman-adv multicast packet
+ * @skb: the received batman-adv multicast packet
+ * @recv_if: interface that the skb is received on
+ *
+ * Parses the given, received batman-adv multicast packet. Depending on the
+ * contents of its TVLV forwards it and/or decapsulates it to hand it to the
+ * soft interface.
+ *
+ * The skb is either handed over to batadv_interface_rx() or passed to
+ * kfree_skb() on every path through this function.
+ *
+ * Return: NET_RX_DROP if the packet fails one of the checks made before TVLV
+ * processing. Otherwise the result of batadv_tvlv_containers_process() is
+ * returned unchanged.
+ * NOTE(review): that result is compared against >= 0 below, so it presumably
+ * can be a negative error code, too - confirm against the callers.
+ */
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_mcast_packet *mcast_packet;
+ int hdr_size = sizeof(*mcast_packet);
+ unsigned char *tvlv_buff;
+ int ret = NET_RX_DROP;
+ u16 tvlv_buff_len;
+
+ if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
+ goto free_skb;
+
+ /* create a copy of the skb, if needed, to modify it. */
+ if (skb_cow(skb, ETH_HLEN) < 0)
+ goto free_skb;
+
+ /* packet needs to be linearized to access the tvlv content */
+ if (skb_linearize(skb) < 0)
+ goto free_skb;
+
+ /* the header pointer is only taken after the skb was made writable
+ * and linear above; the TTL is decremented in place, packets arriving
+ * with a TTL below 2 are dropped
+ */
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ if (mcast_packet->ttl-- < 2)
+ goto free_skb;
+
+ tvlv_buff = (unsigned char *)(skb->data + hdr_size);
+ tvlv_buff_len = ntohs(mcast_packet->tvlv_len);
+
+ /* the announced TVLV length must not exceed what follows the header */
+ if (tvlv_buff_len > skb->len - hdr_size)
+ goto free_skb;
+
+ ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb,
+ tvlv_buff, tvlv_buff_len);
+ if (ret >= 0) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES,
+ skb->len + ETH_HLEN);
+ }
+
+ /* from here on hdr_size covers the multicast header plus all TVLVs */
+ hdr_size += tvlv_buff_len;
+
+ /* only deliver locally if at least an ethernet header is left */
+ if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+ skb->len - hdr_size);
+
+ batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL);
+ /* skb was consumed */
+ skb = NULL;
+ }
+
+free_skb:
+ kfree_skb(skb);
+
+ return ret;
+}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index afd15b3879f1..e9849f032a24 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
+#else
+/* stub used when CONFIG_BATMAN_ADV_MCAST is not set: frees the skb and
+ * reports it as dropped
+ */
+static inline int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+#endif
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 1bf1232a4f75..89c51b3cf430 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
send:
if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
- forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
+ forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid,
&mcast_is_routable);
switch (forw_mode) {
case BATADV_FORW_BCAST:
break;
case BATADV_FORW_UCASTS:
+ case BATADV_FORW_MCAST:
do_bcast = false;
break;
case BATADV_FORW_NONE:
@@ -365,6 +366,8 @@ send:
} else if (forw_mode == BATADV_FORW_UCASTS) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid,
mcast_is_routable);
+ } else if (forw_mode == BATADV_FORW_MCAST) {
+ ret = batadv_mcast_forw_mcsend(bat_priv, skb);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
@@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
+ atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0);
#endif
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
@@ -925,6 +929,18 @@ static const struct {
{ "tt_response_rx" },
{ "tt_roam_adv_tx" },
{ "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ { "mcast_tx" },
+ { "mcast_tx_bytes" },
+ { "mcast_tx_local" },
+ { "mcast_tx_local_bytes" },
+ { "mcast_rx" },
+ { "mcast_rx_bytes" },
+ { "mcast_rx_local" },
+ { "mcast_rx_local_bytes" },
+ { "mcast_fwd" },
+ { "mcast_fwd_bytes" },
+#endif
#ifdef CONFIG_BATMAN_ADV_DAT
{ "dat_get_tx" },
{ "dat_get_rx" },
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 17d5ea1d8e84..00840d5784fe 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -862,6 +862,70 @@ enum batadv_counters {
*/
BATADV_CNT_TT_ROAM_ADV_RX,
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ /**
+ * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets
+ * counter
+ */
+ BATADV_CNT_MCAST_TX,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_TX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which
+ * were locally encapsulated and transmitted as batman-adv multicast
+ * packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets
+ * which were locally encapsulated and transmitted as batman-adv
+ * multicast packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter
+ */
+ BATADV_CNT_MCAST_RX,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_RX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast
+ * packets which were forwarded to the local soft interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received
+ * batman-adv multicast packets which were forwarded to the local soft
+ * interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast
+ * packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv
+ * multicast packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD_BYTES,
+#endif
+
#ifdef CONFIG_BATMAN_ADV_DAT
/**
* @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
@@ -1279,6 +1343,12 @@ struct batadv_priv_mcast {
atomic_t num_want_all_rtr6;
/**
+ * @num_no_mc_ptype_capa: counter for number of nodes without the
+ * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag
+ */
+ atomic_t num_no_mc_ptype_capa;
+
+ /**
* @want_lists_lock: lock for protecting modifications to mcasts
* want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
*/
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c9fdcc5cdce1..711cf5d59816 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -542,7 +542,7 @@ struct bpf_fentry_test_t {
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
- asm volatile ("");
+ asm volatile ("": "+r"(arg));
return (long)arg;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6b7f36769d03..051ea81864ac 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -186,6 +186,7 @@ enum {
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
+ * @tnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
* @priv_flags: private (in-kernel) bridge vlan flags
@@ -196,6 +197,7 @@ enum {
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
* for this VLAN entry
+ * @tinfo: bridge tunnel info
* @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
* @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
* context
diff --git a/net/core/Makefile b/net/core/Makefile
index 0cb734cbc24b..821aec06abf1 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,7 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
-obj-$(CONFIG_PAGE_POOL) += page_pool.o
+obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/dev.c b/net/core/dev.c
index c879246be48d..0432b04cf9b0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -165,7 +165,6 @@ static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
-static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -6139,7 +6138,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we dont take a reference */
-static struct napi_struct *napi_by_id(unsigned int napi_id)
+struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@@ -6400,6 +6399,43 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
+/**
+ * netif_queue_set_napi - Link a device queue to a NAPI instance
+ * @dev: device to which NAPI and queue belong
+ * @queue_index: Index of queue
+ * @type: queue type as RX or TX
+ * @napi: NAPI context, pass NULL to clear previously set NAPI
+ *
+ * Store @napi in the RX or TX queue selected by @type and @queue_index.
+ * This should be done after registering the NAPI handler for the
+ * queue-vector and the queues have been mapped to the corresponding
+ * interrupt vector. Any other queue type is silently ignored.
+ */
+void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+			  enum netdev_queue_type type, struct napi_struct *napi)
+{
+	/* A NAPI instance that is being set must already have a device */
+	if (WARN_ON_ONCE(napi && !napi->dev))
+		return;
+
+	/* From registration onwards the caller is expected to hold RTNL */
+	if (dev->reg_state >= NETREG_REGISTERED)
+		ASSERT_RTNL();
+
+	if (type == NETDEV_QUEUE_TYPE_RX)
+		__netif_get_rx_queue(dev, queue_index)->napi = napi;
+	else if (type == NETDEV_QUEUE_TYPE_TX)
+		netdev_get_tx_queue(dev, queue_index)->napi = napi;
+}
+EXPORT_SYMBOL(netif_queue_set_napi);
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6435,6 +6471,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
+ netif_napi_set_irq(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
@@ -10511,7 +10548,7 @@ void netdev_run_todo(void)
write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
write_unlock(&dev_base_lock);
- linkwatch_forget_dev(dev);
+ linkwatch_sync_dev(dev);
}
while (!list_empty(&list)) {
@@ -11236,17 +11273,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- /* Send a netdev-add uevent to the new namespace */
- kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
- netdev_adjacent_add_links(dev);
-
if (new_name[0]) /* Rename the netdev to prepared name */
strscpy(dev->name, new_name, IFNAMSIZ);
/* Fixup kobjects */
+ dev_set_uevent_suppress(&dev->dev, 1);
err = device_rename(&dev->dev, dev->name);
+ dev_set_uevent_suppress(&dev->dev, 0);
WARN_ON(err);
+ /* Send a netdev-add uevent to the new namespace */
+ kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
+
/* Adapt owner in case owning user namespace of target network
* namespace is different from the original one.
*/
@@ -11570,6 +11609,60 @@ static struct pernet_operations __net_initdata default_device_ops = {
.exit_batch = default_device_exit_batch,
};
+/* Layout checks for the hot-path member groups of struct net_device.
+ *
+ * For each group (net_device_read_tx, net_device_read_txrx and
+ * net_device_read_rx) the CACHELINE_ASSERT_GROUP_MEMBER() lines name the
+ * fields expected to be part of the group, and the closing
+ * CACHELINE_ASSERT_GROUP_SIZE() line names the expected size of the group.
+ * Fields that only exist under a config option are checked under the same
+ * #ifdef.
+ *
+ * NOTE(review): the macros are defined outside this file; presumably they
+ * expand to build-time assertions and the size arguments (152, 30, 96) are
+ * byte counts that must be revisited whenever a group changes - confirm
+ * against the macro definitions.
+ */
+static void __init net_dev_struct_check(void)
+{
+ /* TX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
+#ifdef CONFIG_XPS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152);
+
+ /* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
+
+ /* RX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
+#ifdef CONFIG_NETPOLL
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96);
+}
+
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
@@ -11587,6 +11680,8 @@ static int __init net_dev_init(void)
BUG_ON(!dev_boot_phase);
+ net_dev_struct_check();
+
if (dev_proc_init())
goto out;
diff --git a/net/core/dev.h b/net/core/dev.h
index 5aa45f0fd4ae..cf93e188785b 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -30,7 +30,6 @@ int __init dev_proc_init(void);
#endif
void linkwatch_init_dev(struct net_device *dev);
-void linkwatch_forget_dev(struct net_device *dev);
void linkwatch_run_queue(void);
void dev_addr_flush(struct net_device *dev);
@@ -145,4 +144,6 @@ void xdp_do_check_flushed(struct napi_struct *napi);
#else
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
+
+struct napi_struct *napi_by_id(unsigned int napi_id);
#endif
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index feeddf95f450..9a66cf5015f2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
* dev->priv_flags.
*/
-static int dev_set_hwtstamp_phylib(struct net_device *dev,
- struct kernel_hwtstamp_config *cfg,
- struct netlink_ext_ack *extack)
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
bool phy_ts = phy_has_hwtstamp(dev->phydev);
@@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev,
return 0;
}
+EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib);
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index 1737884be52f..eedb33f3e998 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7257,7 +7257,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
struct tcphdr *, th, u32, th_len)
{
#ifdef CONFIG_SYN_COOKIES
- u32 cookie;
int ret;
if (unlikely(!sk || th_len < sizeof(*th)))
@@ -7279,8 +7278,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
- cookie = ntohl(th->ack_seq) - 1;
-
/* Both struct iphdr and struct ipv6hdr have the version field at the
* same offset so we can cast to the shorter header (struct iphdr).
*/
@@ -7289,7 +7286,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
- ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+ ret = __cookie_v4_check((struct iphdr *)iph, th);
break;
#if IS_BUILTIN(CONFIG_IPV6)
@@ -7300,7 +7297,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_family != AF_INET6)
return -EINVAL;
- ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+ ret = __cookie_v6_check((struct ipv6hdr *)iph, th);
break;
#endif /* CONFIG_IPV6 */
@@ -7753,9 +7750,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
struct tcphdr *, th)
{
- u32 cookie = ntohl(th->ack_seq) - 1;
-
- if (__cookie_v4_check(iph, th, cookie) > 0)
+ if (__cookie_v4_check(iph, th) > 0)
return 0;
return -EACCES;
@@ -7776,9 +7771,7 @@ BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
struct tcphdr *, th)
{
#if IS_BUILTIN(CONFIG_IPV6)
- u32 cookie = ntohl(th->ack_seq) - 1;
-
- if (__cookie_v6_check(iph, th, cookie) > 0)
+ if (__cookie_v6_check(iph, th) > 0)
return 0;
return -EACCES;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c469d1c4db5d..7be5b3ab32bd 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -192,8 +192,6 @@ static void __linkwatch_run_queue(int urgent_only)
#define MAX_DO_DEV_PER_LOOP 100
int do_dev = MAX_DO_DEV_PER_LOOP;
- struct net_device *dev;
- LIST_HEAD(wrk);
/* Give urgent case more budget */
if (urgent_only)
@@ -215,11 +213,11 @@ static void __linkwatch_run_queue(int urgent_only)
clear_bit(LW_URGENT, &linkwatch_flags);
spin_lock_irq(&lweventlist_lock);
- list_splice_init(&lweventlist, &wrk);
+ while (!list_empty(&lweventlist) && do_dev > 0) {
+ struct net_device *dev;
- while (!list_empty(&wrk) && do_dev > 0) {
-
- dev = list_first_entry(&wrk, struct net_device, link_watch_list);
+ dev = list_first_entry(&lweventlist, struct net_device,
+ link_watch_list);
list_del_init(&dev->link_watch_list);
if (!netif_device_present(dev) ||
@@ -237,15 +235,12 @@ static void __linkwatch_run_queue(int urgent_only)
spin_lock_irq(&lweventlist_lock);
}
- /* Add the remaining work back to lweventlist */
- list_splice_init(&wrk, &lweventlist);
-
if (!list_empty(&lweventlist))
linkwatch_schedule_work(0);
spin_unlock_irq(&lweventlist_lock);
}
-void linkwatch_forget_dev(struct net_device *dev)
+void linkwatch_sync_dev(struct net_device *dev)
{
unsigned long flags;
int clean = 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fccaa5bac0ed..d9b33e923b18 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -194,8 +194,14 @@ static ssize_t carrier_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
- if (netif_running(netdev))
+ if (netif_running(netdev)) {
+ /* Synchronize carrier state with link watch,
+ * see also rtnl_getlink().
+ */
+ linkwatch_sync_dev(netdev);
+
return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
+ }
return -EINVAL;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f4183c4c1ec8..cb8bcbff9e83 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1099,11 +1099,56 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
+#ifdef CONFIG_NET_NS
+/* Layout checks for the hot-path sysctl groups of struct netns_ipv4.
+ *
+ * Each CACHELINE_ASSERT_GROUP_MEMBER() line names a field expected to be in
+ * the given group (netns_ipv4_read_tx, netns_ipv4_read_txrx or
+ * netns_ipv4_read_rx); each CACHELINE_ASSERT_GROUP_SIZE() line names the
+ * expected size of that group. Only built, and only called from
+ * net_ns_init(), when CONFIG_NET_NS is set.
+ *
+ * NOTE(review): the macros are defined outside this file; presumably they
+ * are build-time assertions and the sizes (33, 1, 18) are byte counts -
+ * confirm against the macro definitions before changing a group.
+ */
+static void __init netns_ipv4_struct_check(void)
+{
+ /* TX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_early_retrans);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_win_divisor);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_rtt_log);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_autocorking);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_snd_mss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_limit_output_bytes);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_rtt_wlen);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_wmem);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_ip_fwd_use_pmtu);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);
+
+ /* TXRX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
+ sysctl_tcp_moderate_rcvbuf);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);
+
+ /* RX readonly hotpath cache line */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_ip_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_rmem);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+}
+#endif
+
void __init net_ns_init(void)
{
struct net_generic *ng;
#ifdef CONFIG_NET_NS
+ netns_ipv4_struct_check();
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index ea9231378aa6..be7f2ebd61b2 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -10,11 +10,64 @@
#include <uapi/linux/netdev.h>
+/* Integer value ranges */
+static const struct netlink_range_validation netdev_a_page_pool_id_range = {
+ .min = 1ULL,
+ .max = 4294967295ULL,
+};
+
+static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = {
+ .min = 1ULL,
+ .max = 2147483647ULL,
+};
+
+/* Common nested types */
+const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
+ [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
+ [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
+};
+
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
[NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
};
+/* NETDEV_CMD_PAGE_POOL_GET - do */
+#ifdef CONFIG_PAGE_POOL
+static const struct nla_policy netdev_page_pool_get_nl_policy[NETDEV_A_PAGE_POOL_ID + 1] = {
+ [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
+};
+#endif /* CONFIG_PAGE_POOL */
+
+/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */
+#ifdef CONFIG_PAGE_POOL_STATS
+static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAGE_POOL_STATS_INFO + 1] = {
+ [NETDEV_A_PAGE_POOL_STATS_INFO] = NLA_POLICY_NESTED(netdev_page_pool_info_nl_policy),
+};
+#endif /* CONFIG_PAGE_POOL_STATS */
+
+/* NETDEV_CMD_QUEUE_GET - do */
+static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = {
+ [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, },
+};
+
+/* NETDEV_CMD_QUEUE_GET - dump */
+static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = {
+ [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+/* NETDEV_CMD_NAPI_GET - do */
+static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = {
+ [NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
+};
+
+/* NETDEV_CMD_NAPI_GET - dump */
+static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = {
+ [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -29,10 +82,67 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.dumpit = netdev_nl_dev_get_dumpit,
.flags = GENL_CMD_CAP_DUMP,
},
+#ifdef CONFIG_PAGE_POOL
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_GET,
+ .doit = netdev_nl_page_pool_get_doit,
+ .policy = netdev_page_pool_get_nl_policy,
+ .maxattr = NETDEV_A_PAGE_POOL_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_GET,
+ .dumpit = netdev_nl_page_pool_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+#endif /* CONFIG_PAGE_POOL */
+#ifdef CONFIG_PAGE_POOL_STATS
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET,
+ .doit = netdev_nl_page_pool_stats_get_doit,
+ .policy = netdev_page_pool_stats_get_nl_policy,
+ .maxattr = NETDEV_A_PAGE_POOL_STATS_INFO,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET,
+ .dumpit = netdev_nl_page_pool_stats_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+#endif /* CONFIG_PAGE_POOL_STATS */
+ {
+ .cmd = NETDEV_CMD_QUEUE_GET,
+ .doit = netdev_nl_queue_get_doit,
+ .policy = netdev_queue_get_do_nl_policy,
+ .maxattr = NETDEV_A_QUEUE_TYPE,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_QUEUE_GET,
+ .dumpit = netdev_nl_queue_get_dumpit,
+ .policy = netdev_queue_get_dump_nl_policy,
+ .maxattr = NETDEV_A_QUEUE_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_GET,
+ .doit = netdev_nl_napi_get_doit,
+ .policy = netdev_napi_get_do_nl_policy,
+ .maxattr = NETDEV_A_NAPI_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_GET,
+ .dumpit = netdev_nl_napi_get_dumpit,
+ .policy = netdev_napi_get_dump_nl_policy,
+ .maxattr = NETDEV_A_NAPI_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
[NETDEV_NLGRP_MGMT] = { "mgmt", },
+ [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", },
};
struct genl_family netdev_nl_family __ro_after_init = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 7b370c073e7d..a47f2bcbe4fa 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -11,11 +11,27 @@
#include <uapi/linux/netdev.h>
+/* Common nested types */
+extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
+
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_queue_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
enum {
NETDEV_NLGRP_MGMT,
+ NETDEV_NLGRP_PAGE_POOL,
};
extern struct genl_family netdev_nl_family;
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index fe61f85bcf33..fd98936da3ae 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -6,13 +6,32 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/xdp.h>
+#include <net/xdp_sock.h>
+#include <net/netdev_rx_queue.h>
+#include <net/busy_poll.h>
#include "netdev-genl-gen.h"
+#include "dev.h"
+
+/* Resume state of a netdev genetlink dump, overlaid on cb->ctx (see
+ * netdev_dump_ctx()). All members are positions that let an interrupted
+ * dump continue in a later callback invocation.
+ */
+struct netdev_nl_dump_ctx {
+ /* netdev position, advanced by for_each_netdev_dump() */
+ unsigned long ifindex;
+ /* RX queue position inside the current netdev (queue dumps) */
+ unsigned int rxq_idx;
+ /* TX queue position inside the current netdev (queue dumps) */
+ unsigned int txq_idx;
+ /* NAPI position inside the current netdev (NAPI dumps); 0 = none yet */
+ unsigned int napi_id;
+};
+
+/* Give typed access to the dump state kept in the netlink callback.
+ * NL_ASSERT_DUMP_CTX_FITS() checks that struct netdev_nl_dump_ctx fits into
+ * the cb->ctx scratch area it is overlaid on.
+ */
+static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
+{
+	struct netdev_nl_dump_ctx *state;
+
+	NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx);
+
+	state = (struct netdev_nl_dump_ctx *)cb->ctx;
+	return state;
+}
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
+ u64 xsk_features = 0;
u64 xdp_rx_meta = 0;
void *hdr;
@@ -26,11 +45,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
+ if (netdev->xsk_tx_metadata_ops) {
+ if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
+ if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
+ }
+
if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
netdev->xdp_features, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
- xdp_rx_meta, NETDEV_A_DEV_PAD)) {
+ xdp_rx_meta, NETDEV_A_DEV_PAD) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
+ xsk_features, NETDEV_A_DEV_PAD)) {
genlmsg_cancel(rsp, hdr);
return -EINVAL;
}
@@ -111,12 +139,13 @@ err_free_msg:
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
int err = 0;
rtnl_lock();
- for_each_netdev_dump(net, netdev, cb->args[0]) {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
if (err < 0)
break;
@@ -129,6 +158,317 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+/* Append one NAPI message describing @napi to @rsp.
+ *
+ * Returns -EINVAL (with a one-time warning) if @napi has no device, 0
+ * without writing anything if the device is not IFF_UP, 0 after a complete
+ * message was added, or -EMSGSIZE if the message did not fit (the partial
+ * message is cancelled).
+ *
+ * The ID, IRQ and PID attributes are optional: they are only emitted for
+ * napi_id >= MIN_NAPI_ID, irq >= 0 and a non-NULL napi->thread respectively.
+ */
+static int
+netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
+			const struct genl_info *info)
+{
+	void *msg;
+
+	if (WARN_ON_ONCE(!napi->dev))
+		return -EINVAL;
+	if (!(napi->dev->flags & IFF_UP))
+		return 0;
+
+	msg = genlmsg_iput(rsp, info);
+	if (!msg)
+		return -EMSGSIZE;
+
+	if (napi->napi_id >= MIN_NAPI_ID &&
+	    nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
+		goto cancel_msg;
+	if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
+		goto cancel_msg;
+	if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
+		goto cancel_msg;
+	if (napi->thread &&
+	    nla_put_u32(rsp, NETDEV_A_NAPI_PID, task_pid_nr(napi->thread)))
+		goto cancel_msg;
+
+	genlmsg_end(rsp, msg);
+	return 0;
+
+cancel_msg:
+	genlmsg_cancel(rsp, msg);
+	return -EMSGSIZE;
+}
+
+/* NETDEV_CMD_NAPI_GET (do): report the NAPI instance with the requested ID.
+ *
+ * Returns 0 after replying, -EINVAL if the NAPI_ID attribute is missing or
+ * no NAPI instance has that ID, -ENOMEM if the reply cannot be allocated,
+ * -ENOENT if the instance exists but nothing can be reported for it, or the
+ * error from filling / sending the reply.
+ */
+int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct napi_struct *napi;
+	struct sk_buff *rsp;
+	u32 napi_id;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
+		return -EINVAL;
+
+	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
+
+	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!rsp)
+		return -ENOMEM;
+
+	rtnl_lock();
+	/* napi_by_id() takes no reference and must run under
+	 * rcu_read_lock(); keep the read section open for as long as the
+	 * returned pointer is dereferenced.
+	 */
+	rcu_read_lock();
+
+	napi = napi_by_id(napi_id);
+	if (napi)
+		err = netdev_nl_napi_fill_one(rsp, napi, info);
+	else
+		err = -EINVAL;
+
+	rcu_read_unlock();
+	rtnl_unlock();
+
+	/* The fill helper returns 0 without writing a message when the
+	 * device is down; a do request must not be answered with an empty
+	 * reply.
+	 */
+	if (!err && !rsp->len)
+		err = -ENOENT;
+	if (err)
+		goto err_free_msg;
+
+	return genlmsg_reply(rsp, info);
+
+err_free_msg:
+	nlmsg_free(rsp);
+	return err;
+}
+
+/* Dump the NAPI instances of @netdev into @rsp.
+ *
+ * Nothing is emitted for a device that is not IFF_UP. ctx->napi_id is set to
+ * the ID of each instance after it was written. While it is non-zero,
+ * instances whose ID is not below it are skipped - presumably because they
+ * were already dumped in an earlier pass (assumes IDs decrease along
+ * napi_list; confirm). Returns 0, or the first error from the fill helper.
+ */
+static int
+netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+			const struct genl_info *info,
+			struct netdev_nl_dump_ctx *ctx)
+{
+	struct napi_struct *napi;
+	int ret;
+
+	if (!(netdev->flags & IFF_UP))
+		return 0;
+
+	list_for_each_entry(napi, &netdev->napi_list, dev_list) {
+		if (!ctx->napi_id || napi->napi_id < ctx->napi_id) {
+			ret = netdev_nl_napi_fill_one(rsp, napi, info);
+			if (ret)
+				return ret;
+			ctx->napi_id = napi->napi_id;
+		}
+	}
+
+	return 0;
+}
+
+/* NETDEV_CMD_NAPI_GET (dump): dump NAPI instances of one netdev (when the
+ * NAPI_IFINDEX attribute is given) or of every netdev in the socket's netns.
+ *
+ * Runs under RTNL. Returns skb->len when the message buffer filled up
+ * (-EMSGSIZE from the helpers), otherwise the helper result: 0 on
+ * completion, -ENODEV for an unknown ifindex, or another error.
+ */
+int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+	const struct genl_info *info = genl_info_dump(cb);
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	u32 ifindex = 0;
+	int err = 0;
+
+	if (info->attrs[NETDEV_A_NAPI_IFINDEX])
+		ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
+
+	rtnl_lock();
+	if (!ifindex) {
+		for_each_netdev_dump(net, netdev, ctx->ifindex) {
+			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+			if (err < 0)
+				break;
+			/* this netdev is done, start the next one afresh */
+			ctx->napi_id = 0;
+		}
+	} else {
+		netdev = __dev_get_by_index(net, ifindex);
+		if (!netdev)
+			err = -ENODEV;
+		else
+			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+	}
+	rtnl_unlock();
+
+	if (err == -EMSGSIZE)
+		return skb->len;
+
+	return err;
+}
+
+/* Append one queue message to @rsp: queue ID, queue type and ifindex, plus
+ * the NAPI ID when the RX/TX queue at @q_idx has a NAPI instance attached.
+ * Queue types other than RX and TX get no NAPI attribute.
+ *
+ * Returns 0 on success or -EMSGSIZE if the message did not fit (the partial
+ * message is cancelled). @q_idx is not range-checked here.
+ */
+static int
+netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
+			 u32 q_idx, u32 q_type, const struct genl_info *info)
+{
+	struct napi_struct *napi = NULL;
+	void *msg;
+
+	msg = genlmsg_iput(rsp, info);
+	if (!msg)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
+	    nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
+	    nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
+		goto cancel_msg;
+
+	if (q_type == NETDEV_QUEUE_TYPE_RX)
+		napi = __netif_get_rx_queue(netdev, q_idx)->napi;
+	else if (q_type == NETDEV_QUEUE_TYPE_TX)
+		napi = netdev_get_tx_queue(netdev, q_idx)->napi;
+
+	if (napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id))
+		goto cancel_msg;
+
+	genlmsg_end(rsp, msg);
+	return 0;
+
+cancel_msg:
+	genlmsg_cancel(rsp, msg);
+	return -EMSGSIZE;
+}
+
+/* Check that @q_id names an active queue of @netdev for the given type.
+ * RX and TX IDs are bounded by real_num_rx_queues / real_num_tx_queues;
+ * any other @q_type is accepted unchecked.
+ *
+ * Returns 0 if acceptable, -EINVAL if the ID is out of range.
+ */
+static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
+				    u32 q_type)
+{
+	u32 limit;
+
+	if (q_type == NETDEV_QUEUE_TYPE_RX)
+		limit = netdev->real_num_rx_queues;
+	else if (q_type == NETDEV_QUEUE_TYPE_TX)
+		limit = netdev->real_num_tx_queues;
+	else
+		return 0;
+
+	if (q_id >= limit)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Validate a single-queue request and fill the reply message.
+ *
+ * Returns 0 after a message was written to @rsp, -ENOENT if @netdev is not
+ * IFF_UP, -EINVAL if @q_idx is out of range for @q_type, or -EMSGSIZE if the
+ * message did not fit.
+ */
+static int
+netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
+		     u32 q_type, const struct genl_info *info)
+{
+	int err;
+
+	/* Returning 0 here would make the do handler send an empty reply;
+	 * report the missing object as an error instead.
+	 */
+	if (!(netdev->flags & IFF_UP))
+		return -ENOENT;
+
+	err = netdev_nl_queue_validate(netdev, q_idx, q_type);
+	if (err)
+		return err;
+
+	return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
+}
+
+/* NETDEV_CMD_QUEUE_GET (do): report one queue, identified by the QUEUE_ID,
+ * QUEUE_TYPE and QUEUE_IFINDEX attributes, all of which are required.
+ *
+ * The netdev is looked up in the request's netns under RTNL. Returns 0
+ * after replying, -EINVAL for a missing attribute, -ENOMEM if the reply
+ * cannot be allocated, -ENODEV for an unknown ifindex, or the error from
+ * netdev_nl_queue_fill() / genlmsg_reply().
+ */
+int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	u32 q_id, q_type, ifindex;
+	struct net_device *netdev;
+	struct sk_buff *rsp;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
+	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
+	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
+		return -EINVAL;
+
+	q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
+	q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
+	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!rsp)
+		return -ENOMEM;
+
+	rtnl_lock();
+	netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+	if (!netdev)
+		err = -ENODEV;
+	else
+		err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
+	rtnl_unlock();
+
+	if (err) {
+		nlmsg_free(rsp);
+		return err;
+	}
+
+	return genlmsg_reply(rsp, info);
+}
+
+/* Dump all RX queues, then all TX queues, of @netdev into @rsp.
+ *
+ * Nothing is emitted for a device that is not IFF_UP. ctx->rxq_idx and
+ * ctx->txq_idx always hold the index of the NEXT queue to dump, so a dump
+ * interrupted by -EMSGSIZE resumes exactly where it stopped: no queue is
+ * reported twice, and the RX pass is not re-entered once it has completed.
+ *
+ * Returns 0 when every queue was written, or the first error from
+ * netdev_nl_queue_fill_one().
+ */
+static int
+netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+			 const struct genl_info *info,
+			 struct netdev_nl_dump_ctx *ctx)
+{
+	int err = 0;
+
+	if (!(netdev->flags & IFF_UP))
+		return err;
+
+	/* Advance the cursor only after a queue was written successfully */
+	for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
+		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
+					       NETDEV_QUEUE_TYPE_RX, info);
+		if (err)
+			return err;
+	}
+	for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
+		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
+					       NETDEV_QUEUE_TYPE_TX, info);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+/* NETDEV_CMD_QUEUE_GET (dump): dump the queues of one netdev (when the
+ * QUEUE_IFINDEX attribute is given) or of every netdev in the socket's netns.
+ *
+ * Runs under RTNL. Returns skb->len when the message buffer filled up
+ * (-EMSGSIZE from the helpers), otherwise the helper result: 0 on
+ * completion, -ENODEV for an unknown ifindex, or another error.
+ */
+int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+	const struct genl_info *info = genl_info_dump(cb);
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	u32 ifindex = 0;
+	int err = 0;
+
+	if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
+		ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+	rtnl_lock();
+	if (!ifindex) {
+		for_each_netdev_dump(net, netdev, ctx->ifindex) {
+			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
+			if (err < 0)
+				break;
+			/* this netdev is done, rewind the queue cursors */
+			ctx->rxq_idx = 0;
+			ctx->txq_idx = 0;
+		}
+	} else {
+		netdev = __dev_get_by_index(net, ifindex);
+		if (!netdev)
+			err = -ENODEV;
+		else
+			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
+	}
+	rtnl_unlock();
+
+	if (err == -EMSGSIZE)
+		return skb->len;
+
+	return err;
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index dec544337236..c2e7c9a6efbe 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -23,6 +23,8 @@
#include <trace/events/page_pool.h>
+#include "page_pool_priv.h"
+
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
@@ -69,7 +71,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
* is passed to this API which is filled in. The caller can then report
* those stats to the user (perhaps via ethtool, debugfs, etc.).
*/
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
struct page_pool_stats *stats)
{
int cpu = 0;
@@ -173,7 +175,8 @@ static int page_pool_init(struct page_pool *pool,
{
unsigned int ring_qsize = 1024; /* Default */
- memcpy(&pool->p, params, sizeof(pool->p));
+ memcpy(&pool->p, &params->fast, sizeof(pool->p));
+ memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
@@ -211,6 +214,8 @@ static int page_pool_init(struct page_pool *pool,
*/
}
+ pool->has_init_callback = !!pool->slow.init_callback;
+
#ifdef CONFIG_PAGE_POOL_STATS
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
if (!pool->recycle_stats)
@@ -235,6 +240,18 @@ static int page_pool_init(struct page_pool *pool,
return 0;
}
+/* Release the resources held by @pool: the recycling ptr_ring, the device
+ * reference (only when PP_FLAG_DMA_MAP is set) and, with
+ * CONFIG_PAGE_POOL_STATS, the per-CPU recycle statistics.
+ * The pool structure itself is not freed here; callers kfree() it afterwards.
+ */
+static void page_pool_uninit(struct page_pool *pool)
+{
+ ptr_ring_cleanup(&pool->ring, NULL);
+
+ if (pool->p.flags & PP_FLAG_DMA_MAP)
+ put_device(pool->p.dev);
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ free_percpu(pool->recycle_stats);
+#endif
+}
+
/**
* page_pool_create() - create a page pool.
* @params: parameters, see struct page_pool_params
@@ -249,13 +266,21 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
return ERR_PTR(-ENOMEM);
err = page_pool_init(pool, params);
- if (err < 0) {
- pr_warn("%s() gave up with errno %d\n", __func__, err);
- kfree(pool);
- return ERR_PTR(err);
- }
+ if (err < 0)
+ goto err_free;
+
+ err = page_pool_list(pool);
+ if (err)
+ goto err_uninit;
return pool;
+
+err_uninit:
+ page_pool_uninit(pool);
+err_free:
+ pr_warn("%s() gave up with errno %d\n", __func__, err);
+ kfree(pool);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL(page_pool_create);
@@ -388,8 +413,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
* the overhead is negligible.
*/
page_pool_fragment_page(page, 1);
- if (pool->p.init_callback)
- pool->p.init_callback(page, pool->p.init_arg);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(page, pool->slow.init_arg);
}
static void page_pool_clear_pp_info(struct page *page)
@@ -504,7 +529,7 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
*/
#define _distance(a, b) (s32)((a) - (b))
-static s32 page_pool_inflight(struct page_pool *pool)
+s32 page_pool_inflight(const struct page_pool *pool, bool strict)
{
u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
@@ -512,8 +537,13 @@ static s32 page_pool_inflight(struct page_pool *pool)
inflight = _distance(hold_cnt, release_cnt);
- trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
- WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+ if (strict) {
+ trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
+ WARN(inflight < 0, "Negative(%d) inflight packet-pages",
+ inflight);
+ } else {
+ inflight = max(0, inflight);
+ }
return inflight;
}
@@ -818,14 +848,8 @@ static void __page_pool_destroy(struct page_pool *pool)
if (pool->disconnect)
pool->disconnect(pool);
- ptr_ring_cleanup(&pool->ring, NULL);
-
- if (pool->p.flags & PP_FLAG_DMA_MAP)
- put_device(pool->p.dev);
-
-#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
-#endif
+ page_pool_unlist(pool);
+ page_pool_uninit(pool);
kfree(pool);
}
@@ -862,7 +886,7 @@ static int page_pool_release(struct page_pool *pool)
int inflight;
page_pool_scrub(pool);
- inflight = page_pool_inflight(pool);
+ inflight = page_pool_inflight(pool, true);
if (!inflight)
__page_pool_destroy(pool);
@@ -873,18 +897,21 @@ static void page_pool_release_retry(struct work_struct *wq)
{
struct delayed_work *dwq = to_delayed_work(wq);
struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
+ void *netdev;
int inflight;
inflight = page_pool_release(pool);
if (!inflight)
return;
- /* Periodic warning */
- if (time_after_eq(jiffies, pool->defer_warn)) {
+ /* Periodic warning for page pools the user can't see */
+ netdev = READ_ONCE(pool->slow.netdev);
+ if (time_after_eq(jiffies, pool->defer_warn) &&
+ (!netdev || netdev == NET_PTR_POISON)) {
int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
- pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
- __func__, inflight, sec);
+ pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n",
+ __func__, pool->user.id, inflight, sec);
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
}
@@ -929,6 +956,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_release(pool))
return;
+ page_pool_detached(pool);
pool->defer_start = jiffies;
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
new file mode 100644
index 000000000000..90665d40f1eb
--- /dev/null
+++ b/net/core/page_pool_priv.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PAGE_POOL_PRIV_H
+#define __PAGE_POOL_PRIV_H
+
+/* Number of pages currently held outside the pool (hold count minus release
+ * count). With @strict the release tracepoint fires and a negative result
+ * triggers a WARN; without it a negative result is clamped to 0.
+ */
+s32 page_pool_inflight(const struct page_pool *pool, bool strict);
+
+/* Allocate a user-visible ID for @pool and link it to its netdev, if any */
+int page_pool_list(struct page_pool *pool);
+/* Record the detach time of @pool and send a CHANGE notification */
+void page_pool_detached(struct page_pool *pool);
+/* Send a DEL notification, release the ID and unlink @pool from its netdev */
+void page_pool_unlist(struct page_pool *pool);
+
+#endif
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
new file mode 100644
index 000000000000..ffe5244e5597
--- /dev/null
+++ b/net/core/page_pool_user.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+#include <net/net_debug.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
+#include <net/sock.h>
+
+#include "page_pool_priv.h"
+#include "netdev-genl-gen.h"
+
+static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);
+/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user.
+ * Ordering: inside rtnl_lock
+ */
+static DEFINE_MUTEX(page_pools_lock);
+
+/* Page pools are only reachable from user space (via netlink) if they are
+ * linked to a netdev at creation time. The following page pool "visibility"
+ * states are possible:
+ * - normal
+ * - user.list: linked to real netdev, netdev: real netdev
+ * - orphaned - real netdev has disappeared
+ * - user.list: linked to lo, netdev: lo
+ * - invisible - either (a) created without netdev linking, (b) unlisted due
+ * to error, or (c) the entire namespace which owned this pool disappeared
+ * - user.list: unhashed, netdev: unknown
+ */
+
+/* Callback writing one netlink message describing @pool into @rsp.
+ * Callers treat any non-zero return value as an error.
+ */
+typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info);
+
+/* Common "do" handler: look up the page pool with ID @id, build a reply with
+ * @fill and send it to the requester.
+ *
+ * The pool must exist, be linked to a netdev and that netdev must live in
+ * the requester's network namespace, otherwise -ENOENT is returned.
+ * The lookup and the fill run under page_pools_lock; the reply is sent after
+ * the lock has been dropped.
+ */
+static int
+netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill)
+{
+	struct sk_buff *msg = NULL;
+	struct page_pool *pp;
+	int ret;
+
+	mutex_lock(&page_pools_lock);
+
+	pp = xa_load(&page_pools, id);
+	if (!pp || hlist_unhashed(&pp->user.list) ||
+	    !net_eq(dev_net(pp->slow.netdev), genl_info_net(info))) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	ret = fill(msg, pp, info);
+	if (ret)
+		nlmsg_free(msg);
+
+out_unlock:
+	mutex_unlock(&page_pools_lock);
+	if (ret)
+		return ret;
+
+	return genlmsg_reply(msg, info);
+}
+
+/* Dump cursor kept in netlink_callback->ctx between dump invocations */
+struct page_pool_dump_cb {
+ /* iteration cursor handed to for_each_netdev_dump() */
+ unsigned long ifindex;
+ /* ID of the pool being dumped on the current netdev, 0 if none yet */
+ u32 pp_id;
+};
+
+/* Common dump handler: emit one message per page pool linked to any netdev
+ * of the requester's namespace, using @fill to build each message.
+ *
+ * state->ifindex is the netdev cursor and state->pp_id the ID of the pool
+ * being dumped on that netdev. On resume, pools whose ID is larger than
+ * state->pp_id are skipped as already dumped (presumably relying on new
+ * pools being added at the head of the per-netdev list with increasing
+ * IDs - verify against page_pool_list()).
+ *
+ * When @fill fails the whole walk must stop right away, with the cursor
+ * left untouched. Merely leaving the per-netdev loop would reset
+ * state->pp_id and continue with the next netdev, which loses the pools
+ * that did not fit or, on the last netdev, makes the dump restart forever.
+ *
+ * Returns skb->len if the message filled up after at least some data was
+ * written, otherwise 0 or the error from @fill.
+ */
+static int
+netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
+			     pp_nl_fill_cb fill)
+{
+	struct page_pool_dump_cb *state = (void *)cb->ctx;
+	const struct genl_info *info = genl_info_dump(cb);
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	struct page_pool *pool;
+	int err = 0;
+
+	rtnl_lock();
+	mutex_lock(&page_pools_lock);
+	for_each_netdev_dump(net, netdev, state->ifindex) {
+		hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
+			if (state->pp_id && state->pp_id < pool->user.id)
+				continue;
+
+			state->pp_id = pool->user.id;
+			err = fill(skb, pool, info);
+			if (err)
+				goto out;
+		}
+
+		/* This netdev is complete, start the next one from scratch */
+		state->pp_id = 0;
+	}
+out:
+	mutex_unlock(&page_pools_lock);
+	rtnl_unlock();
+
+	if (skb->len && err == -EMSGSIZE)
+		return skb->len;
+	return err;
+}
+
+/* Write one statistics message for @pool into @rsp.
+ *
+ * The message consists of a NETDEV_A_PAGE_POOL_STATS_INFO nest identifying
+ * the pool (ID, plus ifindex unless the pool sits on the loopback device)
+ * followed by the allocation and recycling counters.
+ *
+ * Returns 0 on success or when page_pool_get_stats() reports failure (no
+ * message is written in that case), -EMSGSIZE when @rsp has no room and
+ * -EOPNOTSUPP when the kernel lacks CONFIG_PAGE_POOL_STATS.
+ */
+static int
+page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info)
+{
+#ifdef CONFIG_PAGE_POOL_STATS
+ struct page_pool_stats stats = {};
+ struct nlattr *nest;
+ void *hdr;
+
+ if (!page_pool_get_stats(pool, &stats))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO);
+
+ /* Pool identification; the ifindex is omitted for loopback */
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) ||
+ (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
+ nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
+ pool->slow.netdev->ifindex)))
+ goto err_cancel_nest;
+
+ nla_nest_end(rsp, nest);
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST,
+ stats.alloc_stats.fast) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
+ stats.alloc_stats.slow) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
+ stats.alloc_stats.slow_high_order) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
+ stats.alloc_stats.empty) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
+ stats.alloc_stats.refill) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
+ stats.alloc_stats.waive) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
+ stats.recycle_stats.cached) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
+ stats.recycle_stats.cache_full) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
+ stats.recycle_stats.ring) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
+ stats.recycle_stats.ring_full) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
+ stats.recycle_stats.released_refcnt))
+ goto err_cancel_msg;
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+err_cancel_nest:
+ nla_nest_cancel(rsp, nest);
+err_cancel_msg:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+#else
+ GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS");
+ return -EOPNOTSUPP;
+#endif
+}
+
+/* "do" handler for page pool statistics.
+ *
+ * The pool is selected through the nested NETDEV_A_PAGE_POOL_STATS_INFO
+ * attribute, which must contain NETDEV_A_PAGE_POOL_ID and must not contain
+ * NETDEV_A_PAGE_POOL_IFINDEX. Returns -EINVAL for a malformed request,
+ * otherwise the result of netdev_nl_page_pool_get_do().
+ */
+int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)];
+ struct nlattr *nest;
+ int err;
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO))
+ return -EINVAL;
+
+ nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO];
+ /* maxtype is the highest attribute index, hence array size minus one */
+ err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest,
+ netdev_page_pool_info_nl_policy,
+ info->extack);
+ if (err)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID))
+ return -EINVAL;
+ if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[NETDEV_A_PAGE_POOL_IFINDEX],
+ "selecting by ifindex not supported");
+ return -EINVAL;
+ }
+
+ id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]);
+
+ return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill);
+}
+
+/* Dump handler for page pool statistics: the common pool walk combined with
+ * the statistics fill callback.
+ */
+int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill);
+}
+
+/* Write one message describing @pool into @rsp: its ID, the ifindex (unless
+ * the pool sits on the loopback device), the NAPI ID if set, the number of
+ * inflight pages, the memory they occupy and the detach time if set.
+ *
+ * Returns 0 on success or -EMSGSIZE when @rsp has no room; a partially
+ * written message is cancelled.
+ */
+static int
+page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info)
+{
+ size_t inflight, refsz;
+ void *hdr;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id))
+ goto err_cancel;
+
+ if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
+ nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
+ pool->slow.netdev->ifindex))
+ goto err_cancel;
+ if (pool->user.napi_id &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id))
+ goto err_cancel;
+
+ /* Non-strict: a negative count is reported as 0 instead of warning */
+ inflight = page_pool_inflight(pool, false);
+ /* Bytes per pool page, given the pool's allocation order */
+ refsz = PAGE_SIZE << pool->p.order;
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
+ inflight * refsz))
+ goto err_cancel;
+ if (pool->user.detach_time &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
+ pool->user.detach_time))
+ goto err_cancel;
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+err_cancel:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+/* Multicast a notification of type @cmd describing @pool to the
+ * NETDEV_NLGRP_PAGE_POOL group of the namespace owning the pool's netdev.
+ *
+ * Must be called with page_pools_lock held. Nothing is sent for pools that
+ * are not linked to a netdev or when the group has no listeners; allocation
+ * and fill failures are silently dropped.
+ */
+static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd)
+{
+ struct genl_info info;
+ struct sk_buff *ntf;
+ struct net *net;
+
+ lockdep_assert_held(&page_pools_lock);
+
+ /* 'invisible' page pools don't matter */
+ if (hlist_unhashed(&pool->user.list))
+ return;
+ net = dev_net(pool->slow.netdev);
+
+ /* Avoid building a message nobody would receive */
+ if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL))
+ return;
+
+ genl_info_init_ntf(&info, &netdev_nl_family, cmd);
+
+ ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!ntf)
+ return;
+
+ if (page_pool_nl_fill(ntf, pool, &info)) {
+ nlmsg_free(ntf);
+ return;
+ }
+
+ genlmsg_multicast_netns(&netdev_nl_family, net, ntf,
+ 0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL);
+}
+
+/* "do" handler for page pool info: requires NETDEV_A_PAGE_POOL_ID (-EINVAL
+ * if missing) and replies with the description of that pool.
+ */
+int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID))
+ return -EINVAL;
+
+ id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]);
+
+ return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill);
+}
+
+/* Dump handler for page pool info: the common pool walk combined with the
+ * pool description fill callback.
+ */
+int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill);
+}
+
+/* Register @pool in the global ID table and, when it was created with a
+ * netdev, link it to that netdev, record its NAPI ID and announce it with
+ * an ADD notification.
+ *
+ * Returns 0 on success or the negative error from the ID allocation.
+ */
+int page_pool_list(struct page_pool *pool)
+{
+	static u32 id_alloc_next;
+	int ret;
+
+	mutex_lock(&page_pools_lock);
+
+	ret = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b,
+			      &id_alloc_next, GFP_KERNEL);
+	if (ret < 0)
+		goto out_unlock;
+	/* A positive result is still success, report plain 0 to the caller */
+	ret = 0;
+
+	INIT_HLIST_NODE(&pool->user.list);
+	if (!pool->slow.netdev)
+		goto out_unlock;
+
+	hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools);
+	if (pool->p.napi)
+		pool->user.napi_id = pool->p.napi->napi_id;
+	else
+		pool->user.napi_id = 0;
+
+	netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
+
+out_unlock:
+	mutex_unlock(&page_pools_lock);
+	return ret;
+}
+
+/* Stamp @pool with the current boot-time clock (in seconds) as its detach
+ * time and send a CHANGE notification, all under page_pools_lock.
+ */
+void page_pool_detached(struct page_pool *pool)
+{
+ mutex_lock(&page_pools_lock);
+ pool->user.detach_time = ktime_get_boottime_seconds();
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
+ mutex_unlock(&page_pools_lock);
+}
+
+/* Remove @pool from user visibility: send a DEL notification, release its
+ * ID and unlink it from its netdev's list if it is still linked.
+ */
+void page_pool_unlist(struct page_pool *pool)
+{
+ mutex_lock(&page_pools_lock);
+ /* Notify first: the event is suppressed once the pool is unhashed */
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF);
+ xa_erase(&page_pools, pool->user.id);
+ if (!hlist_unhashed(&pool->user.list))
+ hlist_del(&pool->user.list);
+ mutex_unlock(&page_pools_lock);
+}
+
+/* Make every page pool linked to @netdev invisible: unlink it from the
+ * netdev's list and poison its netdev pointer. Used when the loopback
+ * device itself is unregistered, so there is no device left to move the
+ * pools to.
+ */
+static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
+{
+ struct page_pool *pool;
+ struct hlist_node *n;
+
+ mutex_lock(&page_pools_lock);
+ /* _safe variant: entries are removed while walking */
+ hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
+ hlist_del_init(&pool->user.list);
+ pool->slow.netdev = NET_PTR_POISON;
+ }
+ mutex_unlock(&page_pools_lock);
+}
+
+/* Orphan the page pools of @netdev: re-point each pool at the loopback
+ * device of the netdev's namespace, send a CHANGE notification for it and
+ * finally move the whole list over to the loopback device.
+ */
+static void page_pool_unreg_netdev(struct net_device *netdev)
+{
+ struct page_pool *pool, *last;
+ struct net_device *lo;
+
+ lo = dev_net(netdev)->loopback_dev;
+
+ mutex_lock(&page_pools_lock);
+ last = NULL;
+ hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
+ pool->slow.netdev = lo;
+ netdev_nl_page_pool_event(pool,
+ NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
+ last = pool;
+ }
+ /* The splice needs the tail node, so only do it for a non-empty list */
+ if (last)
+ hlist_splice_init(&netdev->page_pools, &last->user.list,
+ &lo->page_pools);
+ mutex_unlock(&page_pools_lock);
+}
+
+/* Netdevice notifier: on NETDEV_UNREGISTER hand the device's page pools
+ * over to loopback, or wipe them when the device is loopback itself.
+ * All other events are ignored.
+ */
+static int
+page_pool_netdevice_event(struct notifier_block *nb,
+			  unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+
+	if (!hlist_empty(&dev->page_pools)) {
+		if (dev->ifindex == LOOPBACK_IFINDEX)
+			page_pool_unreg_netdev_wipe(dev);
+		else
+			page_pool_unreg_netdev(dev);
+	}
+
+	return NOTIFY_OK;
+}
+
+/* Notifier block hooking page_pool_netdevice_event() into netdevice events */
+static struct notifier_block page_pool_netdevice_nb = {
+ .notifier_call = page_pool_netdevice_event,
+};
+
+/* Register the netdevice notifier; run as a subsys initcall */
+static int __init page_pool_user_init(void)
+{
+ return register_netdevice_notifier(&page_pool_netdevice_nb);
+}
+
+subsys_initcall(page_pool_user_init);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e8431c6c8490..5e0ab4c08f72 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3849,10 +3849,18 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
err = -ENOBUFS;
- nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
+ nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask));
if (nskb == NULL)
goto out;
+ /* Synchronize the carrier state so we don't report a state
+ * that we're not actually going to honour immediately; if
+ * the driver just did a carrier off->on transition, we can
+ * only TX if link watch work has run, but without this we'd
+ * already report carrier on, even if it doesn't work yet.
+ */
+ linkwatch_sync_dev(dev);
+
err = rtnl_fill_ifinfo(nskb, dev, net,
RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask,
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6984877e9f10..4275a2bc6d8e 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
* all devlink instances from this namespace into init_net.
*/
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, true);
err = 0;
if (devl_is_registered(devlink))
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, true);
devlink_put(devlink);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 4fc7adb32663..918a0395b03e 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -4,6 +4,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include "devl_internal.h"
@@ -424,6 +425,18 @@ static void devlink_reload_netns_change(struct devlink *devlink,
devlink_rel_nested_in_notify(devlink);
}
+/* Warn about any devlink object (trap policers/groups/traps, dpipe tables,
+ * shared buffers, rates, linecards, ports) that is still registered on
+ * @devlink. Called from devlink_reload() for the DRIVER_REINIT action,
+ * before reload_up() is invoked.
+ */
+static void devlink_reload_reinit_sanity_check(struct devlink *devlink)
+{
+ WARN_ON(!list_empty(&devlink->trap_policer_list));
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
+ WARN_ON(!list_empty(&devlink->dpipe_table_list));
+ WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
+ WARN_ON(!xa_empty(&devlink->ports));
+}
+
int devlink_reload(struct devlink *devlink, struct net *dest_net,
enum devlink_reload_action action,
enum devlink_reload_limit limit,
@@ -433,6 +446,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
+ /* Make sure the reload operations are invoked with the device lock
+ * held to allow drivers to trigger functionality that expects it
+ * (e.g., PCI reset) and to close possible races between these
+ * operations and probe/remove.
+ */
+ device_lock_assert(devlink->dev);
+
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
@@ -444,8 +464,10 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
if (dest_net && !net_eq(dest_net, curr_net))
devlink_reload_netns_change(devlink, curr_net, dest_net);
- if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
+ if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) {
devlink_params_driverinit_load_new(devlink);
+ devlink_reload_reinit_sanity_check(devlink);
+ }
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 183dbe3807ab..5ea2e2012e93 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
@@ -96,6 +97,20 @@ static inline bool devl_is_registered(struct devlink *devlink)
return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
}
+/* Take the devlink instance lock; when @dev_lock is true the device lock of
+ * devlink->dev is taken first. Pair with devl_dev_unlock() using the same
+ * @dev_lock value.
+ */
+static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock)
+{
+ if (dev_lock)
+ device_lock(devlink->dev);
+ devl_lock(devlink);
+}
+
+/* Release the devlink instance lock and then, when @dev_lock is true, the
+ * device lock of devlink->dev (reverse order of devl_dev_lock()).
+ */
+static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
+{
+ devl_unlock(devlink);
+ if (dev_lock)
+ device_unlock(devlink->dev);
+}
+
typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
u32 rel_index);
@@ -111,9 +126,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
bool *msg_updated);
/* Netlink */
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
};
@@ -140,7 +152,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
int flags);
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock);
int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
devlink_nl_dump_one_func_t *dump_one);
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 695df61f8ac2..71ae121dc739 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -1151,7 +1151,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return NULL;
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index d0b90ebc8b15..fa9afe3e6d9b 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -9,6 +9,10 @@
#include "devl_internal.h"
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2)
+
static const struct genl_multicast_group devlink_nl_mcgrps[] = {
[DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
};
@@ -61,7 +65,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
}
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock)
{
struct devlink *devlink;
unsigned long index;
@@ -75,12 +80,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, dev_lock);
if (devl_is_registered(devlink) &&
strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0)
return devlink;
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
@@ -90,11 +95,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs);
+ devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs,
+ dev_lock);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
@@ -114,7 +121,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
return 0;
unlock:
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
return err;
}
@@ -131,6 +138,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
}
+/* pre_doit variant that additionally takes the device lock
+ * (DEVLINK_NL_FLAG_NEED_DEV_LOCK); pair with devlink_nl_post_doit_dev_lock().
+ */
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info)
@@ -138,16 +151,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
}
-void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink *devlink;
devlink = info->user_ptr[0];
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
+/* post_doit for operations that did not take the device lock: drops the
+ * devlink instance lock and the devlink reference.
+ */
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, 0);
+}
+
+/* post_doit counterpart of devlink_nl_pre_doit_dev_lock(): also releases the
+ * device lock.
+ */
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct netlink_callback *cb, int flags,
devlink_nl_dump_one_func_t *dump_one,
@@ -156,7 +183,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index 788dfdc498a9..95f9b4350ab7 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -846,9 +846,9 @@ const struct genl_split_ops devlink_nl_ops[73] = {
{
.cmd = DEVLINK_CMD_RELOAD,
.validate = GENL_DONT_VALIDATE_STRICT,
- .pre_doit = devlink_nl_pre_doit,
+ .pre_doit = devlink_nl_pre_doit_dev_lock,
.doit = devlink_nl_reload_doit,
- .post_doit = devlink_nl_post_doit,
+ .post_doit = devlink_nl_post_doit_dev_lock,
.policy = devlink_reload_nl_policy,
.maxattr = DEVLINK_ATTR_RELOAD_LIMITS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index 0e9e89c31c31..02f3c0bfae0e 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -22,12 +22,17 @@ int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
void
devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/devlink/region.c b/net/devlink/region.c
index 0aab7b82d678..e3bab458db94 100644
--- a/net/devlink/region.c
+++ b/net/devlink/region.c
@@ -883,7 +883,8 @@ int devlink_nl_region_read_dumpit(struct sk_buff *skb,
start_offset = state->start_offset;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 4da5bad1a7aa..a019226ec6d2 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -23,7 +23,6 @@
#define RTL4_A_NAME "rtl4a"
#define RTL4_A_HDR_LEN 4
-#define RTL4_A_ETHERTYPE 0x8899
#define RTL4_A_PROTOCOL_SHIFT 12
/*
* 0x1 = Realtek Remote Control protocol (RRCP)
@@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
/* Set Ethertype */
p = (__be16 *)tag;
- *p = htons(RTL4_A_ETHERTYPE);
+ *p = htons(ETH_P_REALTEK);
out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT);
/* The lower bits indicate the port number */
@@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
tag = dsa_etype_header_pos_rx(skb);
p = (__be16 *)tag;
etype = ntohs(*p);
- if (etype != RTL4_A_ETHERTYPE) {
+ if (etype != ETH_P_REALTEK) {
/* Not custom, just pass through */
netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
return skb;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index b4419fb6df6a..11d8797f63f6 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
}
EXPORT_SYMBOL(ethtool_get_phc_vclocks);
+/* Exported wrapper: fills @info for @dev by calling __ethtool_get_ts_info()
+ * and returns its result unchanged.
+ */
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info)
+{
+ return __ethtool_get_ts_info(dev, info);
+}
+EXPORT_SYMBOL(ethtool_get_ts_info_by_layer);
+
const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 0b0ce4f81c01..a977f8903467 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -58,6 +58,9 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev)
u32 ethtool_op_get_link(struct net_device *dev)
{
+ /* Synchronize carrier state with link watch, see also rtnl_getlink() */
+ linkwatch_sync_dev(dev);
+
return netif_carrier_ok(dev) ? 1 : 0;
}
EXPORT_SYMBOL(ethtool_op_get_link);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 306f942c3b28..7ceb9ac6e730 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev)
static int hsr_dev_close(struct net_device *dev)
{
- /* Nothing to do here. */
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ dev_uc_unsync(port->dev, dev);
+ dev_mc_unsync(port->dev, dev);
+ break;
+ default:
+ break;
+ }
+ }
+
return 0;
}
@@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
+/* ndo_set_rx_mode: push the multicast and unicast address lists of the HSR
+ * device @dev down to both slave ports (HSR_PT_SLAVE_A / HSR_PT_SLAVE_B).
+ * All other port types are left alone.
+ */
+static void hsr_set_rx_mode(struct net_device *dev)
+{
+	struct hsr_priv *hsr = netdev_priv(dev);
+	struct hsr_port *port;
+
+	hsr_for_each_port(hsr, port) {
+		if (port->type != HSR_PT_SLAVE_A &&
+		    port->type != HSR_PT_SLAVE_B)
+			continue;
+
+		dev_mc_sync_multiple(port->dev, dev);
+		dev_uc_sync_multiple(port->dev, dev);
+	}
+}
+
+/* ndo_change_rx_flags: when IFF_ALLMULTI changed on the HSR device @dev,
+ * apply the same change to both slave ports - +1 if the flag is now set on
+ * @dev, -1 if it was cleared. Other flag changes and other port types are
+ * ignored.
+ */
+static void hsr_change_rx_flags(struct net_device *dev, int change)
+{
+	struct hsr_priv *hsr = netdev_priv(dev);
+	struct hsr_port *port;
+
+	if (!(change & IFF_ALLMULTI))
+		return;
+
+	hsr_for_each_port(hsr, port) {
+		if (port->type != HSR_PT_SLAVE_A &&
+		    port->type != HSR_PT_SLAVE_B)
+			continue;
+
+		dev_set_allmulti(port->dev,
+				 (dev->flags & IFF_ALLMULTI) ? 1 : -1);
+	}
+}
+
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
+ .ndo_change_rx_flags = hsr_change_rx_flags,
.ndo_fix_features = hsr_fix_features,
+ .ndo_set_rx_mode = hsr_set_rx_mode,
};
static struct device_type hsr_type = {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7d0e7aaa71e0..46b13962ad02 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1077,10 +1077,94 @@ skip_listen_ht:
s_i = num = s_num = 0;
}
+/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
+ * with bh disabled.
+ */
+#define SKARR_SZ 16
+
+ /* Dump bound but inactive (not listening, connecting, etc.) sockets */
+ if (cb->args[0] == 1) {
+ if (!(idiag_states & TCPF_BOUND_INACTIVE))
+ goto skip_bind_ht;
+
+ for (i = s_i; i < hashinfo->bhash_size; i++) {
+ struct inet_bind_hashbucket *ibb;
+ struct inet_bind2_bucket *tb2;
+ struct sock *sk_arr[SKARR_SZ];
+ int num_arr[SKARR_SZ];
+ int idx, accum, res;
+
+resume_bind_walk:
+ num = 0;
+ accum = 0;
+ ibb = &hashinfo->bhash2[i];
+
+ spin_lock_bh(&ibb->lock);
+ inet_bind_bucket_for_each(tb2, &ibb->chain) {
+ if (!net_eq(ib2_net(tb2), net))
+ continue;
+
+ sk_for_each_bound_bhash2(sk, &tb2->owners) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (num < s_num)
+ goto next_bind;
+
+ if (sk->sk_state != TCP_CLOSE ||
+ !inet->inet_num)
+ goto next_bind;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ r->sdiag_family != sk->sk_family)
+ goto next_bind;
+
+ if (!inet_diag_bc_sk(bc, sk))
+ goto next_bind;
+
+ sock_hold(sk);
+ num_arr[accum] = num;
+ sk_arr[accum] = sk;
+ if (++accum == SKARR_SZ)
+ goto pause_bind_walk;
+next_bind:
+ num++;
+ }
+ }
+pause_bind_walk:
+ spin_unlock_bh(&ibb->lock);
+
+ res = 0;
+ for (idx = 0; idx < accum; idx++) {
+ if (res >= 0) {
+ res = inet_sk_diag_fill(sk_arr[idx],
+ NULL, skb, cb,
+ r, NLM_F_MULTI,
+ net_admin);
+ if (res < 0)
+ num = num_arr[idx];
+ }
+ sock_put(sk_arr[idx]);
+ }
+ if (res < 0)
+ goto done;
+
+ cond_resched();
+
+ if (accum == SKARR_SZ) {
+ s_num = num + 1;
+ goto resume_bind_walk;
+ }
+
+ s_num = 0;
+ }
+skip_bind_ht:
+ cb->args[0] = 2;
+ s_i = num = s_num = 0;
+ }
+
if (!(idiag_states & ~TCPF_LISTEN))
goto out;
-#define SKARR_SZ 16
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d37282c06e3d..61f1c96cfe63 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -189,12 +189,14 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
* Check if a ack sequence number is a valid syncookie.
* Return the decoded mss if it is, or 0 if not.
*/
-int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie)
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
@@ -202,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check);
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst, u32 tsoff)
+ struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
@@ -212,7 +214,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
NULL, &own_req);
if (child) {
refcount_set(&req->rsk_refcnt, 1);
- tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
if (rsk_drop_req(req)) {
@@ -269,26 +270,46 @@ bool cookie_timestamp_decode(const struct net *net,
}
EXPORT_SYMBOL(cookie_timestamp_decode);
-bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
- const struct net *net, const struct dst_entry *dst)
+static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req)
{
- bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_request_sock *treq = tcp_rsk(req);
+ const struct tcphdr *th = tcp_hdr(skb);
- if (!ecn_ok)
- return false;
+ req->num_retrans = 0;
- if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
- return true;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+
+ if (IS_ENABLED(CONFIG_SMC))
+ ireq->smc_ok = 0;
- return dst_feature(dst, RTAX_FEATURE_ECN);
+ treq->snt_synack = 0;
+ treq->tfo_listener = false;
+ treq->txhash = net_tx_rndhash();
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = ntohl(th->ack_seq) - 1;
+ treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
+ treq->req_usec_ts = false;
+
+#if IS_ENABLED(CONFIG_MPTCP)
+ treq->is_mptcp = sk_is_mptcp(sk);
+ if (treq->is_mptcp)
+ return mptcp_subflow_init_cookie_req(req, sk, skb);
+#endif
+
+ return 0;
}
-EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
- const struct tcp_request_sock_ops *af_ops,
- struct sock *sk,
- struct sk_buff *skb)
+ struct sock *sk, struct sk_buff *skb,
+ struct tcp_options_received *tcp_opt,
+ int mss, u32 tsoff)
{
+ struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct request_sock *req;
@@ -300,126 +321,109 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
if (!req)
return NULL;
- treq = tcp_rsk(req);
+ if (cookie_tcp_reqsk_init(sk, skb, req)) {
+ reqsk_free(req);
+ return NULL;
+ }
- /* treq->af_specific might be used to perform TCP_MD5 lookup */
- treq->af_specific = af_ops;
+ ireq = inet_rsk(req);
+ treq = tcp_rsk(req);
- treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
- treq->req_usec_ts = false;
+ req->mss = mss;
+ req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0;
-#if IS_ENABLED(CONFIG_MPTCP)
- treq->is_mptcp = sk_is_mptcp(sk);
- if (treq->is_mptcp) {
- int err = mptcp_subflow_init_cookie_req(req, sk, skb);
+ ireq->snd_wscale = tcp_opt->snd_wscale;
+ ireq->tstamp_ok = tcp_opt->saw_tstamp;
+ ireq->sack_ok = tcp_opt->sack_ok;
+ ireq->wscale_ok = tcp_opt->wscale_ok;
+ ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN);
- if (err) {
- reqsk_free(req);
- return NULL;
- }
- }
-#endif
+ treq->ts_off = tsoff;
return req;
}
EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
-/* On input, sk is a listener.
- * Output is listener if incoming packet would not create a child
- * NULL if memory could not be allocated.
- */
-struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
- struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct rtable *rt;
- __u8 rcv_wscale;
- struct flowi4 fl4;
u32 tsoff = 0;
- int l3index;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcp_ts_off(sock_net(sk),
+ tsoff = secure_tcp_ts_off(net,
ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
- &tcp_request_sock_ipv4_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+/* On input, sk is a listener.
+ * Output is listener if incoming packet would not create a child
+ * NULL if memory could not be allocated.
+ */
+struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct sock *ret = sk;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ if (!req)
+ goto out_drop;
+
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
- req->mss = mss;
- ireq->ir_num = ntohs(th->dest);
- ireq->ir_rmt_port = th->source;
+
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->ir_mark = inet_request_mark(sk, skb);
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->tfo_listener = false;
-
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
-
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
-
- l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index);
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
- if (security_inet_conn_request(sk, skb, req)) {
- reqsk_free(req);
- goto out;
- }
+ if (security_inet_conn_request(sk, skb, req))
+ goto out_free;
- req->num_retrans = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET);
/*
* We need to lookup the route here to get at the correct
@@ -433,11 +437,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
- rt = ip_route_output_key(sock_net(sk), &fl4);
- if (IS_ERR(rt)) {
- reqsk_free(req);
- goto out;
- }
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ goto out_free;
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
@@ -453,13 +455,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
dst_metric(&rt->dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
- ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
if (ret)
inet_sk(ret)->cork.fl.u.ip4 = fl4;
-out: return ret;
+out:
+ return ret;
+out_free:
+ reqsk_free(req);
+out_drop:
+ return NULL;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ff6838ca2e58..1d6b80145efb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2605,6 +2605,7 @@ void tcp_set_state(struct sock *sk, int state)
BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_BOUND_INACTIVE != (int)TCP_BOUND_INACTIVE);
BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
/* bpf uapi header bpf.h defines an anonymous enum with values
@@ -4585,6 +4586,97 @@ static void __init tcp_init_mem(void)
sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
}
+static void __init tcp_struct_check(void)
+{
+ /* TX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
+
+ /* TXRX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31);
+
+ /* RX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
+
+ /* TX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
+
+ /* TXRX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* RX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
+}
+
void __init tcp_init(void)
{
int max_rshare, max_wshare, cnt;
@@ -4595,6 +4687,8 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
sizeof_field(struct sk_buff, cb));
+ tcp_struct_check();
+
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index f8308d3f565e..87db432c6bb4 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -844,18 +844,30 @@ static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family,
}
void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
- struct tcp_request_sock *treq,
- unsigned short int family, int l3index)
+ struct request_sock *req, unsigned short int family)
{
+ struct tcp_request_sock *treq = tcp_rsk(req);
const struct tcphdr *th = tcp_hdr(skb);
const struct tcp_ao_hdr *aoh;
struct tcp_ao_key *key;
+ int l3index;
+
+ /* treq->af_specific is used to perform TCP_AO lookup
+ * in tcp_create_openreq_child().
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6)
+ treq->af_specific = &tcp_request_sock_ipv6_ops;
+ else
+#endif
+ treq->af_specific = &tcp_request_sock_ipv4_ops;
treq->used_tcp_ao = false;
if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh)
return;
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), inet_rsk(req)->ir_iif);
key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index);
if (!key)
/* Key not found, continue without TCP-AO */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 90de838a2745..7990f4939e8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -202,23 +202,17 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
}
#endif
-static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
- unsigned int len)
+static __cold void tcp_gro_dev_warn(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int len)
{
- static bool __once __read_mostly;
+ struct net_device *dev;
- if (!__once) {
- struct net_device *dev;
-
- __once = true;
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
- if (!dev || len >= dev->mtu)
- pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
- dev ? dev->name : "Unknown driver");
- rcu_read_unlock();
- }
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
+ if (!dev || len >= READ_ONCE(dev->mtu))
+ pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+ dev ? dev->name : "Unknown driver");
+ rcu_read_unlock();
}
/* Adapt the MSS value used to make delayed ack decision to the
@@ -250,9 +244,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
- if (unlikely(len > icsk->icsk_ack.rcv_mss +
- MAX_TCP_OPTION_SPACE))
- tcp_gro_dev_warn(sk, skb, len);
+ DO_ONCE_LITE_IF(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE,
+ tcp_gro_dev_warn, sk, skb, len);
/* If the skb has a len of exactly 1*MSS and has the PSH bit
* set then it is likely the end of an application write. So
* more data may not be arriving soon, and yet the data sender
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c50c5a32b84..4bac6e319aca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -482,6 +482,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
struct sock *sk;
struct request_sock *fastopen;
+ bool harderr = false;
u32 seq, snd_una;
int err;
struct net *net = dev_net(skb->dev);
@@ -555,6 +556,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
+ harderr = true;
break;
case ICMP_DEST_UNREACH:
if (code > NR_ICMP_UNREACH)
@@ -579,6 +581,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
}
err = icmp_err_convert[code].errno;
+ harderr = icmp_err_convert[code].fatal;
/* check if this ICMP message allows revert of backoff.
* (see RFC 6069)
*/
@@ -604,6 +607,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
+ if (!harderr)
+ break;
+
if (!sock_owned_by_user(sk)) {
WRITE_ONCE(sk->sk_err, err);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1f9f6c1c196b..d1ad20ce1c8c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -626,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk)
* implemented ftp to mars will work nicely. We will have to fix
* the 120 second clamps though!
*/
- icsk->icsk_backoff++;
out_reset_timer:
/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
@@ -647,11 +646,12 @@ out_reset_timer:
tcp_rto_min(sk),
TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
- icsk->icsk_backoff >
+ tp->total_rto >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
/* Use normal (exponential) backoff unless linear timeouts are
* activated.
*/
+ icsk->icsk_backoff++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 12eedc6ca2cc..c8d2ca27220c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -114,76 +114,82 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
return __cookie_v6_init_sequence(iph, th, mssp);
}
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- __u32 cookie)
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
EXPORT_SYMBOL_GPL(__cookie_v6_check);
-struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct dst_entry *dst;
- __u8 rcv_wscale;
u32 tsoff = 0;
- int l3index;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(sock_net(sk),
+ tsoff = secure_tcpv6_ts_off(net,
ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
- &tcp_request_sock_ipv6_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct dst_entry *dst;
+ struct sock *ret = sk;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ if (!req)
+ goto out_drop;
+
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->tfo_listener = false;
- req->mss = mss;
- ireq->ir_rmt_port = th->source;
- ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
@@ -197,31 +203,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->pktopts = skb;
}
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- req->num_retrans = 0;
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
-
- l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index);
-
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET6);
/*
* We need to lookup the dst_entry to get the correct window size.
@@ -243,7 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
@@ -261,12 +248,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, dst);
- ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst);
out:
return ret;
out_free:
reqsk_free(req);
+out_drop:
return NULL;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8c6623496dd7..06a19fe2afd1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -381,7 +381,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
- bool fatal;
+ bool harderr;
int err;
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
@@ -402,9 +402,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
seq = ntohl(th->seq);
- fatal = icmpv6_err_convert(type, code, &err);
+ harderr = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV) {
- tcp_req_err(sk, seq, fatal);
+ tcp_req_err(sk, seq, harderr);
return 0;
}
@@ -489,6 +489,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
+ if (!harderr)
+ break;
+
if (!sock_owned_by_user(sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 24fa06105378..1d98877647d8 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -194,11 +194,32 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
scan_sdata->vif.cfg.assoc &&
ieee80211_have_rx_timestamp(rx_status)) {
- bss_meta.parent_tsf =
- ieee80211_calculate_rx_timestamp(local, rx_status,
- len + FCS_LEN, 24);
- ether_addr_copy(bss_meta.parent_bssid,
- scan_sdata->vif.bss_conf.bssid);
+ struct ieee80211_bss_conf *link_conf = NULL;
+
+ /* for an MLO connection, set the TSF data only in case we have
+ * an indication on which of the links the frame was received
+ */
+ if (ieee80211_vif_is_mld(&scan_sdata->vif)) {
+ if (rx_status->link_valid) {
+ s8 link_id = rx_status->link_id;
+
+ link_conf =
+ rcu_dereference(scan_sdata->vif.link_conf[link_id]);
+ }
+ } else {
+ link_conf = &scan_sdata->vif.bss_conf;
+ }
+
+ if (link_conf) {
+ bss_meta.parent_tsf =
+ ieee80211_calculate_rx_timestamp(local,
+ rx_status,
+ len + FCS_LEN,
+ 24);
+
+ ether_addr_copy(bss_meta.parent_bssid,
+ link_conf->bssid);
+ }
}
rcu_read_unlock();
@@ -666,6 +687,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (local->scan_req)
return -EBUSY;
+ /* For an MLO connection, if a link ID was specified, validate that it
+ * is indeed active. If no link ID was specified, select one of the
+ * active links.
+ */
+ if (ieee80211_vif_is_mld(&sdata->vif)) {
+ if (req->tsf_report_link_id >= 0) {
+ if (!(sdata->vif.active_links &
+ BIT(req->tsf_report_link_id)))
+ return -EINVAL;
+ } else {
+ req->tsf_report_link_id =
+ __ffs(sdata->vif.active_links);
+ }
+ }
+
if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
@@ -714,6 +750,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_req->req.duration = req->duration;
local->hw_scan_req->req.duration_mandatory =
req->duration_mandatory;
+ local->hw_scan_req->req.tsf_report_link_id =
+ req->tsf_report_link_id;
local->hw_scan_band = 0;
local->hw_scan_req->req.n_6ghz_params = req->n_6ghz_params;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 5c01b9bc619a..efecbe3cf415 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -276,12 +276,12 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto remove_err;
+ goto out;
}
if (id_val == 0) {
err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
- goto remove_err;
+ goto out;
}
lock_sock(sk);
@@ -296,7 +296,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
release_sock(sk);
- goto remove_err;
+ goto out;
}
list_move(&match->list, &free_list);
@@ -310,7 +310,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
}
err = 0;
- remove_err:
+out:
sock_put(sk);
return err;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index fe6f2d399ee8..458a2d7bb0dd 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1072,6 +1072,15 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
+static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
+{
+ struct sock *ssk = READ_ONCE(msk->first);
+
+ return ssk && ((1 << inet_sk_state_load(ssk)) &
+ (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+ TCPF_SYN_RECV | TCPF_LISTEN));
+}
+
static inline void mptcp_do_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 353680733700..cabe856b2a45 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -938,6 +938,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_bytes_sent = msk->bytes_sent;
info->mptcpi_bytes_received = msk->bytes_received;
info->mptcpi_bytes_retrans = msk->bytes_retrans;
+ info->mptcpi_subflows_total = info->mptcpi_subflows +
+ __mptcp_has_initial_subflow(msk);
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 03757e76bb6b..374412ed780b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -105,8 +105,11 @@ enum {
struct ncsi_channel_version {
- u32 version; /* Supported BCD encoded NCSI version */
- u32 alpha2; /* Supported BCD encoded NCSI version */
+ u8 major; /* NCSI version major */
+ u8 minor; /* NCSI version minor */
+ u8 update; /* NCSI version update */
+ char alpha1; /* NCSI version alpha1 */
+ char alpha2; /* NCSI version alpha2 */
u8 fw_name[12]; /* Firmware name string */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index fd2236ee9a79..b3ff37a181d7 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -270,7 +270,8 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem },
{ NCSI_PKT_CMD_PLDM, 0, NULL },
- { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
+ { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default }
};
static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index d9da942ad53d..745c788f1d1d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
-
static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
{
unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN];
@@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
return ret;
}
-#endif
-
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
-
/* NCSI OEM Command APIs */
static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
{
@@ -856,8 +850,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
return nch->handler(nca);
}
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
/* Determine if a given channel from the channel_queue should be used for Tx */
static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
struct ncsi_channel *nc)
@@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
goto error;
}
- nd->state = ncsi_dev_state_config_oem_gma;
+ nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+ ? ncsi_dev_state_config_oem_gma
+ : ncsi_dev_state_config_clear_vids;
break;
case ncsi_dev_state_config_oem_gma:
nd->state = ncsi_dev_state_config_clear_vids;
- ret = -1;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
- nca.type = NCSI_PKT_CMD_OEM;
nca.package = np->id;
nca.channel = nc->id;
ndp->pending_req_num = 1;
- ret = ncsi_gma_handler(&nca, nc->version.mf_id);
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
+ if (nc->version.major >= 1 && nc->version.minor >= 2) {
+ nca.type = NCSI_PKT_CMD_GMCMA;
+ ret = ncsi_xmit_cmd(&nca);
+ } else {
+ nca.type = NCSI_PKT_CMD_OEM;
+ ret = ncsi_gma_handler(&nca, nc->version.mf_id);
+ }
if (ret < 0)
schedule_work(&ndp->work);
@@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
schedule_work(&ndp->work);
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
case ncsi_dev_state_probe_mlx_gma:
ndp->pending_req_num = 1;
@@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
nd->state = ncsi_dev_state_probe_keep_phy;
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
case ncsi_dev_state_probe_keep_phy:
ndp->pending_req_num = 1;
@@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_gvi;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index a3a6753a1db7..2f872d064396 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
if (nc == nc->package->preferred_channel)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor);
nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index ba66c7dc3a21..f2f3b5c1b941 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt {
/* Get Version ID */
struct ncsi_rsp_gvi_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 ncsi_version; /* NCSI version */
+ unsigned char major; /* NCSI version major */
+ unsigned char minor; /* NCSI version minor */
+ unsigned char update; /* NCSI version update */
+ unsigned char alpha1; /* NCSI version alpha1 */
unsigned char reserved[3]; /* Reserved */
- unsigned char alpha2; /* NCSI version */
+ unsigned char alpha2; /* NCSI version alpha2 */
unsigned char fw_name[12]; /* f/w name string */
__be32 fw_version; /* f/w version */
__be16 pci_ids[4]; /* PCI IDs */
@@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt {
__be32 checksum;
};
+/* Get MC MAC Address */
+struct ncsi_rsp_gmcma_pkt {
+ struct ncsi_rsp_pkt_hdr rsp;
+ unsigned char address_count;
+ unsigned char reserved[3];
+ unsigned char addresses[][ETH_ALEN];
+};
+
/* AEN: Link State Change */
struct ncsi_aen_lsc_pkt {
struct ncsi_aen_pkt_hdr aen; /* AEN header */
@@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */
#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */
+#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */
/* NCSI packet responses */
@@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80)
#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80)
+#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80)
/* NCSI response code/reason */
#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 069c2659074b..bee290d0f48b 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -19,6 +19,19 @@
#include "ncsi-pkt.h"
#include "ncsi-netlink.h"
+/* Nibbles within [0xA, 0xF] add zero "0" to the returned value.
+ * Optional fields (encoded as 0xFF) will default to zero.
+ */
+static u8 decode_bcd_u8(u8 x)
+{
+ int lo = x & 0xF;
+ int hi = x >> 4;
+
+ lo = lo < 0xA ? lo : 0;
+ hi = hi < 0xA ? hi : 0;
+ return lo + hi * 10;
+}
+
static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
unsigned short payload)
{
@@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
if (!nc)
return -ENODEV;
- /* Update to channel's version info */
+ /* Update channel's version info
+ *
+ * Major, minor, and update fields are supposed to be
+ * unsigned integers encoded as packed BCD.
+ *
+ * Alpha1 and alpha2 are ISO/IEC 8859-1 characters.
+ */
ncv = &nc->version;
- ncv->version = ntohl(rsp->ncsi_version);
+ ncv->major = decode_bcd_u8(rsp->major);
+ ncv->minor = decode_bcd_u8(rsp->minor);
+ ncv->update = decode_bcd_u8(rsp->update);
+ ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
ncv->fw_version = ntohl(rsp->fw_version);
@@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
return ret;
}
+static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ struct ncsi_rsp_gmcma_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = -1;
+ int i;
+
+ rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp);
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n",
+ rsp->address_count);
+ for (i = 0; i < rsp->address_count; i++) {
+ netdev_info(ndev, "NCSI: MAC address %d: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ i, rsp->addresses[i][0], rsp->addresses[i][1],
+ rsp->addresses[i][2], rsp->addresses[i][3],
+ rsp->addresses[i][4], rsp->addresses[i][5]);
+ }
+
+ for (i = 0; i < rsp->address_count; i++) {
+ memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN);
+ ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0) {
+ netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n",
+ saddr.sa_data);
+ continue;
+ }
+ netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data);
+ break;
+ }
+
+ ndp->gma_flag = ret == 0;
+ return ret;
+}
+
static struct ncsi_rsp_handler {
unsigned char type;
int payload;
@@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm },
{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid },
{ NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm },
- { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }
+ { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm },
+ { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma },
};
int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 467671f2d42f..fbbc4fd37349 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -617,7 +617,7 @@ synproxy_recv_client_ack(struct net *net,
struct synproxy_net *snet = synproxy_pernet(net);
int mss;
- mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ mss = __cookie_v4_check(ip_hdr(skb), th);
if (mss == 0) {
this_cpu_inc(snet->stats->cookie_invalid);
return false;
@@ -1034,7 +1034,7 @@ synproxy_recv_client_ack_ipv6(struct net *net,
struct synproxy_net *snet = synproxy_pernet(net);
int mss;
- mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ mss = nf_cookie_v6_check(ipv6_hdr(skb), th);
if (mss == 0) {
this_cpu_inc(snet->stats->cookie_invalid);
return false;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index eb086b06d60d..177126fb0484 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock;
}
-static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
- int broadcast)
+struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
struct sk_buff *skb;
void *data;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 7adf48549a3b..5f1757a32842 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2121,13 +2121,13 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason = SKB_CONSUMED;
struct sock *sk;
struct sockaddr_ll *sll;
struct packet_sock *po;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
- bool is_drop_n_account = false;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -2217,9 +2217,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
drop_n_acct:
- is_drop_n_account = true;
atomic_inc(&po->tp_drops);
atomic_inc(&sk->sk_drops);
+ drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2227,16 +2227,14 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- if (!is_drop_n_account)
- consume_skb(skb);
- else
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
}
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason = SKB_CONSUMED;
struct sock *sk;
struct packet_sock *po;
struct sockaddr_ll *sll;
@@ -2250,7 +2248,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct sk_buff *copy_skb = NULL;
struct timespec64 ts;
__u32 ts_status;
- bool is_drop_n_account = false;
unsigned int slot_id = 0;
int vnet_hdr_sz = 0;
@@ -2498,19 +2495,16 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- if (!is_drop_n_account)
- consume_skb(skb);
- else
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
drop_n_account:
spin_unlock(&sk->sk_receive_queue.lock);
atomic_inc(&po->tp_drops);
- is_drop_n_account = true;
+ drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
sk->sk_data_ready(sk);
- kfree_skb(copy_skb);
+ kfree_skb_reason(copy_skb, drop_reason);
goto drop_n_restore;
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 14cc8fe8584b..c3feb4f49d09 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1351,11 +1351,11 @@ static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct rfkill_data *data = file->private_data;
- int ret = -ENOSYS;
+ int ret = -ENOTTY;
u32 size;
if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
- return -ENOSYS;
+ return -ENOTTY;
mutex_lock(&data->mtx);
switch (_IOC_NR(cmd)) {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index c39252d61ebb..abec5c45b5a4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1118,8 +1118,7 @@ int tcf_action_destroy(struct tc_action *actions[], int bind)
struct tc_action *a;
int ret = 0, i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
actions[i] = NULL;
ops = a->ops;
ret = __tcf_idr_release(a, bind, true);
@@ -1136,18 +1135,29 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
-/* Put all actions in this array, skip those NULL's. */
static void tcf_action_put_many(struct tc_action *actions[])
{
+ struct tc_action *a;
+ int i;
+
+ tcf_act_for_each_action(i, a, actions) {
+ const struct tc_action_ops *ops = a->ops;
+ if (tcf_action_put(a))
+ module_put(ops->owner);
+ }
+}
+
+static void tca_put_bound_many(struct tc_action *actions[], int init_res[])
+{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- struct tc_action *a = actions[i];
- const struct tc_action_ops *ops;
+ tcf_act_for_each_action(i, a, actions) {
+ const struct tc_action_ops *ops = a->ops;
- if (!a)
+ if (init_res[i] == ACT_P_CREATED)
continue;
- ops = a->ops;
+
if (tcf_action_put(a))
module_put(ops->owner);
}
@@ -1211,8 +1221,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
int err = -EINVAL, i;
struct nlattr *nest;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
nest = nla_nest_start_noflag(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
@@ -1276,14 +1285,12 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
void tcf_idr_insert_many(struct tc_action *actions[])
{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- struct tc_action *a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
struct tcf_idrinfo *idrinfo;
- if (!a)
- continue;
idrinfo = a->idrinfo;
mutex_lock(&idrinfo->lock);
/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
@@ -1497,10 +1504,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
err:
tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
err_mod:
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- if (ops[i])
- module_put(ops[i]->owner);
- }
+ for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++)
+ module_put(ops[i]->owner);
return err;
}
@@ -1753,10 +1758,10 @@ err_out:
static int tcf_action_delete(struct net *net, struct tc_action *actions[])
{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- struct tc_action *a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
const struct tc_action_ops *ops = a->ops;
/* Actions can be deleted concurrently so we must save their
* type and id to search again after reference is released.
@@ -1768,7 +1773,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
if (tcf_action_put(a)) {
/* last reference, action was deleted concurrently */
module_put(ops->owner);
- } else {
+ } else {
int ret;
/* now do the delete */
@@ -1977,7 +1982,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
- int loop, ret, i;
+ int loop, ret;
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
int init_res[TCA_ACT_MAX_PRIO] = {};
@@ -1990,13 +1995,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
+
ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
- /* only put existing actions */
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++)
- if (init_res[i] == ACT_P_CREATED)
- actions[i] = NULL;
- tcf_action_put_many(actions);
+ /* only put bound actions */
+ tca_put_bound_many(actions, init_res);
return ret;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d5bdfd4a7655..289e1755c26b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -71,7 +71,7 @@ struct tc_u_hnode {
struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
- int refcnt;
+ refcount_t refcnt;
unsigned int divisor;
struct idr handle_idr;
bool is_root;
@@ -86,7 +86,7 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
void *ptr;
- int refcnt;
+ refcount_t refcnt;
struct idr handle_idr;
struct hlist_node hnode;
long knodes;
@@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp)
if (root_ht == NULL)
return -ENOBUFS;
- root_ht->refcnt++;
+ refcount_set(&root_ht->refcnt, 1);
root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
root_ht->is_root = true;
@@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
+ refcount_set(&tp_c->refcnt, 1);
tp_c->ptr = key;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
hlist_add_head(&tp_c->hnode, tc_u_hash(key));
+ } else {
+ refcount_inc(&tp_c->refcnt);
}
- tp_c->refcnt++;
RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, root_ht);
- root_ht->refcnt++;
+ /* root_ht must be destroyed when tcf_proto is destroyed */
rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
@@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n)
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- if (ht && --ht->refcnt == 0)
+ if (ht && refcount_dec_and_test(&ht->refcnt))
kfree(ht);
kfree(n);
}
@@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
struct tc_u_hnode __rcu **hn;
struct tc_u_hnode *phn;
- WARN_ON(--ht->refcnt);
-
u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
@@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
WARN_ON(root_ht == NULL);
- if (root_ht && --root_ht->refcnt == 1)
+ if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
u32_destroy_hnode(tp, root_ht, extack);
- if (--tp_c->refcnt == 0) {
+ if (refcount_dec_and_test(&tp_c->refcnt)) {
struct tc_u_hnode *ht;
hlist_del(&tp_c->hnode);
@@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
/* u32_destroy_key() will later free ht for us, if it's
* still referenced by some knode
*/
- if (--ht->refcnt == 0)
+ if (refcount_dec_and_test(&ht->refcnt))
kfree_rcu(ht, rcu);
}
@@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
return -EINVAL;
}
- if (ht->refcnt == 1) {
+ if (refcount_dec_if_one(&ht->refcnt)) {
u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
@@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
}
out:
- *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
+ *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
return ret;
}
@@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
return -EINVAL;
}
- ht_down->refcnt++;
+ refcount_inc(&ht_down->refcnt);
}
ht_old = rtnl_dereference(n->ht_down);
rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
- ht_old->refcnt--;
+ refcount_dec(&ht_old->refcnt);
}
if (ifindex >= 0)
@@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
/* bump reference count as long as we hold pointer to structure */
if (ht)
- ht->refcnt++;
+ refcount_inc(&ht->refcnt);
return new;
}
@@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht_old = rtnl_dereference(n->ht_down);
if (ht_old)
- ht_old->refcnt++;
+ refcount_inc(&ht_old->refcnt);
}
__u32_destroy_key(new);
return err;
@@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
}
- ht->refcnt = 1;
+ refcount_set(&ht->refcnt, 1);
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 9a0b85190a2c..beece8e82c23 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -57,6 +57,8 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <linux/units.h>
+
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
@@ -65,8 +67,6 @@
static LIST_HEAD(cbs_list);
static DEFINE_SPINLOCK(cbs_list_lock);
-#define BYTES_PER_KBIT (1000LL / 8)
-
struct cbs_sched_data {
bool offload;
int queue;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4195a4bc26ca..8dd0e5925342 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
if (unlikely(timedout_ms)) {
trace_net_dev_xmit_timeout(dev, i);
- WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
- dev->name, netdev_drivername(dev), i, timedout_ms);
+ netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
+ raw_smp_processor_id(),
+ i, timedout_ms);
netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
netif_unfreeze_queues(dev);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 73eebddbbf41..7fc2f3c6d248 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2461,7 +2461,7 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
- rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini);
if (rc)
goto out_decl;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index e377980b8414..cd51261b7d9e 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -196,7 +196,6 @@ struct smc_connection {
* - dec on polled tx cqe
*/
wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
- atomic_t tx_pushing; /* nr_threads trying tx push */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 72f4d81a3f41..95e19aa3e769 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -824,6 +824,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
struct smc_clc_smcd_gid_chid *gidchids;
struct smc_clc_msg_proposal_area *pclc;
struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct net *net = sock_net(&smc->sk);
struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
@@ -943,8 +944,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
}
if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
- v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
- v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr;
+ v2_ext->max_links = net->smc.sysctl_max_links_per_lgr;
}
pclc_base->hdr.length = htons(plen);
@@ -1170,10 +1171,12 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
return len > 0 ? 0 : len;
}
-int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
+ struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
struct smc_clc_v2_extension *pclc_v2_ext;
+ struct net *net = sock_net(&smc->sk);
ini->max_conns = SMC_CONN_PER_LGR_MAX;
ini->max_links = SMC_LINKS_ADD_LNK_MAX;
@@ -1187,11 +1190,13 @@ int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
return SMC_CLC_DECL_NOV2EXT;
if (ini->smcr_version & SMC_V2) {
- ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns,
+ net->smc.sysctl_max_conns_per_lgr);
if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
return SMC_CLC_DECL_MAXCONNERR;
- ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links,
+ net->smc.sysctl_max_links_per_lgr);
if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
return SMC_CLC_DECL_MAXLINKERR;
}
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 08155a96a02a..1697b84d85be 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -422,7 +422,8 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
-int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
+ struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini);
int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
struct smc_init_info *ini);
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 5cbc18c6e62b..a5946d1b9d60 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -25,6 +25,10 @@ static int max_sndbuf = INT_MAX / 2;
static int max_rcvbuf = INT_MAX / 2;
static const int net_smc_wmem_init = (64 * 1024);
static const int net_smc_rmem_init = (64 * 1024);
+static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN;
+static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
+static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
+static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
static struct ctl_table smc_table[] = {
{
@@ -68,6 +72,24 @@ static struct ctl_table smc_table[] = {
.extra1 = &min_rcvbuf,
.extra2 = &max_rcvbuf,
},
+ {
+ .procname = "smcr_max_links_per_lgr",
+ .data = &init_net.smc.sysctl_max_links_per_lgr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &links_per_lgr_min,
+ .extra2 = &links_per_lgr_max,
+ },
+ {
+ .procname = "smcr_max_conns_per_lgr",
+ .data = &init_net.smc.sysctl_max_conns_per_lgr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &conns_per_lgr_min,
+ .extra2 = &conns_per_lgr_max,
+ },
{ }
};
@@ -97,6 +119,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
+ net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
+ net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
return 0;
diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h
index 0becc11bd2f4..eb2465ae1e15 100644
--- a/net/smc/smc_sysctl.h
+++ b/net/smc/smc_sysctl.h
@@ -23,6 +23,8 @@ void __net_exit smc_sysctl_net_exit(struct net *net);
static inline int smc_sysctl_net_init(struct net *net)
{
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+ net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
+ net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
return 0;
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3b0ff3b589c7..214ac3cbcf9a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -621,7 +621,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
int rc = 0;
@@ -655,34 +655,6 @@ out:
return rc;
}
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- int rc;
-
- /* This make sure only one can send simultaneously to prevent wasting
- * of CPU and CDC slot.
- * Record whether someone has tried to push while we are pushing.
- */
- if (atomic_inc_return(&conn->tx_pushing) > 1)
- return 0;
-
-again:
- atomic_set(&conn->tx_pushing, 1);
- smp_wmb(); /* Make sure tx_pushing is 1 before real send */
- rc = __smc_tx_sndbuf_nonempty(conn);
-
- /* We need to check whether someone else have added some data into
- * the send queue and tried to push but failed after the atomic_set()
- * when we are pushing.
- * If so, we need to push again to prevent those data hang in the send
- * queue.
- */
- if (unlikely(!atomic_dec_and_test(&conn->tx_pushing)))
- goto again;
-
- return rc;
-}
-
/* Wakeup sndbuf consumers from process context
* since there is more data to transmit. The caller
* must hold sock lock.
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index c763008a8adb..079aebb16ed8 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -168,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
int str_len = strlen(str) + 1;
struct sk_buff *buf;
- buf = tipc_tlv_alloc(TLV_SPACE(str_len));
+ buf = tipc_tlv_alloc(str_len);
if (buf)
tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1cbbb11ea503..d0f499227c29 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9342,6 +9342,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
else
eth_broadcast_addr(request->bssid);
+ request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs);
request->wdev = wdev;
request->wiphy = &rdev->wiphy;
request->scan_start = jiffies;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..caa340134b0e 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
+#define XDP_UMEM_FLAGS_VALID ( \
+ XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
+ XDP_UMEM_TX_SW_CSUM | \
+ 0)
+
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
@@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+ if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
return -EINVAL;
if (!unaligned_chunks && !is_power_of_2(chunk_size))
@@ -199,6 +204,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +215,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3da0b52f308d..9f13aa3353e3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb)
static void xsk_destruct_skb(struct sk_buff *skb)
{
+ struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta;
+
+ if (compl->tx_timestamp) {
+ /* sw completion timestamp, not a real one */
+ *compl->tx_timestamp = ktime_get_tai_fast_ns();
+ }
+
xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
+ struct xsk_tx_metadata *meta = NULL;
struct net_device *dev = xs->dev;
struct sk_buff *skb = xs->skb;
+ bool first_frag = false;
int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
@@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
kfree_skb(skb);
goto free_err;
}
+
+ first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -709,12 +720,45 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
+
+ if (first_frag && desc->options & XDP_TX_METADATA) {
+ if (unlikely(xs->pool->tx_metadata_len == 0)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ meta = buffer - xs->pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta))) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
+ if (unlikely(meta->request.csum_start +
+ meta->request.csum_offset +
+ sizeof(__sum16) > len)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ skb->csum_start = hr + meta->request.csum_start;
+ skb->csum_offset = meta->request.csum_offset;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (unlikely(xs->pool->tx_sw_csum)) {
+ err = skb_checksum_help(skb);
+ if (err)
+ goto free_err;
+ }
+ }
+ }
}
skb->dev = dev;
skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
+ xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
xsk_set_destructor_arg(skb);
return skb;
@@ -1282,6 +1326,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1325,8 +1377,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 49cb9f9a09be..4f6f538a5462 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
+ pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..6f2d1621c992 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -137,21 +137,23 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_unused_options_set(u32 options)
{
- return options & ~XDP_PKT_CONTD;
+ return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA);
}
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))