summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c8
-rw-r--r--net/9p/trans_xen.c14
-rw-r--r--net/Kconfig13
-rw-r--r--net/ax25/af_ax25.c7
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c3
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/gateway_client.c1
-rw-r--r--net/batman-adv/hard-interface.c35
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c1
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/bluetooth/6lowpan.c3
-rw-r--r--net/bluetooth/bnep/core.c2
-rw-r--r--net/bluetooth/eir.h20
-rw-r--r--net/bluetooth/hci_core.c1
-rw-r--r--net/bluetooth/hci_event.c19
-rw-r--r--net/bluetooth/hci_sync.c81
-rw-r--r--net/bluetooth/l2cap_core.c1
-rw-r--r--net/bluetooth/mgmt.c159
-rw-r--r--net/bluetooth/mgmt_util.c3
-rw-r--r--net/bpf/test_run.c5
-rw-r--r--net/bridge/br.c15
-rw-r--r--net/bridge/br_arp_nd_proxy.c4
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_input.c11
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netlink.c6
-rw-r--r--net/bridge/br_private.h6
-rw-r--r--net/bridge/br_switchdev.c97
-rw-r--r--net/bridge/br_vlan.c117
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c5
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/gw.c16
-rw-r--r--net/can/isotp.c235
-rw-r--r--net/can/j1939/transport.c2
-rw-r--r--net/core/dev.c578
-rw-r--r--net/core/drop_monitor.c72
-rw-r--r--net/core/filter.c181
-rw-r--r--net/core/flow_dissector.c18
-rw-r--r--net/core/gro.c25
-rw-r--r--net/core/gro_cells.c36
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/page_pool.c79
-rw-r--r--net/core/ptp_classifier.c12
-rw-r--r--net/core/rtnetlink.c545
-rw-r--r--net/core/skbuff.c70
-rw-r--r--net/core/skmsg.c2
-rw-r--r--net/core/sock.c12
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/core/utils.c4
-rw-r--r--net/core/xdp.c3
-rw-r--r--net/dcb/dcbnl.c44
-rw-r--r--net/decnet/dn_nsp_out.c3
-rw-r--r--net/dsa/dsa.c101
-rw-r--r--net/dsa/dsa2.c76
-rw-r--r--net/dsa/dsa_priv.h95
-rw-r--r--net/dsa/master.c7
-rw-r--r--net/dsa/port.c425
-rw-r--r--net/dsa/slave.c434
-rw-r--r--net/dsa/switch.c371
-rw-r--r--net/dsa/tag_8021q.c323
-rw-r--r--net/dsa/tag_dsa.c19
-rw-r--r--net/dsa/tag_lan9303.c21
-rw-r--r--net/dsa/tag_ocelot_8021q.c11
-rw-r--r--net/dsa/tag_rtl8_4.c152
-rw-r--r--net/dsa/tag_sja1105.c28
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rings.c19
-rw-r--r--net/hsr/hsr_framereg.c23
-rw-r--r--net/hsr/hsr_framereg.h8
-rw-r--r--net/hsr/hsr_main.h16
-rw-r--r--net/ieee802154/6lowpan/reassembly.c1
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/arp.c11
-rw-r--r--net/ipv4/devinet.c7
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c8
-rw-r--r--net/ipv4/fib_lookup.h7
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/inet_fragment.c1
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c1
-rw-r--r--net/ipv4/ip_input.c1
-rw-r--r--net/ipv4/ip_output.c16
-rw-r--r--net/ipv4/ping.c10
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c80
-rw-r--r--net/ipv4/tcp_input.c42
-rw-r--r--net/ipv4/tcp_ipv4.c97
-rw-r--r--net/ipv4/tcp_output.c49
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv6/addrconf.c61
-rw-r--r--net/ipv6/af_inet6.c24
-rw-r--r--net/ipv6/esp6.c7
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/ioam6.c19
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_input.c1
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c27
-rw-r--r--net/ipv6/ip6mr.c18
-rw-r--r--net/ipv6/ipv6_sockglue.c6
-rw-r--r--net/ipv6/mcast.c34
-rw-r--r--net/ipv6/ndisc.c55
-rw-r--r--net/ipv6/netfilter.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/ping.c21
-rw-r--r--net/ipv6/reassembly.c1
-rw-r--r--net/ipv6/route.c19
-rw-r--r--net/ipv6/tcp_ipv6.c95
-rw-r--r--net/ipv6/udp.c11
-rw-r--r--net/ipv6/xfrm6_output.c16
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/mac80211/agg-tx.c10
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/mlme.c45
-rw-r--r--net/mac80211/rx.c14
-rw-r--r--net/mctp/device.c33
-rw-r--r--net/mctp/neigh.c2
-rw-r--r--net/mctp/route.c38
-rw-r--r--net/mctp/test/utils.c1
-rw-r--r--net/mptcp/mib.c6
-rw-r--r--net/mptcp/mib.h6
-rw-r--r--net/mptcp/options.c18
-rw-r--r--net/mptcp/pm.c19
-rw-r--r--net/mptcp/pm_netlink.c161
-rw-r--r--net/mptcp/protocol.c22
-rw-r--r--net/mptcp/protocol.h32
-rw-r--r--net/mptcp/sockopt.c2
-rw-r--r--net/mptcp/subflow.c112
-rw-r--r--net/netfilter/core.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_dup_netdev.c2
-rw-r--r--net/netfilter/nf_flow_table_ip.c4
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nf_queue.c36
-rw-r--r--net/netfilter/nf_tables_api.c20
-rw-r--r--net/netfilter/nf_tables_offload.c3
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c20
-rw-r--r--net/netfilter/nft_dup_netdev.c6
-rw-r--r--net/netfilter/nft_fwd_netdev.c8
-rw-r--r--net/netfilter/nft_immediate.c12
-rw-r--r--net/netfilter/nft_limit.c18
-rw-r--r--net/netfilter/nft_synproxy.c4
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/nfc/llcp.h1
-rw-r--r--net/nfc/llcp_core.c9
-rw-r--r--net/nfc/llcp_sock.c49
-rw-r--r--net/openvswitch/actions.c46
-rw-r--r--net/openvswitch/flow.c140
-rw-r--r--net/openvswitch/flow.h14
-rw-r--r--net/openvswitch/flow_netlink.c37
-rw-r--r--net/openvswitch/vport.c2
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/phonet/af_phonet.c8
-rw-r--r--net/sched/act_api.c17
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_ct.c18
-rw-r--r--net/sched/act_police.c62
-rw-r--r--net/sched/cls_api.c6
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/sch_api.c22
-rw-r--r--net/sched/sch_generic.c29
-rw-r--r--net/sctp/diag.c9
-rw-r--r--net/smc/Makefile1
-rw-r--r--net/smc/af_smc.c71
-rw-r--r--net/smc/smc.h6
-rw-r--r--net/smc/smc_cdc.c24
-rw-r--r--net/smc/smc_core.c7
-rw-r--r--net/smc/smc_pnet.c42
-rw-r--r--net/smc/smc_pnet.h2
-rw-r--r--net/smc/smc_sysctl.c65
-rw-r--r--net/smc/smc_sysctl.h33
-rw-r--r--net/smc/smc_tx.c119
-rw-r--r--net/socket.c4
-rw-r--r--net/switchdev/switchdev.c230
-rw-r--r--net/tipc/bearer.c14
-rw-r--r--net/tipc/crypto.c2
-rw-r--r--net/tipc/link.c9
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/node.c13
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/vmw_vsock/af_vsock.c1
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/core.c17
-rw-r--r--net/wireless/nl80211.c15
-rw-r--r--net/wireless/util.c2
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_interface.c9
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--net/xfrm/xfrm_state.c29
-rw-r--r--net/xfrm/xfrm_user.c27
209 files changed, 5362 insertions, 2150 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index d1902828a18a..e5d23e75572a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -638,12 +638,7 @@ void vlan_dev_free_egress_priority(const struct net_device *dev)
static void vlan_dev_uninit(struct net_device *dev)
{
- struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
-
vlan_dev_free_egress_priority(dev);
-
- /* Get rid of the vlan's reference to real_dev */
- dev_put_track(vlan->real_dev, &vlan->dev_tracker);
}
static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
@@ -856,6 +851,9 @@ static void vlan_dev_free(struct net_device *dev)
free_percpu(vlan->vlan_pcpu_stats);
vlan->vlan_pcpu_stats = NULL;
+
+ /* Get rid of the vlan's reference to real_dev */
+ dev_put_track(vlan->real_dev, &vlan->dev_tracker);
}
void vlan_setup(struct net_device *dev)
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index eb9fb55280ef..01f8067994d6 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -281,9 +281,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
ref = priv->rings[i].intf->ref[j];
gnttab_end_foreign_access(ref, 0, 0);
}
- free_pages((unsigned long)priv->rings[i].data.in,
- priv->rings[i].intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(priv->rings[i].data.in,
+ 1UL << (priv->rings[i].intf->ring_order +
+ XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
free_page((unsigned long)priv->rings[i].intf);
@@ -322,8 +322,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
if (ret < 0)
goto out;
ring->ref = ret;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT),
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes) {
ret = -ENOMEM;
goto out;
@@ -354,9 +354,7 @@ out:
if (bytes) {
for (i--; i >= 0; i--)
gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
- free_pages((unsigned long)bytes,
- ring->intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(ring->ref, 0, 0);
free_page((unsigned long)ring->intf);
diff --git a/net/Kconfig b/net/Kconfig
index 8a1f9d0287de..6b78f695caa6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -434,6 +434,19 @@ config NET_DEVLINK
config PAGE_POOL
bool
+config PAGE_POOL_STATS
+ default n
+ bool "Page pool stats"
+ depends on PAGE_POOL
+ help
+ Enable page pool statistics to track page allocation and recycling
+ in page pools. This option incurs additional CPU cost in allocation
+ and recycle paths and additional memory cost to store the statistics.
+ These statistics are only available if this option is enabled and if
+ the driver using the page pool supports exporting this data.
+
+ If unsure, say N.
+
config FAILOVER
tristate "Generic failover module"
help
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d53cbb4e2503..6bd097180772 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -87,6 +87,13 @@ again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
sk = s->sk;
+ if (!sk) {
+ spin_unlock_bh(&ax25_list_lock);
+ s->ax25_dev = NULL;
+ ax25_disconnect(s, ENETUNREACH);
+ spin_lock_bh(&ax25_list_lock);
+ goto again;
+ }
sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
lock_sock(sk);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f94f538fa382..7f6a7c96ac92 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -13,13 +13,13 @@
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 71999e13f729..b6db999abf75 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -10,13 +10,13 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1d750f3cb2e4..033639df96d8 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -9,12 +9,12 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 2ed9496fc41f..7f8a14d99cdb 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/crc16.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -443,7 +444,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac,
batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
skb->len + ETH_HLEN);
- netif_rx_any_context(skb);
+ netif_rx(skb);
out:
batadv_hardif_put(primary_if);
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 2f008e329007..fefb51a5f606 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -11,6 +11,7 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -20,7 +21,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netlink.h>
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index b7466136e292..d26124bc27e1 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 8a2b78f9c4b2..83fb51b6e299 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,11 +9,11 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/limits.h>
#include <linux/list.h>
@@ -149,25 +149,28 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
struct net *parent_net;
+ int iflink;
bool ret;
/* check if this is a batman-adv mesh interface */
if (batadv_softif_is_valid(net_dev))
return true;
- /* no more parents..stop recursion */
- if (dev_get_iflink(net_dev) == 0 ||
- dev_get_iflink(net_dev) == net_dev->ifindex)
+ iflink = dev_get_iflink(net_dev);
+ if (iflink == 0)
return false;
parent_net = batadv_getlink_net(net_dev, net);
+ /* iflink to itself, most likely physical device */
+ if (net == parent_net && iflink == net_dev->ifindex)
+ return false;
+
/* recurse over the parent device */
- parent_dev = __dev_get_by_index((struct net *)parent_net,
- dev_get_iflink(net_dev));
- /* if we got a NULL parent_dev there is something broken.. */
+ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
if (!parent_dev) {
- pr_err("Cannot find parent device\n");
+ pr_warn("Cannot find parent device. Skipping batadv-on-batadv check for %s\n",
+ net_dev->name);
return false;
}
@@ -214,14 +217,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
struct net_device *real_netdev = NULL;
struct net *real_net;
struct net *net;
- int ifindex;
+ int iflink;
ASSERT_RTNL();
if (!netdev)
return NULL;
- if (netdev->ifindex == dev_get_iflink(netdev)) {
+ iflink = dev_get_iflink(netdev);
+ if (iflink == 0) {
dev_hold(netdev);
return netdev;
}
@@ -231,9 +235,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
goto out;
net = dev_net(hard_iface->soft_iface);
- ifindex = dev_get_iflink(netdev);
real_net = batadv_getlink_net(netdev, net);
- real_netdev = dev_get_by_index(real_net, ifindex);
+
+ /* iflink to itself, most likely physical device */
+ if (net == real_net && netdev->ifindex == iflink) {
+ real_netdev = netdev;
+ dev_hold(real_netdev);
+ goto out;
+ }
+
+ real_netdev = dev_get_by_index(real_net, iflink);
out:
batadv_hardif_put(hard_iface);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5207cd8d6ad8..e8a449915566 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/crc32c.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -132,7 +133,6 @@ static void __exit batadv_exit(void)
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
- flush_workqueue(batadv_event_workqueue);
destroy_workqueue(batadv_event_workqueue);
batadv_event_workqueue = NULL;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 494d1ebecac2..f3be82999f1f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2022.0"
+#define BATADV_SOURCE_VERSION "2022.1"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 9f311fddfaf9..b238455913df 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 974d726fabb9..5f4aeeb60dc4 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -19,7 +20,6 @@
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index aadc653ca1d8..34903df4fe93 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -8,11 +8,11 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 477d85a3b558..0379b126865d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -10,13 +10,13 @@
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 2dbbe6c19609..0f5c0679b55a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -11,6 +11,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -19,7 +20,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 93730d30af54..7f3dd3c393e0 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -12,13 +12,13 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/limits.h>
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4b7ad6684bc4..8478034d3abf 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -13,6 +13,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/crc32c.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -21,7 +22,6 @@
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 0cb58eb04093..7ec2e2343884 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -7,10 +7,10 @@
#include "main.h"
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 133d7ea063fb..215af9b3b589 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -240,7 +240,7 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
if (!skb_cp)
return NET_RX_DROP;
- return netif_rx_ni(skb_cp);
+ return netif_rx(skb_cp);
}
static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
@@ -641,7 +641,6 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
return NULL;
peer->chan = chan;
- memset(&peer->peer_addr, 0, sizeof(struct in6_addr));
baswap((void *)peer->lladdr, &chan->dst);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 40baa6b7321a..5a6a49885ab6 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -400,7 +400,7 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
dev->stats.rx_packets++;
nskb->ip_summed = CHECKSUM_NONE;
nskb->protocol = eth_type_trans(nskb, dev);
- netif_rx_ni(nskb);
+ netif_rx(nskb);
return 0;
badframe:
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 05e2e917fc25..43f1945bffc5 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -15,6 +15,11 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len);
u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+static inline u16 eir_precalc_len(u8 data_len)
+{
+ return sizeof(u8) * 2 + data_len;
+}
+
static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
u8 *data, u8 data_len)
{
@@ -36,6 +41,21 @@ static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
return eir_len;
}
+static inline u16 eir_skb_put_data(struct sk_buff *skb, u8 type, u8 *data, u8 data_len)
+{
+ u8 *eir;
+ u16 eir_len;
+
+ eir_len = eir_precalc_len(data_len);
+ eir = skb_put(skb, eir_len);
+ WARN_ON(sizeof(type) + data_len > U8_MAX);
+ eir[0] = sizeof(type) + data_len;
+ eir[1] = type;
+ memcpy(&eir[2], data, data_len);
+
+ return eir_len;
+}
+
static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
size_t *data_len)
{
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5bde0ec41177..b4782a6c1025 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2739,6 +2739,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_dev_unlock(hdev);
ida_simple_remove(&hci_index_ida, hdev->id);
+ kfree_skb(hdev->sent_cmd);
kfree(hdev);
}
EXPORT_SYMBOL(hci_release_dev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 63b925921c87..4888c1f8a9b7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5716,8 +5716,6 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- adv = hci_find_adv_instance(hdev, ev->handle);
-
/* The Bluetooth Core 5.3 specification clearly states that this event
* shall not be sent when the Host disables the advertising set. So in
* case of HCI_ERROR_CANCELLED_BY_HOST, just ignore the event.
@@ -5730,9 +5728,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
return;
}
+ hci_dev_lock(hdev);
+
+ adv = hci_find_adv_instance(hdev, ev->handle);
+
if (ev->status) {
if (!adv)
- return;
+ goto unlock;
/* Remove advertising as it has been terminated */
hci_remove_adv_instance(hdev, ev->handle);
@@ -5740,12 +5742,12 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) {
if (adv->enabled)
- return;
+ goto unlock;
}
/* We are no longer advertising, clear HCI_LE_ADV */
hci_dev_clear_flag(hdev, HCI_LE_ADV);
- return;
+ goto unlock;
}
if (adv)
@@ -5760,16 +5762,19 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
bacmp(&conn->resp_addr, BDADDR_ANY))
- return;
+ goto unlock;
if (!ev->handle) {
bacpy(&conn->resp_addr, &hdev->random_addr);
- return;
+ goto unlock;
}
if (adv)
bacpy(&conn->resp_addr, &adv->random_addr);
}
+
+unlock:
+ hci_dev_unlock(hdev);
}
static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data,
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 6e71aa6b6fea..af7ea8a3317d 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -276,40 +276,37 @@ EXPORT_SYMBOL(__hci_cmd_sync_status);
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
- struct hci_cmd_sync_work_entry *entry;
- hci_cmd_sync_work_func_t func;
- hci_cmd_sync_work_destroy_t destroy;
- void *data;
bt_dev_dbg(hdev, "");
- mutex_lock(&hdev->cmd_sync_work_lock);
- entry = list_first_entry(&hdev->cmd_sync_work_list,
- struct hci_cmd_sync_work_entry, list);
- if (entry) {
- list_del(&entry->list);
- func = entry->func;
- data = entry->data;
- destroy = entry->destroy;
- kfree(entry);
- } else {
- func = NULL;
- data = NULL;
- destroy = NULL;
- }
- mutex_unlock(&hdev->cmd_sync_work_lock);
+ /* Dequeue all entries and run them */
+ while (1) {
+ struct hci_cmd_sync_work_entry *entry;
- if (func) {
- int err;
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ entry = list_first_entry_or_null(&hdev->cmd_sync_work_list,
+ struct hci_cmd_sync_work_entry,
+ list);
+ if (entry)
+ list_del(&entry->list);
+ mutex_unlock(&hdev->cmd_sync_work_lock);
- hci_req_sync_lock(hdev);
+ if (!entry)
+ break;
- err = func(hdev, data);
+ bt_dev_dbg(hdev, "entry %p", entry);
- if (destroy)
- destroy(hdev, data, err);
+ if (entry->func) {
+ int err;
+
+ hci_req_sync_lock(hdev);
+ err = entry->func(hdev, entry->data);
+ if (entry->destroy)
+ entry->destroy(hdev, entry->data, err);
+ hci_req_sync_unlock(hdev);
+ }
- hci_req_sync_unlock(hdev);
+ kfree(entry);
}
}
@@ -1844,6 +1841,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
+ u8 filter_policy;
int err;
/* Pause advertising if resolving list can be used as controllers are
@@ -1930,6 +1928,8 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
err = -EINVAL;
done:
+ filter_policy = err ? 0x00 : 0x01;
+
/* Enable address resolution when LL Privacy is enabled. */
err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01);
if (err)
@@ -1940,7 +1940,7 @@ done:
hci_resume_advertising_sync(hdev);
/* Select filter policy to use accept list */
- return err ? 0x00 : 0x01;
+ return filter_policy;
}
/* Returns true if an le connection is in the scanning state */
@@ -3265,10 +3265,10 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)
events[0] |= 0x40; /* LE Data Length Change */
- /* If the controller supports LL Privacy feature, enable
- * the corresponding event.
+ /* If the controller supports LL Privacy feature or LE Extended Adv,
+ * enable the corresponding event.
*/
- if (hdev->le_features[0] & HCI_LE_LL_PRIVACY)
+ if (use_enhanced_conn_complete(hdev))
events[1] |= 0x02; /* LE Enhanced Connection Complete */
/* If the controller supports Extended Scanner Filter
@@ -4109,9 +4109,9 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
hci_conn_hash_flush(hdev);
- hci_dev_unlock(hdev);
-
+ /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */
smp_unregister(hdev);
+ hci_dev_unlock(hdev);
hci_sock_dev_event(hdev, HCI_DEV_DOWN);
@@ -4425,7 +4425,7 @@ static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
return err;
}
- return err;
+ return 0;
}
/* This function perform power off HCI command sequence as follows:
@@ -5188,7 +5188,7 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN,
plen, data,
HCI_EV_LE_ENHANCED_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ conn->conn_timeout, NULL);
}
int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
@@ -5273,9 +5273,18 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
cp.min_ce_len = cpu_to_le16(0x0000);
cp.max_ce_len = cpu_to_le16(0x0000);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2261:
+ *
+ * If this event is unmasked and the HCI_LE_Connection_Complete event
+ * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is
+ * sent when a new connection has been created.
+ */
err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN,
- sizeof(cp), &cp, HCI_EV_LE_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ sizeof(cp), &cp,
+ use_enhanced_conn_complete(hdev) ?
+ HCI_EV_LE_ENHANCED_CONN_COMPLETE :
+ HCI_EV_LE_CONN_COMPLETE,
+ conn->conn_timeout, NULL);
done:
/* Re-enable advertising after the connection attempt is finished. */
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index e817ff0607a0..8df99c07f272 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1436,6 +1436,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
l2cap_ecred_init(chan, 0);
+ memset(&data, 0, sizeof(data));
data.pdu.req.psm = chan->psm;
data.pdu.req.mtu = cpu_to_le16(chan->imtu);
data.pdu.req.mps = cpu_to_le16(chan->mps);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 5dd684e0b259..8101a6a31841 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1220,7 +1220,13 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip)
static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_mode *cp = cmd->param;
+ struct mgmt_mode *cp;
+
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
+ cp = cmd->param;
bt_dev_dbg(hdev, "err %d", err);
@@ -1244,7 +1250,7 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
mgmt_status(err));
}
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
}
static int set_powered_sync(struct hci_dev *hdev, void *data)
@@ -1283,7 +1289,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_POWERED, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1292,6 +1298,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd,
mgmt_set_powered_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1385,6 +1394,10 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1404,7 +1417,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_dev_unlock(hdev);
}
@@ -1513,7 +1526,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1540,6 +1553,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd,
mgmt_set_discoverable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1552,6 +1568,10 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1564,7 +1584,9 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ if (cmd)
+ mgmt_pending_remove(cmd);
+
hci_dev_unlock(hdev);
}
@@ -1636,7 +1658,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1656,6 +1678,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd,
mgmt_set_connectable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1776,6 +1801,10 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
u8 enable = cp->val;
bool changed;
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_SSP, hdev))
+ return;
+
if (err) {
u8 mgmt_err = mgmt_status(err);
@@ -2269,7 +2298,9 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
struct mgmt_cp_remove_uuid *cp = data;
struct mgmt_pending_cmd *cmd;
struct bt_uuid *match, *tmp;
- u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static const u8 bt_uuid_any[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
int err, found;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -3323,6 +3354,9 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
+ if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
+ return;
+
if (status) {
mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
status);
@@ -3495,6 +3529,9 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err)
struct sk_buff *skb = cmd->skb;
u8 status = mgmt_status(err);
+ if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -3761,13 +3798,6 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_WIDEBAND_SPEECH,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
if (hdev_is_powered(hdev) &&
!!cp->val != hci_dev_test_flag(hdev,
HCI_WIDEBAND_SPEECH_ENABLED)) {
@@ -4515,9 +4545,9 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
}
}
-done:
hci_dev_unlock(hdev);
+done:
if (status == MGMT_STATUS_SUCCESS)
device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
@@ -5038,12 +5068,6 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0);
if (!cmd)
err = -ENOMEM;
@@ -5263,11 +5287,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_discovery_set_state(hdev, err ? DISCOVERY_STOPPED:
DISCOVERY_FINDING);
@@ -5329,7 +5358,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
else
hdev->discovery.limited = false;
- cmd = mgmt_pending_new(sk, op, hdev, data, len);
+ cmd = mgmt_pending_add(sk, op, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -5338,7 +5367,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5432,7 +5461,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_START_SERVICE_DISCOVERY,
+ cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY,
hdev, data, len);
if (!cmd) {
err = -ENOMEM;
@@ -5465,7 +5494,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5497,11 +5526,14 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
if (!err)
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
@@ -5537,7 +5569,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto unlock;
@@ -5546,7 +5578,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, stop_discovery_sync, cmd,
stop_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto unlock;
}
@@ -7476,6 +7508,9 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data,
u8 status = mgmt_status(err);
u16 eir_len;
+ if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -7971,11 +8006,7 @@ static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags)
static bool adv_busy(struct hci_dev *hdev)
{
- return (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev));
+ return pending_find(MGMT_OP_SET_LE, hdev);
}
static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance,
@@ -8048,7 +8079,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
u32 flags;
u8 status;
u16 timeout, duration;
- unsigned int prev_instance_cnt = hdev->adv_instance_cnt;
+ unsigned int prev_instance_cnt;
u8 schedule_instance = 0;
struct adv_info *next_instance;
int err;
@@ -8099,6 +8130,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
+ prev_instance_cnt = hdev->adv_instance_cnt;
+
err = hci_add_adv_instance(hdev, cp->instance, flags,
cp->adv_data_len, cp->data,
cp->scan_rsp_len,
@@ -8565,9 +8598,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev)) {
+ if (pending_find(MGMT_OP_SET_LE, hdev)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_BUSY);
goto unlock;
@@ -8603,7 +8634,6 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
struct mgmt_cp_get_adv_size_info *cp = data;
struct mgmt_rp_get_adv_size_info rp;
u32 flags, supported_flags;
- int err;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -8630,10 +8660,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true);
rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false);
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
- MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
-
- return err;
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
}
static const struct hci_mgmt_handler mgmt_handlers[] = {
@@ -9061,12 +9089,14 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
u16 eir_len = 0;
u32 flags = 0;
+ /* allocate buff for LE or BR/EDR adv */
if (conn->le_adv_data_len > 0)
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
- conn->le_adv_data_len);
+ sizeof(*ev) + conn->le_adv_data_len);
else
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
- 2 + name_len + 5);
+ sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) +
+ eir_precalc_len(sizeof(conn->dev_class)));
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, &conn->dst);
@@ -9085,18 +9115,12 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
skb_put_data(skb, conn->le_adv_data, conn->le_adv_data_len);
eir_len = conn->le_adv_data_len;
} else {
- if (name_len > 0) {
- eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE,
- name, name_len);
- skb_put(skb, eir_len);
- }
+ if (name)
+ eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len);
- if (memcmp(conn->dev_class, "\0\0\0", 3) != 0) {
- eir_len = eir_append_data(ev->eir, eir_len,
- EIR_CLASS_OF_DEV,
- conn->dev_class, 3);
- skb_put(skb, 5);
- }
+ if (memcmp(conn->dev_class, "\0\0\0", sizeof(conn->dev_class)))
+ eir_len += eir_skb_put_data(skb, EIR_CLASS_OF_DEV,
+ conn->dev_class, sizeof(conn->dev_class));
}
ev->eir_len = cpu_to_le16(eir_len);
@@ -9785,28 +9809,21 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
{
struct sk_buff *skb;
struct mgmt_ev_device_found *ev;
- u16 eir_len;
- u32 flags;
+ u16 eir_len = 0;
+ u32 flags = 0;
- if (name_len)
- skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 2 + name_len);
- else
- skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 0);
+ skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND,
+ sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0));
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, bdaddr);
ev->addr.type = link_to_bdaddr(link_type, addr_type);
ev->rssi = rssi;
- if (name) {
- eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name,
- name_len);
- flags = 0;
- skb_put(skb, eir_len);
- } else {
- eir_len = 0;
+ if (name)
+ eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len);
+ else
flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED;
- }
ev->eir_len = cpu_to_le16(eir_len);
ev->flags = cpu_to_le32(flags);
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index edee60bbc7b4..37eef2ce55ae 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -77,11 +77,12 @@ int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag,
{
struct hci_dev *hdev;
struct mgmt_hdr *hdr;
- int len = skb->len;
+ int len;
if (!skb)
return -EINVAL;
+ len = skb->len;
hdev = bt_cb(skb)->mgmt.hdev;
/* Time stamp */
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f08034500813..eb129e48f90b 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -150,6 +150,11 @@ static int bpf_test_finish(const union bpf_attr *kattr,
if (data_out) {
int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+ if (len < 0) {
+ err = -ENOSPC;
+ goto out;
+ }
+
if (copy_to_user(data_out, data, len))
goto out;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1fac72cc617f..b1dea3febeea 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -342,23 +342,26 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit(struct net *net)
+static void __net_exit br_net_exit_batch(struct list_head *net_list)
{
struct net_device *dev;
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- for_each_netdev(net, dev)
- if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
+
+ list_for_each_entry(net, net_list, exit_list)
+ for_each_netdev(net, dev)
+ if (netif_is_bridge_master(dev))
+ br_dev_delete(dev, &list);
unregister_netdevice_many(&list);
- rtnl_unlock();
+ rtnl_unlock();
}
static struct pernet_operations br_net_ops = {
- .exit = br_net_exit,
+ .exit_batch = br_net_exit_batch,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 3db1def4437b..e5e48c6e35d7 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -84,7 +84,7 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->pkt_type = PACKET_HOST;
- netif_rx_ni(skb);
+ netif_rx(skb);
}
}
@@ -364,7 +364,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
reply->ip_summed = CHECKSUM_UNNECESSARY;
reply->pkt_type = PACKET_HOST;
- netif_rx_ni(reply);
+ netif_rx(reply);
}
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ec646656dbf1..02bb620d3b8d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index b50382f957c1..e0c13fcc50ed 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -81,6 +81,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
+ br = p->br;
brmctx = &p->br->multicast_ctx;
pmctx = &p->multicast_ctx;
state = p->state;
@@ -88,10 +89,18 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
&state, &vlan))
goto out;
+ if (p->flags & BR_PORT_LOCKED) {
+ struct net_bridge_fdb_entry *fdb_src =
+ br_fdb_find_rcu(br, eth_hdr(skb)->h_source, vid);
+
+ if (!fdb_src || READ_ONCE(fdb_src->dst) != p ||
+ test_bit(BR_FDB_LOCAL, &fdb_src->flags))
+ goto drop;
+ }
+
nbp_switchdev_frame_mark(p, skb);
/* insert into forwarding database after filtering to avoid spoofing */
- br = p->br;
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index de2409889489..db4f2641d1cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -82,6 +82,9 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
+static int br_mc_disabled_update(struct net_device *dev, bool value,
+ struct netlink_ext_ack *extack);
+
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
struct net_bridge_port_group_sg_key *sg_p)
@@ -1156,6 +1159,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 2ff83d84230d..7d4432ca9a20 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -184,6 +184,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
+ + nla_total_size(1) /* IFLA_BRPORT_LOCKED */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -269,7 +270,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
BR_MRP_LOST_CONT)) ||
nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
- nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
+ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) ||
+ nla_put_u8(skb, IFLA_BRPORT_LOCKED, !!(p->flags & BR_PORT_LOCKED)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -827,6 +829,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
[IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LOCKED] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
};
@@ -893,6 +896,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS);
br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
+ br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED);
changed_mask = old_flags ^ p->flags;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2661dda1a92b..48bc61ebc211 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1985,7 +1985,7 @@ void br_switchdev_mdb_notify(struct net_device *dev,
struct net_bridge_port_group *pg,
int type);
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
- struct netlink_ext_ack *extack);
+ bool changed, struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
void br_switchdev_init(struct net_bridge *br);
@@ -2052,8 +2052,8 @@ static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
}
-static inline int br_switchdev_port_vlan_add(struct net_device *dev,
- u16 vid, u16 flags,
+static inline int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid,
+ u16 flags, bool changed,
struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f8fbaaa7c501..6f6a70121a5e 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -72,7 +72,7 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
/* Flags that can be offloaded to hardware */
#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
- BR_MCAST_FLOOD | BR_BCAST_FLOOD)
+ BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED)
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
@@ -160,13 +160,14 @@ br_switchdev_fdb_notify(struct net_bridge *br,
}
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
- struct netlink_ext_ack *extack)
+ bool changed, struct netlink_ext_ack *extack)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.flags = flags,
.vid = vid,
+ .changed = changed,
};
return switchdev_port_obj_add(dev, &v.obj, extack);
@@ -351,51 +352,19 @@ br_switchdev_vlan_replay_one(struct notifier_block *nb,
return notifier_to_errno(err);
}
-static int br_switchdev_vlan_replay(struct net_device *br_dev,
- struct net_device *dev,
- const void *ctx, bool adding,
- struct notifier_block *nb,
- struct netlink_ext_ack *extack)
+static int br_switchdev_vlan_replay_group(struct notifier_block *nb,
+ struct net_device *dev,
+ struct net_bridge_vlan_group *vg,
+ const void *ctx, unsigned long action,
+ struct netlink_ext_ack *extack)
{
- struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
- struct net_bridge_port *p;
- struct net_bridge *br;
- unsigned long action;
int err = 0;
u16 pvid;
- ASSERT_RTNL();
-
- if (!nb)
- return 0;
-
- if (!netif_is_bridge_master(br_dev))
- return -EINVAL;
-
- if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
- return -EINVAL;
-
- if (netif_is_bridge_master(dev)) {
- br = netdev_priv(dev);
- vg = br_vlan_group(br);
- p = NULL;
- } else {
- p = br_port_get_rtnl(dev);
- if (WARN_ON(!p))
- return -EINVAL;
- vg = nbp_vlan_group(p);
- br = p->br;
- }
-
if (!vg)
return 0;
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
-
pvid = br_get_pvid(vg);
list_for_each_entry(v, &vg->vlan_list, vlist) {
@@ -415,7 +384,48 @@ static int br_switchdev_vlan_replay(struct net_device *br_dev,
return err;
}
- return err;
+ return 0;
+}
+
+static int br_switchdev_vlan_replay(struct net_device *br_dev,
+ const void *ctx, bool adding,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_port *p;
+ unsigned long action;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (!nb)
+ return 0;
+
+ if (!netif_is_bridge_master(br_dev))
+ return -EINVAL;
+
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ err = br_switchdev_vlan_replay_group(nb, br_dev, br_vlan_group(br),
+ ctx, action, extack);
+ if (err)
+ return err;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ struct net_device *dev = p->dev;
+
+ err = br_switchdev_vlan_replay_group(nb, dev,
+ nbp_vlan_group(p),
+ ctx, action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
@@ -681,8 +691,7 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
struct net_device *dev = p->dev;
int err;
- err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb,
- extack);
+ err = br_switchdev_vlan_replay(br_dev, ctx, true, blocking_nb, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -706,11 +715,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
struct net_device *br_dev = p->br->dev;
struct net_device *dev = p->dev;
- br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+ br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
- br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
+ br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 1402d5ca242d..7557e90b60e1 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,53 +34,70 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}
-static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg,
+static void __vlan_add_pvid(struct net_bridge_vlan_group *vg,
const struct net_bridge_vlan *v)
{
if (vg->pvid == v->vid)
- return false;
+ return;
smp_wmb();
br_vlan_set_pvid_state(vg, v->state);
vg->pvid = v->vid;
-
- return true;
}
-static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid != vid)
- return false;
+ return;
smp_wmb();
vg->pvid = 0;
-
- return true;
}
-/* return true if anything changed, false otherwise */
-static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
+/* Update the BRIDGE_VLAN_INFO_PVID and BRIDGE_VLAN_INFO_UNTAGGED flags of @v.
+ * If @commit is false, return just whether the BRIDGE_VLAN_INFO_PVID and
+ * BRIDGE_VLAN_INFO_UNTAGGED bits of @flags would produce any change onto @v.
+ */
+static bool __vlan_flags_update(struct net_bridge_vlan *v, u16 flags,
+ bool commit)
{
struct net_bridge_vlan_group *vg;
- u16 old_flags = v->flags;
- bool ret;
+ bool change;
if (br_vlan_is_master(v))
vg = br_vlan_group(v->br);
else
vg = nbp_vlan_group(v->port);
+ /* check if anything would be changed on commit */
+ change = !!(flags & BRIDGE_VLAN_INFO_PVID) == !!(vg->pvid != v->vid) ||
+ ((flags ^ v->flags) & BRIDGE_VLAN_INFO_UNTAGGED);
+
+ if (!commit)
+ goto out;
+
if (flags & BRIDGE_VLAN_INFO_PVID)
- ret = __vlan_add_pvid(vg, v);
+ __vlan_add_pvid(vg, v);
else
- ret = __vlan_delete_pvid(vg, v->vid);
+ __vlan_delete_pvid(vg, v->vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
else
v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
- return ret || !!(old_flags ^ v->flags);
+out:
+ return change;
+}
+
+static bool __vlan_flags_would_change(struct net_bridge_vlan *v, u16 flags)
+{
+ return __vlan_flags_update(v, flags, false);
+}
+
+static void __vlan_flags_commit(struct net_bridge_vlan *v, u16 flags)
+{
+ __vlan_flags_update(v, flags, true);
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -92,7 +109,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q add.
*/
- err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags, false, extack);
if (err == -EOPNOTSUPP)
return vlan_vid_add(dev, br->vlan_proto, v->vid);
v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV;
@@ -284,9 +301,12 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
}
br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
} else {
- err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
- if (err && err != -EOPNOTSUPP)
- goto out;
+ if (br_vlan_should_use(v)) {
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags,
+ false, extack);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ }
br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
}
@@ -310,7 +330,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
goto out_fdb_insert;
__vlan_add_list(v);
- __vlan_add_flags(v, flags);
+ __vlan_flags_commit(v, flags);
br_multicast_toggle_one_vlan(v, true);
if (p)
@@ -404,6 +424,7 @@ static void __vlan_flush(const struct net_bridge *br,
{
struct net_bridge_vlan *vlan, *tmp;
u16 v_start = 0, v_end = 0;
+ int err;
__vlan_delete_pvid(vg, vg->pvid);
list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) {
@@ -417,7 +438,13 @@ static void __vlan_flush(const struct net_bridge *br,
}
v_end = vlan->vid;
- __vlan_del(vlan);
+ err = __vlan_del(vlan);
+ if (err) {
+ br_err(br,
+ "port %u(%s) failed to delete vlan %d: %pe\n",
+ (unsigned int) p->port_no, p->dev->name,
+ vlan->vid, ERR_PTR(err));
+ }
}
/* notify about the last/whole vlan range */
@@ -670,18 +697,29 @@ static int br_vlan_add_existing(struct net_bridge *br,
u16 flags, bool *changed,
struct netlink_ext_ack *extack)
{
+ bool would_change = __vlan_flags_would_change(vlan, flags);
+ bool becomes_brentry = false;
int err;
- err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
if (!br_vlan_is_brentry(vlan)) {
/* Trying to change flags of non-existent bridge vlan */
- if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) {
- err = -EINVAL;
- goto err_flags;
- }
+ if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
+ return -EINVAL;
+
+ becomes_brentry = true;
+ }
+
+ /* Master VLANs that aren't brentries weren't notified before,
+ * time to notify them now.
+ */
+ if (becomes_brentry || would_change) {
+ err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags,
+ would_change, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ if (becomes_brentry) {
/* It was only kept for port vlans, now make it real */
err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid);
if (err) {
@@ -696,13 +734,13 @@ static int br_vlan_add_existing(struct net_bridge *br,
br_multicast_toggle_one_vlan(vlan, true);
}
- if (__vlan_add_flags(vlan, flags))
+ __vlan_flags_commit(vlan, flags);
+ if (would_change)
*changed = true;
return 0;
err_fdb_insert:
-err_flags:
br_switchdev_port_vlan_del(br->dev, vlan->vid);
return err;
}
@@ -1247,11 +1285,18 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
*changed = false;
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
- /* Pass the flags to the hardware bridge */
- ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack);
- if (ret && ret != -EOPNOTSUPP)
- return ret;
- *changed = __vlan_add_flags(vlan, flags);
+ bool would_change = __vlan_flags_would_change(vlan, flags);
+
+ if (would_change) {
+ /* Pass the flags to the hardware bridge */
+ ret = br_switchdev_port_vlan_add(port->dev, vid, flags,
+ true, extack);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+ }
+
+ __vlan_flags_commit(vlan, flags);
+ *changed = would_change;
return 0;
}
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index fdbed3158555..ebfb2a5c59e4 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -32,6 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ bool mono_delivery_time = skb->mono_delivery_time;
unsigned int hlen, ll_rs, mtu;
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
@@ -81,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -112,7 +113,7 @@ slow_path:
goto blackhole;
}
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 414dc5671c45..4d63ef13a1fd 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -99,7 +99,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
else
skb->ip_summed = CHECKSUM_NONE;
- netif_rx_any_context(skb);
+ netif_rx(skb);
/* Update statistics. */
priv->netdev->stats.rx_packets++;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index cce2af10eb3e..1fb49d51b25d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -284,7 +284,7 @@ int can_send(struct sk_buff *skb, int loop)
}
if (newskb)
- netif_rx_ni(newskb);
+ netif_rx(newskb);
/* update statistics */
pkg_stats->tx_frames++;
diff --git a/net/can/gw.c b/net/can/gw.c
index 24221352e059..1ea4cc527db3 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -577,6 +577,13 @@ static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj)
gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}
+static void cgw_job_free_rcu(struct rcu_head *rcu_head)
+{
+ struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu);
+
+ kmem_cache_free(cgw_cache, gwj);
+}
+
static int cgw_notifier(struct notifier_block *nb,
unsigned long msg, void *ptr)
{
@@ -596,8 +603,7 @@ static int cgw_notifier(struct notifier_block *nb,
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
}
}
}
@@ -1155,8 +1161,7 @@ static void cgw_remove_all_jobs(struct net *net)
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
}
}
@@ -1224,8 +1229,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
err = 0;
break;
}
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d2a430b6a13b..d4c0b4704987 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -14,7 +14,6 @@
* - use CAN_ISOTP_WAIT_TX_DONE flag to block the caller until the PDU is sent
* - as we have static buffers the check whether the PDU fits into the buffer
* is done at FF reception time (no support for sending 'wait frames')
- * - take care of the tx-queue-len as traffic shaping is still on the TODO list
*
* Copyright (c) 2020 Volkswagen Group Electronic Research
* All rights reserved.
@@ -87,9 +86,9 @@ MODULE_ALIAS("can-proto-6");
/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can
* take full 32 bit values (4 Gbyte). We would need some good concept to handle
* this between user space and kernel space. For now increase the static buffer
- * to something about 8 kbyte to be able to test this new functionality.
+ * to something about 64 kbyte to be able to test this new functionality.
*/
-#define MAX_MSG_LENGTH 8200
+#define MAX_MSG_LENGTH 66000
/* N_PCI type values in bits 7-4 of N_PCI bytes */
#define N_PCI_SF 0x00 /* single frame */
@@ -141,8 +140,10 @@ struct isotp_sock {
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
+ u32 frame_txtime;
u32 force_tx_stmin;
u32 force_rx_stmin;
+ u32 cfecho; /* consecutive frame echo tag */
struct tpcon rx, tx;
struct list_head notifier;
wait_queue_head_t wait;
@@ -360,7 +361,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
so->tx_gap = ktime_set(0, 0);
/* add transmission time for CAN frame N_As */
- so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+ so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
/* add waiting time for consecutive frames N_Cs */
if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
so->tx_gap = ktime_add_ns(so->tx_gap,
@@ -712,6 +713,63 @@ static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
cf->data[0] = so->opt.ext_address;
}
+static void isotp_send_cframe(struct isotp_sock *so)
+{
+ struct sock *sk = &so->sk;
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf;
+ int can_send_ret;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev)
+ return;
+
+ skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv), GFP_ATOMIC);
+ if (!skb) {
+ dev_put(dev);
+ return;
+ }
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+ cf = (struct canfd_frame *)skb->data;
+ skb_put_zero(skb, so->ll.mtu);
+
+ /* create consecutive frame */
+ isotp_fill_dataframe(cf, so, ae, 0);
+
+ /* place consecutive frame N_PCI in appropriate index */
+ cf->data[ae] = N_PCI_CF | so->tx.sn++;
+ so->tx.sn %= 16;
+ so->tx.bs++;
+
+ cf->flags = so->ll.tx_flags;
+
+ skb->dev = dev;
+ can_skb_set_owner(skb, sk);
+
+ /* cfecho should have been zero'ed by init/isotp_rcv_echo() */
+ if (so->cfecho)
+ pr_notice_once("can-isotp: cfecho is %08X != 0\n", so->cfecho);
+
+ /* set consecutive frame echo tag */
+ so->cfecho = *(u32 *)cf->data;
+
+ /* send frame with local echo enabled */
+ can_send_ret = can_send(skb, 1);
+ if (can_send_ret) {
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(can_send_ret));
+ if (can_send_ret == -ENOBUFS)
+ pr_notice_once("can-isotp: tx queue is full\n");
+ }
+ dev_put(dev);
+}
+
static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
int ae)
{
@@ -748,19 +806,74 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
so->tx.state = ISOTP_WAIT_FIRST_FC;
}
+static void isotp_rcv_echo(struct sk_buff *skb, void *data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct isotp_sock *so = isotp_sk(sk);
+ struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+
+ /* only handle my own local echo skb's */
+ if (skb->sk != sk || so->cfecho != *(u32 *)cf->data)
+ return;
+
+ /* cancel local echo timeout */
+ hrtimer_cancel(&so->txtimer);
+
+ /* local echo skb with consecutive frame has been consumed */
+ so->cfecho = 0;
+
+ if (so->tx.idx >= so->tx.len) {
+ /* we are done */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+ return;
+ }
+
+ if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
+ /* stop and wait for FC with timeout */
+ so->tx.state = ISOTP_WAIT_FC;
+ hrtimer_start(&so->txtimer, ktime_set(1, 0),
+ HRTIMER_MODE_REL_SOFT);
+ return;
+ }
+
+ /* no gap between data frames needed => use burst mode */
+ if (!so->tx_gap) {
+ isotp_send_cframe(so);
+ return;
+ }
+
+ /* start timer to send next consecutive frame with correct delay */
+ hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
+}
+
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- struct sk_buff *skb;
- struct net_device *dev;
- struct canfd_frame *cf;
enum hrtimer_restart restart = HRTIMER_NORESTART;
- int can_send_ret;
- int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
switch (so->tx.state) {
+ case ISOTP_SENDING:
+
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (!so->cfecho) {
+ /* start timeout for unlikely lost echo skb */
+ hrtimer_set_expires(&so->txtimer,
+ ktime_add(ktime_get(),
+ ktime_set(2, 0)));
+ restart = HRTIMER_RESTART;
+
+ /* push out the next consecutive frame */
+ isotp_send_cframe(so);
+ break;
+ }
+
+ /* cfecho has not been cleared in isotp_rcv_echo() */
+ pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
+ fallthrough;
+
case ISOTP_WAIT_FC:
case ISOTP_WAIT_FIRST_FC:
@@ -776,78 +889,6 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
wake_up_interruptible(&so->wait);
break;
- case ISOTP_SENDING:
-
- /* push out the next segmented pdu */
- dev = dev_get_by_index(sock_net(sk), so->ifindex);
- if (!dev)
- break;
-
-isotp_tx_burst:
- skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv),
- GFP_ATOMIC);
- if (!skb) {
- dev_put(dev);
- break;
- }
-
- can_skb_reserve(skb);
- can_skb_prv(skb)->ifindex = dev->ifindex;
- can_skb_prv(skb)->skbcnt = 0;
-
- cf = (struct canfd_frame *)skb->data;
- skb_put_zero(skb, so->ll.mtu);
-
- /* create consecutive frame */
- isotp_fill_dataframe(cf, so, ae, 0);
-
- /* place consecutive frame N_PCI in appropriate index */
- cf->data[ae] = N_PCI_CF | so->tx.sn++;
- so->tx.sn %= 16;
- so->tx.bs++;
-
- cf->flags = so->ll.tx_flags;
-
- skb->dev = dev;
- can_skb_set_owner(skb, sk);
-
- can_send_ret = can_send(skb, 1);
- if (can_send_ret) {
- pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
- __func__, ERR_PTR(can_send_ret));
- if (can_send_ret == -ENOBUFS)
- pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n");
- }
- if (so->tx.idx >= so->tx.len) {
- /* we are done */
- so->tx.state = ISOTP_IDLE;
- dev_put(dev);
- wake_up_interruptible(&so->wait);
- break;
- }
-
- if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
- /* stop and wait for FC */
- so->tx.state = ISOTP_WAIT_FC;
- dev_put(dev);
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(1, 0)));
- restart = HRTIMER_RESTART;
- break;
- }
-
- /* no gap between data frames needed => use burst mode */
- if (!so->tx_gap)
- goto isotp_tx_burst;
-
- /* start timer to send next data frame with correct delay */
- dev_put(dev);
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(), so->tx_gap));
- restart = HRTIMER_RESTART;
- break;
-
default:
WARN_ON_ONCE(1);
}
@@ -1075,6 +1116,9 @@ static int isotp_release(struct socket *sock)
can_rx_unregister(net, dev, so->rxid,
SINGLE_MASK(so->rxid),
isotp_rcv, sk);
+ can_rx_unregister(net, dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
dev_put(dev);
synchronize_rcu();
}
@@ -1161,11 +1205,20 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
- if (do_rx_reg)
+ if (do_rx_reg) {
can_rx_register(net, dev, addr->can_addr.tp.rx_id,
SINGLE_MASK(addr->can_addr.tp.rx_id),
isotp_rcv, sk, "isotp", sk);
+ /* no consecutive frame echo skb in flight */
+ so->cfecho = 0;
+
+ /* register for echo skb's */
+ can_rx_register(net, dev, addr->can_addr.tp.tx_id,
+ SINGLE_MASK(addr->can_addr.tp.tx_id),
+ isotp_rcv_echo, sk, "isotpe", sk);
+ }
+
dev_put(dev);
if (so->bound && do_rx_reg) {
@@ -1176,6 +1229,9 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
can_rx_unregister(net, dev, so->rxid,
SINGLE_MASK(so->rxid),
isotp_rcv, sk);
+ can_rx_unregister(net, dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
dev_put(dev);
}
}
@@ -1238,6 +1294,14 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
/* no separate rx_ext_address is given => use ext_address */
if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
so->opt.rx_ext_address = so->opt.ext_address;
+
+ /* check for frame_txtime changes (0 => no changes) */
+ if (so->opt.frame_txtime) {
+ if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
+ so->frame_txtime = 0;
+ else
+ so->frame_txtime = so->opt.frame_txtime;
+ }
break;
case CAN_ISOTP_RECV_FC:
@@ -1381,10 +1445,14 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST)))
+ if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
can_rx_unregister(dev_net(dev), dev, so->rxid,
SINGLE_MASK(so->rxid),
isotp_rcv, sk);
+ can_rx_unregister(dev_net(dev), dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
+ }
so->ifindex = 0;
so->bound = 0;
@@ -1439,6 +1507,7 @@ static int isotp_init(struct sock *sk)
so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index a271688780a2..307ee1174a6e 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -2006,7 +2006,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
/* set the end-packet for broadcast */
session->pkt.last = session->pkt.total;
- skcb->tskey = session->sk->sk_tskey++;
+ skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
session->tskey = skcb->tskey;
return session;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2c3b8744e00c..ba69ddf85af6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -216,18 +216,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
-static inline void rps_lock(struct softnet_data *sd)
+static inline void rps_lock_irqsave(struct softnet_data *sd,
+ unsigned long *flags)
{
-#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
-#endif
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_save(*flags);
}
-static inline void rps_unlock(struct softnet_data *sd)
+static inline void rps_lock_irq_disable(struct softnet_data *sd)
{
-#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
-#endif
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_lock_irq(&sd->input_pkt_queue.lock);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
+}
+
+static inline void rps_unlock_irq_restore(struct softnet_data *sd,
+ unsigned long *flags)
+{
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_restore(*flags);
+}
+
+static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+{
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_unlock_irq(&sd->input_pkt_queue.lock);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
}
static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
@@ -1602,7 +1622,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
- N(PRE_CHANGEADDR)
+ N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
+ N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1919,6 +1940,32 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+/**
+ * call_netdevice_notifiers_info_robust - call per-netns notifier blocks
+ * for and rollback on error
+ * @val_up: value passed unmodified to notifier function
+ * @val_down: value passed unmodified to the notifier function when
+ * recovering from an error on @val_up
+ * @info: notifier information data
+ *
+ * Call all per-netns network notifier blocks, but not notifier blocks on
+ * the global notifier chain. Parameters and return value are as for
+ * raw_notifier_call_chain_robust().
+ */
+
+static int
+call_netdevice_notifiers_info_robust(unsigned long val_up,
+ unsigned long val_down,
+ struct netdev_notifier_info *info)
+{
+ struct net *net = dev_net(info->dev);
+
+ ASSERT_RTNL();
+
+ return raw_notifier_call_chain_robust(&net->netdev_chain,
+ val_up, val_down, info);
+}
+
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack)
@@ -2000,7 +2047,8 @@ void net_dec_egress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
-static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
+DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
+EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
static atomic_t netstamp_wanted;
@@ -2061,14 +2109,15 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
skb->tstamp = 0;
+ skb->mono_delivery_time = 0;
if (static_branch_unlikely(&netstamp_needed_key))
- __net_timestamp(skb);
+ skb->tstamp = ktime_get_real();
}
#define net_timestamp_check(COND, SKB) \
if (static_branch_unlikely(&netstamp_needed_key)) { \
if ((COND) && !(SKB)->tstamp) \
- __net_timestamp(SKB); \
+ (SKB)->tstamp = ktime_get_real(); \
} \
bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
@@ -3710,7 +3759,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
no_lock_out:
if (unlikely(to_free))
- kfree_skb_list(to_free);
+ kfree_skb_list_reason(to_free,
+ SKB_DROP_REASON_QDISC_DROP);
return rc;
}
@@ -3765,7 +3815,7 @@ no_lock_out:
}
spin_unlock(root_lock);
if (unlikely(to_free))
- kfree_skb_list(to_free);
+ kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3811,7 +3861,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(skb));
skb_dst_force(skb);
- netif_rx_ni(skb);
+ netif_rx(skb);
return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);
@@ -3840,7 +3890,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
case TC_ACT_SHOT:
mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -4456,11 +4506,11 @@ static void rps_trigger_softirq(void *data)
* If yes, queue it to our IPI list and return 1
* If no, return 0
*/
-static int rps_ipi_queued(struct softnet_data *sd)
+static int napi_schedule_rps(struct softnet_data *sd)
{
-#ifdef CONFIG_RPS
struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
+#ifdef CONFIG_RPS
if (sd != mysd) {
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
@@ -4469,6 +4519,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
return 1;
}
#endif /* CONFIG_RPS */
+ __napi_schedule_irqoff(&mysd->backlog);
return 0;
}
@@ -4519,15 +4570,15 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
unsigned int *qtail)
{
+ enum skb_drop_reason reason;
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
sd = &per_cpu(softnet_data, cpu);
- local_irq_save(flags);
-
- rps_lock(sd);
+ rps_lock_irqsave(sd, &flags);
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
@@ -4536,29 +4587,25 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
input_queue_tail_incr_save(sd, qtail);
- rps_unlock(sd);
- local_irq_restore(flags);
+ rps_unlock_irq_restore(sd, &flags);
return NET_RX_SUCCESS;
}
/* Schedule NAPI for backlog device
* We can use non atomic operation since we own the queue lock
*/
- if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
- if (!rps_ipi_queued(sd))
- ____napi_schedule(sd, &sd->backlog);
- }
+ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+ napi_schedule_rps(sd);
goto enqueue;
}
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
drop:
sd->dropped++;
- rps_unlock(sd);
-
- local_irq_restore(flags);
+ rps_unlock_irq_restore(sd, &flags);
atomic_long_inc(&skb->dev->rx_dropped);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -4778,7 +4825,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -4796,7 +4843,6 @@ static int netif_rx_internal(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -4806,78 +4852,72 @@ static int netif_rx_internal(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, smp_processor_id(), &qtail);
}
return ret;
}
/**
+ * __netif_rx - Slightly optimized version of netif_rx
+ * @skb: buffer to post
+ *
+ * This behaves as netif_rx except that it does not disable bottom halves.
+ * As a result this function may only be invoked from the interrupt context
+ * (either hard or soft interrupt).
+ */
+int __netif_rx(struct sk_buff *skb)
+{
+ int ret;
+
+ lockdep_assert_once(hardirq_count() | softirq_count());
+
+ trace_netif_rx_entry(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+ return ret;
+}
+EXPORT_SYMBOL(__netif_rx);
+
+/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
- * the upper (protocol) levels to process. It always succeeds. The buffer
- * may be dropped during processing for congestion control or by the
- * protocol layers.
+ * the upper (protocol) levels to process via the backlog NAPI device. It
+ * always succeeds. The buffer may be dropped during processing for
+ * congestion control or by the protocol layers.
+ * The network buffer is passed via the backlog NAPI device. Modern NIC
+ * driver should use NAPI and GRO.
+ * This function can used from interrupt and from process context. The
+ * caller from process context must not disable interrupts before invoking
+ * this function.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
-
int netif_rx(struct sk_buff *skb)
{
+ bool need_bh_off = !(hardirq_count() | softirq_count());
int ret;
+ if (need_bh_off)
+ local_bh_disable();
trace_netif_rx_entry(skb);
-
ret = netif_rx_internal(skb);
trace_netif_rx_exit(ret);
-
+ if (need_bh_off)
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL(netif_rx);
-int netif_rx_ni(struct sk_buff *skb)
-{
- int err;
-
- trace_netif_rx_ni_entry(skb);
-
- preempt_disable();
- err = netif_rx_internal(skb);
- if (local_softirq_pending())
- do_softirq();
- preempt_enable();
- trace_netif_rx_ni_exit(err);
-
- return err;
-}
-EXPORT_SYMBOL(netif_rx_ni);
-
-int netif_rx_any_context(struct sk_buff *skb)
-{
- /*
- * If invoked from contexts which do not invoke bottom half
- * processing either at return from interrupt or when softrqs are
- * reenabled, use netif_rx_ni() which invokes bottomhalf processing
- * directly.
- */
- if (in_interrupt())
- return netif_rx(skb);
- else
- return netif_rx_ni(skb);
-}
-EXPORT_SYMBOL(netif_rx_any_context);
-
static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -5001,7 +5041,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
break;
case TC_ACT_SHOT:
mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -5318,11 +5358,13 @@ check_vlan_id:
*ppt_prev = pt_prev;
} else {
drop:
- if (!deliver_exact)
+ if (!deliver_exact) {
atomic_long_inc(&skb->dev->rx_dropped);
- else
+ kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT);
+ } else {
atomic_long_inc(&skb->dev->rx_nohandler);
- kfree_skb(skb);
+ kfree_skb(skb);
+ }
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -5650,8 +5692,7 @@ static void flush_backlog(struct work_struct *work)
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
@@ -5659,8 +5700,7 @@ static void flush_backlog(struct work_struct *work)
input_queue_head_incr(sd);
}
}
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
@@ -5678,16 +5718,14 @@ static bool flush_required(int cpu)
struct softnet_data *sd = &per_cpu(softnet_data, cpu);
bool do_flush;
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
/* as insertion into process_queue happens with the rps lock held,
* process_queue access may race only with dequeue
*/
do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
!skb_queue_empty_lockless(&sd->process_queue);
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
return do_flush;
#endif
@@ -5802,8 +5840,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
@@ -5819,8 +5856,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
}
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
}
return work;
@@ -7727,6 +7763,242 @@ void netdev_bonding_info_change(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_bonding_info_change);
+static int netdev_offload_xstats_enable_l3(struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+ int err;
+ int rc;
+
+ dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3),
+ GFP_KERNEL);
+ if (!dev->offload_xstats_l3)
+ return -ENOMEM;
+
+ rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE,
+ NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ err = notifier_to_errno(rc);
+ if (err)
+ goto free_stats;
+
+ return 0;
+
+free_stats:
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+ return err;
+}
+
+int netdev_offload_xstats_enable(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return netdev_offload_xstats_enable_l3(dev, extack);
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enable);
+
+static void netdev_offload_xstats_disable_l3(struct net_device *dev)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+
+ call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+}
+
+int netdev_offload_xstats_disable(struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ if (!netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ netdev_offload_xstats_disable_l3(dev);
+ return 0;
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_disable);
+
+static void netdev_offload_xstats_disable_all(struct net_device *dev)
+{
+ netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3);
+}
+
+static struct rtnl_hw_stats64 *
+netdev_offload_xstats_get_ptr(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return dev->offload_xstats_l3;
+ }
+
+ WARN_ON(1);
+ return NULL;
+}
+
+bool netdev_offload_xstats_enabled(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ return netdev_offload_xstats_get_ptr(dev, type);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enabled);
+
+struct netdev_notifier_offload_xstats_ru {
+ bool used;
+};
+
+struct netdev_notifier_offload_xstats_rd {
+ struct rtnl_hw_stats64 stats;
+ bool used;
+};
+
+static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest,
+ const struct rtnl_hw_stats64 *src)
+{
+ dest->rx_packets += src->rx_packets;
+ dest->tx_packets += src->tx_packets;
+ dest->rx_bytes += src->rx_bytes;
+ dest->tx_bytes += src->tx_bytes;
+ dest->rx_errors += src->rx_errors;
+ dest->tx_errors += src->tx_errors;
+ dest->rx_dropped += src->rx_dropped;
+ dest->tx_dropped += src->tx_dropped;
+ dest->multicast += src->multicast;
+}
+
+static int netdev_offload_xstats_get_used(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_ru report_used = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_used = &report_used,
+ };
+ int rc;
+
+ WARN_ON(!netdev_offload_xstats_enabled(dev, type));
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED,
+ &info.info);
+ *p_used = report_used.used;
+ return notifier_to_errno(rc);
+}
+
+static int netdev_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_rd report_delta = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_delta = &report_delta,
+ };
+ struct rtnl_hw_stats64 *stats;
+ int rc;
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return -EINVAL;
+
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
+ &info.info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ netdev_hw_stats64_add(stats, &report_delta.stats);
+
+ if (p_stats)
+ *p_stats = *stats;
+ *p_used = report_delta.used;
+
+ return notifier_to_errno(rc);
+}
+
+int netdev_offload_xstats_get(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats, bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (p_stats)
+ return netdev_offload_xstats_get_stats(dev, type, p_stats,
+ p_used, extack);
+ else
+ return netdev_offload_xstats_get_used(dev, type, p_used,
+ extack);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_get);
+
+void
+netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta,
+ const struct rtnl_hw_stats64 *stats)
+{
+ report_delta->used = true;
+ netdev_hw_stats64_add(&report_delta->stats, stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_delta);
+
+void
+netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used)
+{
+ report_used->used = true;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_used);
+
+void netdev_offload_xstats_push_delta(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ const struct rtnl_hw_stats64 *p_stats)
+{
+ struct rtnl_hw_stats64 *stats;
+
+ ASSERT_RTNL();
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return;
+
+ netdev_hw_stats64_add(stats, p_stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_push_delta);
+
/**
* netdev_get_xmit_slave - Get the xmit slave of master device
* @dev: device
@@ -9815,8 +10087,8 @@ int netdev_unregister_timeout_secs __read_mostly = 10;
#define WAIT_REFS_MIN_MSECS 1
#define WAIT_REFS_MAX_MSECS 250
/**
- * netdev_wait_allrefs - wait until all references are gone.
- * @dev: target net_device
+ * netdev_wait_allrefs_any - wait until all references are gone.
+ * @list: list of net_devices to wait on
*
* This is called when unregistering network devices.
*
@@ -9826,37 +10098,42 @@ int netdev_unregister_timeout_secs __read_mostly = 10;
* We can get stuck here if buggy protocols don't correctly
* call dev_put.
*/
-static void netdev_wait_allrefs(struct net_device *dev)
+static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
{
unsigned long rebroadcast_time, warning_time;
- int wait = 0, refcnt;
-
- linkwatch_forget_dev(dev);
+ struct net_device *dev;
+ int wait = 0;
rebroadcast_time = warning_time = jiffies;
- refcnt = netdev_refcnt_read(dev);
- while (refcnt != 1) {
+ list_for_each_entry(dev, list, todo_list)
+ if (netdev_refcnt_read(dev) == 1)
+ return dev;
+
+ while (true) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
rtnl_lock();
/* Rebroadcast unregister notification */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ list_for_each_entry(dev, list, todo_list)
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
__rtnl_unlock();
rcu_barrier();
rtnl_lock();
- if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
- &dev->state)) {
- /* We must not have linkwatch events
- * pending on unregister. If this
- * happens, we simply run the queue
- * unscheduled, resulting in a noop
- * for this device.
- */
- linkwatch_run_queue();
- }
+ list_for_each_entry(dev, list, todo_list)
+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &dev->state)) {
+ /* We must not have linkwatch events
+ * pending on unregister. If this
+ * happens, we simply run the queue
+ * unscheduled, resulting in a noop
+ * for this device.
+ */
+ linkwatch_run_queue();
+ break;
+ }
__rtnl_unlock();
@@ -9871,14 +10148,18 @@ static void netdev_wait_allrefs(struct net_device *dev)
wait = min(wait << 1, WAIT_REFS_MAX_MSECS);
}
- refcnt = netdev_refcnt_read(dev);
+ list_for_each_entry(dev, list, todo_list)
+ if (netdev_refcnt_read(dev) == 1)
+ return dev;
- if (refcnt != 1 &&
- time_after(jiffies, warning_time +
+ if (time_after(jiffies, warning_time +
netdev_unregister_timeout_secs * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
- dev->name, refcnt);
- ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+ list_for_each_entry(dev, list, todo_list) {
+ pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ dev->name, netdev_refcnt_read(dev));
+ ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+ }
+
warning_time = jiffies;
}
}
@@ -9910,6 +10191,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
*/
void netdev_run_todo(void)
{
+ struct net_device *dev, *tmp;
struct list_head list;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
@@ -9930,26 +10212,24 @@ void netdev_run_todo(void)
__rtnl_unlock();
-
/* Wait for rcu callbacks to finish before next phase */
if (!list_empty(&list))
rcu_barrier();
- while (!list_empty(&list)) {
- struct net_device *dev
- = list_first_entry(&list, struct net_device, todo_list);
- list_del(&dev->todo_list);
-
+ list_for_each_entry_safe(dev, tmp, &list, todo_list) {
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
- pr_err("network todo '%s' but state %d\n",
- dev->name, dev->reg_state);
- dump_stack();
+ netdev_WARN(dev, "run_todo but not unregistering\n");
+ list_del(&dev->todo_list);
continue;
}
dev->reg_state = NETREG_UNREGISTERED;
+ linkwatch_forget_dev(dev);
+ }
- netdev_wait_allrefs(dev);
+ while (!list_empty(&list)) {
+ dev = netdev_wait_allrefs_any(&list);
+ list_del(&dev->todo_list);
/* paranoia */
BUG_ON(netdev_refcnt_read(dev) != 1);
@@ -10408,6 +10688,8 @@ void unregister_netdevice_many(struct list_head *head)
dev_xdp_uninstall(dev);
+ netdev_offload_xstats_disable_all(dev);
+
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
*/
@@ -10673,11 +10955,11 @@ static int dev_cpu_dead(unsigned int oldcpu)
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
- netif_rx_ni(skb);
+ netif_rx(skb);
input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
- netif_rx_ni(skb);
+ netif_rx(skb);
input_queue_head_incr(oldsd);
}
@@ -10880,36 +11162,6 @@ static void __net_exit default_device_exit_net(struct net *net)
}
}
-static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
-{
- /* Return (with the rtnl_lock held) when there are no network
- * devices unregistering in any network namespace in net_list.
- */
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- bool unregistering;
- struct net *net;
-
- ASSERT_RTNL();
- add_wait_queue(&netdev_unregistering_wq, &wait);
- for (;;) {
- unregistering = false;
-
- list_for_each_entry(net, net_list, exit_list) {
- if (atomic_read(&net->dev_unreg_count) > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
- break;
- __rtnl_unlock();
-
- wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
- rtnl_lock();
- }
- remove_wait_queue(&netdev_unregistering_wq, &wait);
-}
-
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
@@ -10926,18 +11178,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
default_device_exit_net(net);
cond_resched();
}
- /* To prevent network device cleanup code from dereferencing
- * loopback devices or network devices that have been freed
- * wait here for all pending unregistrations to complete,
- * before unregistring the loopback device and allowing the
- * network namespace be freed.
- *
- * The netdev todo list containing all network devices
- * unregistrations that happen in default_device_exit_batch
- * will run in the rtnl_unlock() at the end of
- * default_device_exit_batch.
- */
- rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 4641126b8a20..b89e3e95bffc 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -64,7 +64,6 @@ static const char * const drop_reasons[] = {
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
- * It also guards the global 'hw_stats_list' list.
*/
static DEFINE_MUTEX(net_dm_mutex);
@@ -100,11 +99,9 @@ struct per_cpu_dm_data {
};
struct dm_hw_stat_delta {
- struct net_device *dev;
unsigned long last_rx;
- struct list_head list;
- struct rcu_head rcu;
unsigned long last_drop_val;
+ struct rcu_head rcu;
};
static struct genl_family net_drop_monitor_family;
@@ -115,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);
static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
@@ -287,29 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
int work, int budget)
{
- struct dm_hw_stat_delta *new_stat;
-
+ struct net_device *dev = napi->dev;
+ struct dm_hw_stat_delta *stat;
/*
* Don't check napi structures with no associated device
*/
- if (!napi->dev)
+ if (!dev)
return;
rcu_read_lock();
- list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ stat = rcu_dereference(dev->dm_private);
+ if (stat) {
/*
* only add a note to our monitor buffer if:
- * 1) this is the dev we received on
- * 2) its after the last_rx delta
- * 3) our rx_dropped count has gone up
+ * 1) its after the last_rx delta
+ * 2) our rx_dropped count has gone up
*/
- if ((new_stat->dev == napi->dev) &&
- (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
- (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+ if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
+ (dev->stats.rx_dropped != stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
- new_stat->last_drop_val = napi->dev->stats.rx_dropped;
- new_stat->last_rx = jiffies;
- break;
+ stat->last_drop_val = dev->stats.rx_dropped;
+ stat->last_rx = jiffies;
}
}
rcu_read_unlock();
@@ -1194,7 +1188,6 @@ err_module_put:
static void net_dm_trace_off_set(void)
{
- struct dm_hw_stat_delta *new_stat, *temp;
const struct net_dm_alert_ops *ops;
int cpu;
@@ -1218,13 +1211,6 @@ static void net_dm_trace_off_set(void)
consume_skb(skb);
}
- list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
- if (new_stat->dev == NULL) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- }
- }
-
module_put(THIS_MODULE);
}
@@ -1585,38 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct dm_hw_stat_delta *new_stat = NULL;
- struct dm_hw_stat_delta *tmp;
+ struct dm_hw_stat_delta *stat;
switch (event) {
case NETDEV_REGISTER:
- new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+ if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
+ break;
+ stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat)
+ break;
- if (!new_stat)
- goto out;
+ stat->last_rx = jiffies;
+ rcu_assign_pointer(dev->dm_private, stat);
- new_stat->dev = dev;
- new_stat->last_rx = jiffies;
- mutex_lock(&net_dm_mutex);
- list_add_rcu(&new_stat->list, &hw_stats_list);
- mutex_unlock(&net_dm_mutex);
break;
case NETDEV_UNREGISTER:
- mutex_lock(&net_dm_mutex);
- list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
- if (new_stat->dev == dev) {
- new_stat->dev = NULL;
- if (trace_state == TRACE_OFF) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- break;
- }
- }
+ stat = rtnl_dereference(dev->dm_private);
+ if (stat) {
+ rcu_assign_pointer(dev->dm_private, NULL);
+ kfree_rcu(stat, rcu);
}
- mutex_unlock(&net_dm_mutex);
break;
}
-out:
return NOTIFY_DONE;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 818244068c2d..88767f7da150 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2107,7 +2107,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -2176,7 +2176,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2274,7 +2274,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2710,6 +2710,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
@@ -7385,6 +7388,43 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_skb_set_delivery_time, struct sk_buff *, skb,
+ u64, dtime, u32, dtime_type)
+{
+ /* skb_clear_delivery_time() is done for inet protocol */
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))
+ return -EOPNOTSUPP;
+
+ switch (dtime_type) {
+ case BPF_SKB_DELIVERY_TIME_MONO:
+ if (!dtime)
+ return -EINVAL;
+ skb->tstamp = dtime;
+ skb->mono_delivery_time = 1;
+ break;
+ case BPF_SKB_DELIVERY_TIME_NONE:
+ if (dtime)
+ return -EINVAL;
+ skb->tstamp = 0;
+ skb->mono_delivery_time = 0;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_set_delivery_time_proto = {
+ .func = bpf_skb_set_delivery_time,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -7746,6 +7786,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
case BPF_FUNC_sk_assign:
return &bpf_sk_assign_proto;
+ case BPF_FUNC_skb_set_delivery_time:
+ return &bpf_skb_set_delivery_time_proto;
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -8085,7 +8127,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
- case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+ case offsetof(struct __sk_buff, delivery_time_type):
+ return false;
+ case offsetofend(struct __sk_buff, delivery_time_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
/* Explicitly prohibit access to padding in __sk_buff. */
return false;
default:
@@ -8440,6 +8484,15 @@ static bool tc_cls_act_is_valid_access(int off, int size,
break;
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
+ case offsetof(struct __sk_buff, delivery_time_type):
+ /* The convert_ctx_access() on reading and writing
+ * __sk_buff->tstamp depends on whether the bpf prog
+ * has used __sk_buff->delivery_time_type or not.
+ * Thus, we need to set prog->delivery_time_access
+ * earlier during is_valid_access() here.
+ */
+ ((struct bpf_prog *)prog)->delivery_time_access = 1;
+ return size == sizeof(__u8);
}
return bpf_skb_is_valid_access(off, size, type, prog, info);
@@ -8835,6 +8888,45 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static struct bpf_insn *bpf_convert_dtime_type_read(const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->dst_reg;
+ __u8 skb_reg = si->src_reg;
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
+ SKB_MONO_DELIVERY_TIME_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
+ /* value_reg = BPF_SKB_DELIVERY_TIME_MONO */
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_MONO);
+ *insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 10 : 5);
+
+ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, skb_reg,
+ offsetof(struct sk_buff, tstamp));
+ *insn++ = BPF_JMP_IMM(BPF_JNE, tmp_reg, 0, 2);
+ /* value_reg = BPF_SKB_DELIVERY_TIME_NONE */
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_NONE);
+ *insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 6 : 1);
+
+#ifdef CONFIG_NET_CLS_ACT
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
+ /* At ingress, value_reg = 0 */
+ *insn++ = BPF_MOV32_IMM(value_reg, 0);
+ *insn++ = BPF_JMP_A(1);
+#endif
+
+ /* value_reg = BPF_SKB_DELIVERYT_TIME_UNSPEC */
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_UNSPEC);
+
+ /* 15 insns with CONFIG_NET_CLS_ACT */
+ return insn;
+}
+
static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
@@ -8856,6 +8948,71 @@ static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
return insn;
}
+static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->dst_reg;
+ __u8 skb_reg = si->src_reg;
+
+#ifdef CONFIG_NET_CLS_ACT
+ if (!prog->delivery_time_access) {
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 5);
+ /* @ingress, read __sk_buff->tstamp as the (rcv) timestamp,
+ * so check the skb->mono_delivery_time.
+ */
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
+ SKB_MONO_DELIVERY_TIME_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
+ /* skb->mono_delivery_time is set, read 0 as the (rcv) timestamp. */
+ *insn++ = BPF_MOV64_IMM(value_reg, 0);
+ *insn++ = BPF_JMP_A(1);
+ }
+#endif
+
+ *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
+ offsetof(struct sk_buff, tstamp));
+ return insn;
+}
+
+static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->src_reg;
+ __u8 skb_reg = si->dst_reg;
+
+#ifdef CONFIG_NET_CLS_ACT
+ if (!prog->delivery_time_access) {
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 3);
+ /* Writing __sk_buff->tstamp at ingress as the (rcv) timestamp.
+ * Clear the skb->mono_delivery_time.
+ */
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
+ SKB_MONO_DELIVERY_TIME_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
+ ~SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_OFFSET);
+ }
+#endif
+
+ /* skb->tstamp = tstamp */
+ *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
+ offsetof(struct sk_buff, tstamp));
+ return insn;
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -9164,17 +9321,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_DW,
- si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- tstamp, 8,
- target_size));
+ insn = bpf_convert_tstamp_write(prog, si, insn);
else
- *insn++ = BPF_LDX_MEM(BPF_DW,
- si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- tstamp, 8,
- target_size));
+ insn = bpf_convert_tstamp_read(prog, si, insn);
+ break;
+
+ case offsetof(struct __sk_buff, delivery_time_type):
+ insn = bpf_convert_dtime_type_read(si, insn);
break;
case offsetof(struct __sk_buff, gso_segs):
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15833e1d6ea1..03b6e649c428 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -22,6 +22,7 @@
#include <linux/ppp_defs.h>
#include <linux/stddef.h>
#include <linux/if_ether.h>
+#include <linux/if_hsr.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <linux/ptp_classify.h>
@@ -1282,6 +1283,23 @@ proto_again:
break;
}
+ case htons(ETH_P_PRP):
+ case htons(ETH_P_HSR): {
+ struct hsr_tag *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen,
+ &_hdr);
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ proto = hdr->encap_proto;
+ nhoff += HSR_HLEN;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+ }
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
diff --git a/net/core/gro.c b/net/core/gro.c
index ee5e7e889d8b..78110edf5d4b 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -93,6 +93,31 @@ void dev_remove_offload(struct packet_offload *po)
EXPORT_SYMBOL(dev_remove_offload);
/**
+ * skb_eth_gso_segment - segmentation handler for ethernet protocols.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ * @type: Ethernet Protocol ID
+ */
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
+ segs = ptype->callbacks.gso_segment(skb, features);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+EXPORT_SYMBOL(skb_eth_gso_segment);
+
+/**
* skb_mac_gso_segment - mac layer segmentation handler.
* @skb: buffer to segment
* @features: features for the output path (see dev->features)
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 6eb2e5ec2c50..8462f926ab45 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -89,8 +89,23 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev)
}
EXPORT_SYMBOL(gro_cells_init);
+struct percpu_free_defer {
+ struct rcu_head rcu;
+ void __percpu *ptr;
+};
+
+static void percpu_free_defer_callback(struct rcu_head *head)
+{
+ struct percpu_free_defer *defer;
+
+ defer = container_of(head, struct percpu_free_defer, rcu);
+ free_percpu(defer->ptr);
+ kfree(defer);
+}
+
void gro_cells_destroy(struct gro_cells *gcells)
{
+ struct percpu_free_defer *defer;
int i;
if (!gcells->cells)
@@ -102,12 +117,23 @@ void gro_cells_destroy(struct gro_cells *gcells)
__netif_napi_del(&cell->napi);
__skb_queue_purge(&cell->napi_skbs);
}
- /* This barrier is needed because netpoll could access dev->napi_list
- * under rcu protection.
+ /* We need to observe an rcu grace period before freeing ->cells,
+ * because netpoll could access dev->napi_list under rcu protection.
+ * Try hard using call_rcu() instead of synchronize_rcu(),
+ * because we might be called from cleanup_net(), and we
+ * definitely do not want to block this critical task.
*/
- synchronize_net();
-
- free_percpu(gcells->cells);
+ defer = kmalloc(sizeof(*defer), GFP_KERNEL | __GFP_NOWARN);
+ if (likely(defer)) {
+ defer->ptr = gcells->cells;
+ call_rcu(&defer->rcu, percpu_free_defer_callback);
+ } else {
+ /* We do not hold RTNL at this point, synchronize_net()
+ * would not be able to expedite this sync.
+ */
+ synchronize_rcu_expedited();
+ free_percpu(gcells->cells);
+ }
gcells->cells = NULL;
}
EXPORT_SYMBOL(gro_cells_destroy);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ec0bf737b076..f64ebd050f6c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1171,7 +1171,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
@@ -1193,7 +1193,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
- kfree_skb(buff);
+ kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
@@ -1215,7 +1215,7 @@ out_dead:
if (neigh->nud_state & NUD_STALE)
goto out_unlock_bh;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
trace_neigh_event_send_dead(neigh, 1);
return 1;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 53ea262ecafd..fbddf966206b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -213,7 +213,7 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev)) {
+ if (netif_running(netdev) && netif_device_present(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e25d359d84d9..1943c0f0307d 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -26,6 +26,45 @@
#define BIAS_MAX LONG_MAX
+#ifdef CONFIG_PAGE_POOL_STATS
+/* alloc_stat_inc is intended to be used in softirq context */
+#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++)
+/* recycle_stat_inc is safe to use when preemption is possible. */
+#define recycle_stat_inc(pool, __stat) \
+ do { \
+ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
+ this_cpu_inc(s->__stat); \
+ } while (0)
+
+bool page_pool_get_stats(struct page_pool *pool,
+ struct page_pool_stats *stats)
+{
+ int cpu = 0;
+
+ if (!stats)
+ return false;
+
+ memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats));
+
+ for_each_possible_cpu(cpu) {
+ const struct page_pool_recycle_stats *pcpu =
+ per_cpu_ptr(pool->recycle_stats, cpu);
+
+ stats->recycle_stats.cached += pcpu->cached;
+ stats->recycle_stats.cache_full += pcpu->cache_full;
+ stats->recycle_stats.ring += pcpu->ring;
+ stats->recycle_stats.ring_full += pcpu->ring_full;
+ stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(page_pool_get_stats);
+#else
+#define alloc_stat_inc(pool, __stat)
+#define recycle_stat_inc(pool, __stat)
+#endif
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -73,6 +112,12 @@ static int page_pool_init(struct page_pool *pool,
pool->p.flags & PP_FLAG_PAGE_FRAG)
return -EINVAL;
+#ifdef CONFIG_PAGE_POOL_STATS
+ pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+ if (!pool->recycle_stats)
+ return -ENOMEM;
+#endif
+
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
@@ -117,8 +162,10 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
int pref_nid; /* preferred NUMA node */
/* Quicker fallback, avoid locks when ring is empty */
- if (__ptr_ring_empty(r))
+ if (__ptr_ring_empty(r)) {
+ alloc_stat_inc(pool, empty);
return NULL;
+ }
/* Softirq guarantee CPU and thus NUMA node is stable. This,
* assumes CPU refilling driver RX-ring will also run RX-NAPI.
@@ -145,14 +192,17 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
* This limit stress on page buddy alloactor.
*/
page_pool_return_page(pool, page);
+ alloc_stat_inc(pool, waive);
page = NULL;
break;
}
} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
- if (likely(pool->alloc.count > 0))
+ if (likely(pool->alloc.count > 0)) {
page = pool->alloc.cache[--pool->alloc.count];
+ alloc_stat_inc(pool, refill);
+ }
return page;
}
@@ -166,6 +216,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
if (likely(pool->alloc.count)) {
/* Fast-path */
page = pool->alloc.cache[--pool->alloc.count];
+ alloc_stat_inc(pool, fast);
} else {
page = page_pool_refill_alloc_cache(pool);
}
@@ -239,6 +290,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
return NULL;
}
+ alloc_stat_inc(pool, slow_high_order);
page_pool_set_pp_info(pool, page);
/* Track how many pages are held 'in-flight' */
@@ -293,10 +345,12 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
}
/* Return last page */
- if (likely(pool->alloc.count > 0))
+ if (likely(pool->alloc.count > 0)) {
page = pool->alloc.cache[--pool->alloc.count];
- else
+ alloc_stat_inc(pool, slow);
+ } else {
page = NULL;
+ }
/* When page just alloc'ed is should/must have refcnt 1. */
return page;
@@ -394,7 +448,12 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
else
ret = ptr_ring_produce_bh(&pool->ring, page);
- return (ret == 0) ? true : false;
+ if (!ret) {
+ recycle_stat_inc(pool, ring);
+ return true;
+ }
+
+ return false;
}
/* Only allow direct recycling in special circumstances, into the
@@ -405,11 +464,14 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_cache(struct page *page,
struct page_pool *pool)
{
- if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
+ if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
+ recycle_stat_inc(pool, cache_full);
return false;
+ }
/* Caller MUST have verified/know (page_ref_count(page) == 1) */
pool->alloc.cache[pool->alloc.count++] = page;
+ recycle_stat_inc(pool, cached);
return true;
}
@@ -459,6 +521,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* doing refcnt based recycle tricks, meaning another process
* will be invoking put_page.
*/
+ recycle_stat_inc(pool, released_refcnt);
/* Do not replace this with page_pool_return_page() */
page_pool_release_page(pool, page);
put_page(page);
@@ -472,6 +535,7 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
if (page && !page_pool_recycle_in_ring(pool, page)) {
/* Cache full, fallback to free pages */
+ recycle_stat_inc(pool, ring_full);
page_pool_return_page(pool, page);
}
}
@@ -620,6 +684,9 @@ static void page_pool_free(struct page_pool *pool)
if (pool->p.flags & PP_FLAG_DMA_MAP)
put_device(pool->p.dev);
+#ifdef CONFIG_PAGE_POOL_STATS
+ free_percpu(pool->recycle_stats);
+#endif
kfree(pool);
}
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index dd4cf01d1e0a..598041b0499e 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -137,6 +137,18 @@ struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type)
}
EXPORT_SYMBOL_GPL(ptp_parse_header);
+bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type)
+{
+ struct ptp_header *hdr;
+
+ hdr = ptp_parse_header(skb, type);
+ if (!hdr)
+ return false;
+
+ return ptp_get_msgtype(hdr, type) == PTP_MSGTYPE_SYNC;
+}
+EXPORT_SYMBOL_GPL(ptp_msg_is_sync);
+
void __init ptp_classifier_init(void)
{
static struct sock_filter ptp_filter[] __initdata = {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a6fad3df42a8..159c9c61e6af 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1699,6 +1699,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
+ struct Qdisc *qdisc;
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1716,6 +1717,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
+ qdisc = rtnl_dereference(dev->qdisc);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1735,8 +1737,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (dev->qdisc &&
- nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
+ (qdisc &&
+ nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -3650,13 +3652,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
bool *changed, struct netlink_ext_ack *extack)
{
char *alt_ifname;
+ size_t size;
int err;
err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
if (err)
return err;
- alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (cmd == RTM_NEWLINKPROP) {
+ size = rtnl_prop_list_size(dev);
+ size += nla_total_size(ALTIFNAMSIZ);
+ if (size >= U16_MAX) {
+ NL_SET_ERR_MSG(extack,
+ "effective property list too long");
+ return -EINVAL;
+ }
+ }
+
+ alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (!alt_ifname)
return -ENOMEM;
@@ -5046,82 +5059,256 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
(!idxattr || idxattr == attrid);
}
-#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
-static int rtnl_get_offload_stats_attr_size(int attr_id)
+static bool
+rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id)
{
- switch (attr_id) {
- case IFLA_OFFLOAD_XSTATS_CPU_HIT:
- return sizeof(struct rtnl_link_stats64);
- }
+ return dev->netdev_ops &&
+ dev->netdev_ops->ndo_has_offload_stats &&
+ dev->netdev_ops->ndo_get_offload_stats &&
+ dev->netdev_ops->ndo_has_offload_stats(dev, attr_id);
+}
- return 0;
+static unsigned int
+rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id)
+{
+ return rtnl_offload_xstats_have_ndo(dev, attr_id) ?
+ sizeof(struct rtnl_link_stats64) : 0;
}
-static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
- int *prividx)
+static int
+rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id,
+ struct sk_buff *skb)
{
+ unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id);
struct nlattr *attr = NULL;
- int attr_id, size;
void *attr_data;
int err;
- if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
- dev->netdev_ops->ndo_get_offload_stats))
+ if (!size)
return -ENODATA;
- for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
- attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (attr_id < *prividx)
- continue;
+ attr = nla_reserve_64bit(skb, attr_id, size,
+ IFLA_OFFLOAD_XSTATS_UNSPEC);
+ if (!attr)
+ return -EMSGSIZE;
- size = rtnl_get_offload_stats_attr_size(attr_id);
- if (!size)
- continue;
+ attr_data = nla_data(attr);
+ memset(attr_data, 0, size);
- if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
- continue;
+ err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static unsigned int
+rtnl_offload_xstats_get_size_stats(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ bool enabled = netdev_offload_xstats_enabled(dev, type);
+
+ return enabled ? sizeof(struct rtnl_hw_stats64) : 0;
+}
+
+struct rtnl_offload_xstats_request_used {
+ bool request;
+ bool used;
+};
+
+static int
+rtnl_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_offload_xstats_request_used *ru,
+ struct rtnl_hw_stats64 *stats,
+ struct netlink_ext_ack *extack)
+{
+ bool request;
+ bool used;
+ int err;
- attr = nla_reserve_64bit(skb, attr_id, size,
+ request = netdev_offload_xstats_enabled(dev, type);
+ if (!request) {
+ used = false;
+ goto out;
+ }
+
+ err = netdev_offload_xstats_get(dev, type, stats, &used, extack);
+ if (err)
+ return err;
+
+out:
+ if (ru) {
+ ru->request = request;
+ ru->used = used;
+ }
+ return 0;
+}
+
+static int
+rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id,
+ struct rtnl_offload_xstats_request_used *ru)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr_id);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int
+rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ struct rtnl_offload_xstats_request_used ru_l3;
+ struct nlattr *nest;
+ int err;
+
+ err = rtnl_offload_xstats_get_stats(dev, t_l3, &ru_l3, NULL, extack);
+ if (err)
+ return err;
+
+ nest = nla_nest_start(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (rtnl_offload_xstats_fill_hw_s_info_one(skb,
+ IFLA_OFFLOAD_XSTATS_L3_STATS,
+ &ru_l3))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
+ int *prividx, u32 off_filter_mask,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO;
+ int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS;
+ int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
+ bool have_data = false;
+ int err;
+
+ if (*prividx <= attr_id_cpu_hit &&
+ (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) {
+ err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb);
+ if (!err) {
+ have_data = true;
+ } else if (err != -ENODATA) {
+ *prividx = attr_id_cpu_hit;
+ return err;
+ }
+ }
+
+ if (*prividx <= attr_id_hw_s_info &&
+ (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) {
+ *prividx = attr_id_hw_s_info;
+
+ err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack);
+ if (err)
+ return err;
+
+ have_data = true;
+ *prividx = 0;
+ }
+
+ if (*prividx <= attr_id_l3_stats &&
+ (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) {
+ unsigned int size_l3;
+ struct nlattr *attr;
+
+ *prividx = attr_id_l3_stats;
+
+ size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3);
+ attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3,
IFLA_OFFLOAD_XSTATS_UNSPEC);
if (!attr)
- goto nla_put_failure;
+ return -EMSGSIZE;
- attr_data = nla_data(attr);
- memset(attr_data, 0, size);
- err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
- attr_data);
+ err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL,
+ nla_data(attr), extack);
if (err)
- goto get_offload_stats_failure;
+ return err;
+
+ have_data = true;
+ *prividx = 0;
}
- if (!attr)
+ if (!have_data)
return -ENODATA;
*prividx = 0;
return 0;
+}
-nla_put_failure:
- err = -EMSGSIZE;
-get_offload_stats_failure:
- *prividx = attr_id;
- return err;
+static unsigned int
+rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ bool enabled = netdev_offload_xstats_enabled(dev, type);
+
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
+ nla_total_size(sizeof(u8)) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
+ (enabled ? nla_total_size(sizeof(u8)) : 0) +
+ 0;
+}
+
+static unsigned int
+rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_L3_STATS */
+ rtnl_offload_xstats_get_size_hw_s_info_one(dev, t_l3) +
+ 0;
}
-static int rtnl_get_offload_stats_size(const struct net_device *dev)
+static int rtnl_offload_xstats_get_size(const struct net_device *dev,
+ u32 off_filter_mask)
{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
int nla_size = 0;
- int attr_id;
int size;
- if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
- dev->netdev_ops->ndo_get_offload_stats))
- return 0;
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(attr_id_cpu_hit)) {
+ size = rtnl_offload_xstats_get_size_ndo(dev, attr_id_cpu_hit);
+ nla_size += nla_total_size_64bit(size);
+ }
- for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
- attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
- continue;
- size = rtnl_get_offload_stats_attr_size(attr_id);
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO))
+ nla_size += rtnl_offload_xstats_get_size_hw_s_info(dev);
+
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) {
+ size = rtnl_offload_xstats_get_size_stats(dev, t_l3);
nla_size += nla_total_size_64bit(size);
}
@@ -5131,11 +5318,21 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
return nla_size;
}
+struct rtnl_stats_dump_filters {
+ /* mask[0] filters outer attributes. Then individual nests have their
+ * filtering mask at the index of the nested attribute.
+ */
+ u32 mask[IFLA_STATS_MAX + 1];
+};
+
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
- unsigned int flags, unsigned int filter_mask,
- int *idxattr, int *prividx)
+ unsigned int flags,
+ const struct rtnl_stats_dump_filters *filters,
+ int *idxattr, int *prividx,
+ struct netlink_ext_ack *extack)
{
+ unsigned int filter_mask = filters->mask[0];
struct if_stats_msg *ifsm;
struct nlmsghdr *nlh;
struct nlattr *attr;
@@ -5161,8 +5358,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
sizeof(struct rtnl_link_stats64),
IFLA_STATS_UNSPEC);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
sp = nla_data(attr);
dev_get_stats(dev, sp);
@@ -5175,8 +5374,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_LINK_XSTATS;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_XSTATS);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
@@ -5198,8 +5399,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_XSTATS_SLAVE);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
@@ -5211,13 +5414,19 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
*idxattr)) {
+ u32 off_filter_mask;
+
+ off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_OFFLOAD_XSTATS);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
- err = rtnl_get_offload_stats(skb, dev, prividx);
+ err = rtnl_offload_xstats_fill(skb, dev, prividx,
+ off_filter_mask, extack);
if (err == -ENODATA)
nla_nest_cancel(skb, attr);
else
@@ -5233,19 +5442,21 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_AF_SPEC;
attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
rcu_read_lock();
list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
- int err;
af = nla_nest_start_noflag(skb,
af_ops->family);
if (!af) {
rcu_read_unlock();
+ err = -EMSGSIZE;
goto nla_put_failure;
}
err = af_ops->fill_stats_af(skb, dev);
@@ -5278,13 +5489,14 @@ nla_put_failure:
else
nlmsg_end(skb, nlh);
- return -EMSGSIZE;
+ return err;
}
static size_t if_nlmsg_stats_size(const struct net_device *dev,
- u32 filter_mask)
+ const struct rtnl_stats_dump_filters *filters)
{
size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
+ unsigned int filter_mask = filters->mask[0];
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
@@ -5320,8 +5532,12 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
}
}
- if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
- size += rtnl_get_offload_stats_size(dev);
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) {
+ u32 off_filter_mask;
+
+ off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
+ size += rtnl_offload_xstats_get_size(dev, off_filter_mask);
+ }
if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) {
struct rtnl_af_ops *af_ops;
@@ -5345,6 +5561,79 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
return size;
}
+#define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1)
+
+static const struct nla_policy
+rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = {
+ [IFLA_STATS_LINK_OFFLOAD_XSTATS] =
+ NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID),
+};
+
+static const struct nla_policy
+rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = {
+ [IFLA_STATS_GET_FILTERS] =
+ NLA_POLICY_NESTED(rtnl_stats_get_policy_filters),
+};
+
+static const struct nla_policy
+ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = {
+ [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1),
+};
+
+static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters,
+ struct rtnl_stats_dump_filters *filters,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_STATS_MAX + 1];
+ int err;
+ int at;
+
+ err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters,
+ rtnl_stats_get_policy_filters, extack);
+ if (err < 0)
+ return err;
+
+ for (at = 1; at <= IFLA_STATS_MAX; at++) {
+ if (tb[at]) {
+ if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) {
+ NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask");
+ return -EINVAL;
+ }
+ filters->mask[at] = nla_get_u32(tb[at]);
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_stats_get_parse(const struct nlmsghdr *nlh,
+ u32 filter_mask,
+ struct rtnl_stats_dump_filters *filters,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1];
+ int err;
+ int i;
+
+ filters->mask[0] = filter_mask;
+ for (i = 1; i < ARRAY_SIZE(filters->mask); i++)
+ filters->mask[i] = -1U;
+
+ err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb,
+ IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[IFLA_STATS_GET_FILTERS]) {
+ err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS],
+ filters, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
bool is_dump, struct netlink_ext_ack *extack)
{
@@ -5367,10 +5656,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
return -EINVAL;
}
- if (nlmsg_attrlen(nlh, sizeof(*ifsm))) {
- NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
- return -EINVAL;
- }
if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) {
NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask");
return -EINVAL;
@@ -5382,12 +5667,12 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
int idxattr = 0, prividx = 0;
struct if_stats_msg *ifsm;
struct sk_buff *nskb;
- u32 filter_mask;
int err;
err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
@@ -5404,17 +5689,22 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!dev)
return -ENODEV;
- filter_mask = ifsm->filter_mask;
- if (!filter_mask)
+ if (!ifsm->filter_mask) {
+ NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get");
return -EINVAL;
+ }
+
+ err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack);
+ if (err)
+ return err;
- nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
+ nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL);
if (!nskb)
return -ENOBUFS;
err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- 0, filter_mask, &idxattr, &prividx);
+ 0, &filters, &idxattr, &prividx, extack);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
WARN_ON(err == -EMSGSIZE);
@@ -5430,12 +5720,12 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
int h, s_h, err, s_idx, s_idxattr, s_prividx;
+ struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
struct hlist_head *head;
struct net_device *dev;
- u32 filter_mask = 0;
int idx = 0;
s_h = cb->args[0];
@@ -5450,12 +5740,16 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
return err;
ifsm = nlmsg_data(cb->nlh);
- filter_mask = ifsm->filter_mask;
- if (!filter_mask) {
+ if (!ifsm->filter_mask) {
NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump");
return -EINVAL;
}
+ err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters,
+ extack);
+ if (err)
+ return err;
+
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
@@ -5465,8 +5759,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
- flags, filter_mask,
- &s_idxattr, &s_prividx);
+ flags, &filters,
+ &s_idxattr, &s_prividx,
+ extack);
/* If we ran out of room on the first message,
* we're in trouble
*/
@@ -5490,6 +5785,107 @@ out:
return skb->len;
}
+void rtnl_offload_xstats_notify(struct net_device *dev)
+{
+ struct rtnl_stats_dump_filters response_filters = {};
+ struct net *net = dev_net(dev);
+ int idxattr = 0, prividx = 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ ASSERT_RTNL();
+
+ response_filters.mask[0] |=
+ IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+
+ skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters),
+ GFP_KERNEL);
+ if (!skb)
+ goto errout;
+
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0,
+ &response_filters, &idxattr, &prividx, NULL);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL);
+ return;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_STATS, err);
+}
+EXPORT_SYMBOL(rtnl_offload_xstats_notify);
+
+static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ struct rtnl_stats_dump_filters response_filters = {};
+ struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev = NULL;
+ struct if_stats_msg *ifsm;
+ bool notify = false;
+ int err;
+
+ err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
+ false, extack);
+ if (err)
+ return err;
+
+ ifsm = nlmsg_data(nlh);
+ if (ifsm->family != AF_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC");
+ return -EINVAL;
+ }
+
+ if (ifsm->ifindex > 0)
+ dev = __dev_get_by_index(net, ifsm->ifindex);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ifsm->filter_mask) {
+ NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX,
+ ifla_stats_set_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) {
+ u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]);
+
+ if (req)
+ err = netdev_offload_xstats_enable(dev, t_l3, extack);
+ else
+ err = netdev_offload_xstats_disable(dev, t_l3);
+
+ if (!err)
+ notify = true;
+ else if (err != -EALREADY)
+ return err;
+
+ response_filters.mask[0] |=
+ IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+ }
+
+ if (notify)
+ rtnl_offload_xstats_notify(dev);
+
+ return 0;
+}
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5715,4 +6111,5 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
+ rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9d0388bed0c1..10bde7c6db44 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -201,7 +201,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
+ skb_set_end_offset(skb, size);
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
@@ -777,16 +777,17 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
}
EXPORT_SYMBOL(kfree_skb_reason);
-void kfree_skb_list(struct sk_buff *segs)
+void kfree_skb_list_reason(struct sk_buff *segs,
+ enum skb_drop_reason reason)
{
while (segs) {
struct sk_buff *next = segs->next;
- kfree_skb(segs);
+ kfree_skb_reason(segs, reason);
segs = next;
}
}
-EXPORT_SYMBOL(kfree_skb_list);
+EXPORT_SYMBOL(kfree_skb_list_reason);
/* Dump skb information and contents.
*
@@ -1736,11 +1737,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->head = data;
skb->head_frag = 0;
skb->data += off;
+
+ skb_set_end_offset(skb, size);
#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
off = nhead;
-#else
- skb->end = skb->head + size;
#endif
skb->tail += off;
skb_headers_offset_update(skb, nhead);
@@ -1788,6 +1788,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change back skb->end,
+ * we need to move skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
@@ -2276,7 +2308,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Free pulled out fragments. */
while ((list = skb_shinfo(skb)->frag_list) != insp) {
skb_shinfo(skb)->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -3876,6 +3908,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
list_skb = list_skb->next;
err = 0;
+ delta_truesize += nskb->truesize;
if (skb_shared(nskb)) {
tmp = skb_clone(nskb, GFP_ATOMIC);
if (tmp) {
@@ -3900,7 +3933,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
tail = nskb;
delta_len += nskb->len;
- delta_truesize += nskb->truesize;
skb_push(nskb, -skb_network_offset(nskb) + offset);
@@ -4730,7 +4762,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
- serr->ee.ee_data -= sk->sk_tskey;
+ serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
}
err = sock_queue_err_skb(sk, skb);
@@ -4820,7 +4852,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (hwtstamps)
*skb_hwtstamps(skb) = *hwtstamps;
else
- skb->tstamp = ktime_get_real();
+ __net_timestamp(skb);
__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
}
@@ -5350,7 +5382,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
ipvs_reset(skb);
skb->mark = 0;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
@@ -6044,11 +6076,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb->head = data;
skb->data = data;
skb->head_frag = 0;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
-#else
- skb->end = skb->head + size;
-#endif
+ skb_set_end_offset(skb, size);
skb_set_tail_pointer(skb, skb_headlen(skb));
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
@@ -6105,7 +6133,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb,
/* Free pulled out fragments. */
while ((list = shinfo->frag_list) != insp) {
shinfo->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -6186,11 +6214,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb->head = data;
skb->head_frag = 0;
skb->data = data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
-#else
- skb->end = skb->head + size;
-#endif
+ skb_set_end_offset(skb, size);
skb_reset_tail_pointer(skb);
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 8eb671c827f9..929a2b096b04 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1153,7 +1153,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
- int len = skb->len;
+ int len = orig_len;
/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
skb = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/core/sock.c b/net/core/sock.c
index 09d31a7dc68f..1180a0cb0110 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -879,9 +879,9 @@ int sock_set_timestamping(struct sock *sk, int optname,
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
- sk->sk_tskey = tcp_sk(sk)->snd_una;
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
} else {
- sk->sk_tskey = 0;
+ atomic_set(&sk->sk_tskey, 0);
}
}
@@ -1377,9 +1377,9 @@ set_sndbuf:
if (!(sk_is_tcp(sk) ||
(sk->sk_type == SOCK_DGRAM &&
sk->sk_protocol == IPPROTO_UDP)))
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
} else if (sk->sk_family != PF_RDS) {
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
}
if (!ret) {
if (val < 0 || val > 1)
@@ -3718,6 +3718,10 @@ int proto_register(struct proto *prot, int alloc_slab)
{
int ret = -ENOBUFS;
+ if (prot->memory_allocated && !prot->sysctl_mem) {
+ pr_err("%s: missing sysctl_mem\n", prot->name);
+ return -EINVAL;
+ }
if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index dbeb8ecbcd98..7123fe7feeac 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -103,8 +103,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (orig_sock_table) {
static_branch_dec(&rps_needed);
static_branch_dec(&rfs_needed);
- synchronize_rcu();
- vfree(orig_sock_table);
+ kvfree_rcu(orig_sock_table);
}
}
}
@@ -142,8 +141,7 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
lockdep_is_held(&flow_limit_update_mutex));
if (cur && !cpumask_test_cpu(i, mask)) {
RCU_INIT_POINTER(sd->flow_limit, NULL);
- synchronize_rcu();
- kfree(cur);
+ kfree_rcu(cur);
} else if (!cur && cpumask_test_cpu(i, mask)) {
cur = kzalloc_node(len, GFP_KERNEL,
cpu_to_node(i));
diff --git a/net/core/utils.c b/net/core/utils.c
index 1f31a39236d5..938495bc1d34 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -476,9 +476,9 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
__wsum diff, bool pseudohdr)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
- *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+ csum_replace_by_diff(sum, diff);
if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_add(diff, ~skb->csum);
+ skb->csum = ~csum_sub(diff, skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 361df312ee7f..7577adf19ef4 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -359,7 +359,8 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
if (IS_ERR(xdp_alloc))
return PTR_ERR(xdp_alloc);
- trace_mem_connect(xdp_alloc, xdp_rxq);
+ if (trace_mem_connect_enabled() && xdp_alloc)
+ trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
}
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index b441ab330fd3..dc4fb699b56c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
}
EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+static void dcbnl_flush_dev(struct net_device *dev)
+{
+ struct dcb_app_type *itr, *tmp;
+
+ spin_lock_bh(&dcb_lock);
+
+ list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) {
+ if (itr->ifindex == dev->ifindex) {
+ list_del(&itr->list);
+ kfree(itr);
+ }
+ }
+
+ spin_unlock_bh(&dcb_lock);
+}
+
+static int dcbnl_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ if (!dev->dcbnl_ops)
+ return NOTIFY_DONE;
+
+ dcbnl_flush_dev(dev);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block dcbnl_nb __read_mostly = {
+ .notifier_call = dcbnl_netdevice_event,
+};
+
static int __init dcbnl_init(void)
{
+ int err;
+
+ err = register_netdevice_notifier(&dcbnl_nb);
+ if (err)
+ return err;
+
rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index eadc89583168..b05639bdfc8f 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -52,6 +52,7 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/if_packet.h>
+#include <linux/jiffies.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
@@ -351,7 +352,7 @@ void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb,
* Slow start: If we have been idle for more than
* one RTT, then reset window to min size.
*/
- if ((jiffies - scp->stamp) > t)
+ if (time_is_before_jiffies(scp->stamp + t))
scp->snd_window = NSP_MIN_WINDOW;
if (oth)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d9d0d227092c..89c6c86e746f 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -349,6 +349,7 @@ void dsa_flush_workqueue(void)
{
flush_workqueue(dsa_owq);
}
+EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
@@ -466,6 +467,106 @@ struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
}
EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
+int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->fdbs, list) {
+ err = cb(ds, port, a->addr, a->vid, a->db);
+ if (err)
+ break;
+ }
+
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(dsa_port_walk_fdbs);
+
+int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->mdbs, list) {
+ err = cb(ds, port, a->addr, a->vid, a->db);
+ if (err)
+ break;
+ }
+
+ mutex_unlock(&dp->addr_lists_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(dsa_port_walk_mdbs);
+
+bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b)
+{
+ if (a->type != b->type)
+ return false;
+
+ switch (a->type) {
+ case DSA_DB_PORT:
+ return a->dp == b->dp;
+ case DSA_DB_LAG:
+ return a->lag.dev == b->lag.dev;
+ case DSA_DB_BRIDGE:
+ return a->bridge.num == b->bridge.num;
+ default:
+ WARN_ON(1);
+ return false;
+ }
+}
+
+bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+
+ lockdep_assert_held(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->fdbs, list) {
+ if (!ether_addr_equal(a->addr, addr) || a->vid != vid)
+ continue;
+
+ if (a->db.type == db.type && !dsa_db_equal(&a->db, &db))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(dsa_fdb_present_in_other_db);
+
+bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+
+ lockdep_assert_held(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->mdbs, list) {
+ if (!ether_addr_equal(a->addr, mdb->addr) || a->vid != mdb->vid)
+ continue;
+
+ if (a->db.type == db.type && !dsa_db_equal(&a->db, &db))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db);
+
static int __init dsa_init_module(void)
{
int rc;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index e498c927c3d0..bef1aaa7ec1c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -72,27 +72,24 @@ int dsa_broadcast(unsigned long e, void *v)
}
/**
- * dsa_lag_map() - Map LAG netdev to a linear LAG ID
+ * dsa_lag_map() - Map LAG structure to a linear LAG array
* @dst: Tree in which to record the mapping.
- * @lag: Netdev that is to be mapped to an ID.
+ * @lag: LAG structure that is to be mapped to the tree's array.
*
- * dsa_lag_id/dsa_lag_dev can then be used to translate between the
+ * dsa_lag_id/dsa_lag_by_id can then be used to translate between the
* two spaces. The size of the mapping space is determined by the
* driver by setting ds->num_lag_ids. It is perfectly legal to leave
* it unset if it is not needed, in which case these functions become
* no-ops.
*/
-void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag)
{
unsigned int id;
- if (dsa_lag_id(dst, lag) >= 0)
- /* Already mapped */
- return;
-
- for (id = 0; id < dst->lags_len; id++) {
- if (!dsa_lag_dev(dst, id)) {
- dst->lags[id] = lag;
+ for (id = 1; id <= dst->lags_len; id++) {
+ if (!dsa_lag_by_id(dst, id)) {
+ dst->lags[id - 1] = lag;
+ lag->id = id;
return;
}
}
@@ -108,28 +105,36 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
/**
* dsa_lag_unmap() - Remove a LAG ID mapping
* @dst: Tree in which the mapping is recorded.
- * @lag: Netdev that was mapped.
+ * @lag: LAG structure that was mapped.
*
* As there may be multiple users of the mapping, it is only removed
* if there are no other references to it.
*/
-void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag)
{
- struct dsa_port *dp;
unsigned int id;
- dsa_lag_foreach_port(dp, dst, lag)
- /* There are remaining users of this mapping */
- return;
-
dsa_lags_foreach_id(id, dst) {
- if (dsa_lag_dev(dst, id) == lag) {
- dst->lags[id] = NULL;
+ if (dsa_lag_by_id(dst, id) == lag) {
+ dst->lags[id - 1] = NULL;
+ lag->id = 0;
break;
}
}
}
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_lag_dev_get(dp) == lag_dev)
+ return dp->lag;
+
+ return NULL;
+}
+
struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
const struct net_device *br)
{
@@ -452,10 +457,6 @@ static int dsa_port_setup(struct dsa_port *dp)
if (dp->setup)
return 0;
- mutex_init(&dp->addr_lists_lock);
- INIT_LIST_HEAD(&dp->fdbs);
- INIT_LIST_HEAD(&dp->mdbs);
-
if (ds->ops->port_setup) {
err = ds->ops->port_setup(ds, dp->index);
if (err)
@@ -561,7 +562,6 @@ static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
struct dsa_switch *ds = dp->ds;
- struct dsa_mac_addr *a, *tmp;
struct net_device *slave;
if (!dp->setup)
@@ -593,16 +593,6 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
}
- list_for_each_entry_safe(a, tmp, &dp->fdbs, list) {
- list_del(&a->list);
- kfree(a);
- }
-
- list_for_each_entry_safe(a, tmp, &dp->mdbs, list) {
- list_del(&a->list);
- kfree(a);
- }
-
dp->setup = false;
}
@@ -1059,7 +1049,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
- int err;
+ int err = 0;
rtnl_lock();
@@ -1071,7 +1061,7 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
err = dsa_master_setup(master, dp);
if (err)
- return err;
+ break;
/* Replay master state event */
dsa_tree_master_admin_state_change(dst, master, admin_up);
@@ -1082,7 +1072,7 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
rtnl_unlock();
- return 0;
+ return err;
}
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
@@ -1281,7 +1271,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
info.tag_ops = tag_ops;
err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
if (err)
- return err;
+ goto out_unwind_tagger;
err = dsa_tree_bind_tag_proto(dst, tag_ops);
if (err)
@@ -1361,6 +1351,11 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
dp->ds = ds;
dp->index = index;
+ mutex_init(&dp->addr_lists_lock);
+ mutex_init(&dp->vlans_lock);
+ INIT_LIST_HEAD(&dp->fdbs);
+ INIT_LIST_HEAD(&dp->mdbs);
+ INIT_LIST_HEAD(&dp->vlans);
INIT_LIST_HEAD(&dp->list);
list_add_tail(&dp->list, &dst->ports);
@@ -1699,6 +1694,9 @@ static void dsa_switch_release_ports(struct dsa_switch *ds)
struct dsa_port *dp, *next;
dsa_switch_for_each_port_safe(dp, next, ds) {
+ WARN_ON(!list_empty(&dp->fdbs));
+ WARN_ON(!list_empty(&dp->mdbs));
+ WARN_ON(!list_empty(&dp->vlans));
list_del(&dp->list);
kfree(dp);
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2bbfa9efe9f8..f20bdd8ea0a8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -25,6 +25,8 @@ enum {
DSA_NOTIFIER_FDB_DEL,
DSA_NOTIFIER_HOST_FDB_ADD,
DSA_NOTIFIER_HOST_FDB_DEL,
+ DSA_NOTIFIER_LAG_FDB_ADD,
+ DSA_NOTIFIER_LAG_FDB_DEL,
DSA_NOTIFIER_LAG_CHANGE,
DSA_NOTIFIER_LAG_JOIN,
DSA_NOTIFIER_LAG_LEAVE,
@@ -34,6 +36,8 @@ enum {
DSA_NOTIFIER_HOST_MDB_DEL,
DSA_NOTIFIER_VLAN_ADD,
DSA_NOTIFIER_VLAN_DEL,
+ DSA_NOTIFIER_HOST_VLAN_ADD,
+ DSA_NOTIFIER_HOST_VLAN_DEL,
DSA_NOTIFIER_MTU,
DSA_NOTIFIER_TAG_PROTO,
DSA_NOTIFIER_TAG_PROTO_CONNECT,
@@ -55,6 +59,7 @@ struct dsa_notifier_bridge_info {
int sw_index;
int port;
bool tx_fwd_offload;
+ struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_FDB_* */
@@ -63,6 +68,15 @@ struct dsa_notifier_fdb_info {
int port;
const unsigned char *addr;
u16 vid;
+ struct dsa_db db;
+};
+
+/* DSA_NOTIFIER_LAG_FDB_* */
+struct dsa_notifier_lag_fdb_info {
+ struct dsa_lag *lag;
+ const unsigned char *addr;
+ u16 vid;
+ struct dsa_db db;
};
/* DSA_NOTIFIER_MDB_* */
@@ -70,11 +84,12 @@ struct dsa_notifier_mdb_info {
const struct switchdev_obj_port_mdb *mdb;
int sw_index;
int port;
+ struct dsa_db db;
};
/* DSA_NOTIFIER_LAG_* */
struct dsa_notifier_lag_info {
- struct net_device *lag;
+ struct dsa_lag lag;
int sw_index;
int port;
@@ -117,9 +132,8 @@ struct dsa_notifier_master_state_info {
};
struct dsa_switchdev_event_work {
- struct dsa_switch *ds;
- int port;
struct net_device *dev;
+ struct net_device *orig_dev;
struct work_struct work;
unsigned long event;
/* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
@@ -130,6 +144,21 @@ struct dsa_switchdev_event_work {
bool host_addr;
};
+enum dsa_standalone_event {
+ DSA_UC_ADD,
+ DSA_UC_DEL,
+ DSA_MC_ADD,
+ DSA_MC_DEL,
+};
+
+struct dsa_standalone_event_work {
+ struct work_struct work;
+ struct net_device *dev;
+ enum dsa_standalone_event event;
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
struct dsa_slave_priv {
/* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
@@ -153,8 +182,9 @@ const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol);
void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
+bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b);
+
bool dsa_schedule_work(struct work_struct *work);
-void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
@@ -210,19 +240,31 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
-int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
+int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid);
+int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid);
+int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_host_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_host_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
@@ -234,6 +276,11 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct netlink_ext_ack *extack);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_host_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int dsa_port_host_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
int dsa_port_mrp_add(const struct dsa_port *dp,
const struct switchdev_obj_mrp *mrp);
int dsa_port_mrp_del(const struct dsa_port *dp,
@@ -480,9 +527,25 @@ static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
+static inline bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds)
+{
+ return ds->ops->port_fdb_add && ds->ops->port_fdb_del &&
+ ds->fdb_isolation && !ds->vlan_filtering_is_global &&
+ !ds->needs_standalone_vlan_filtering;
+}
+
+static inline bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds)
+{
+ return ds->ops->port_mdb_add && ds->ops->port_mdb_del &&
+ ds->fdb_isolation && !ds->vlan_filtering_is_global &&
+ !ds->needs_standalone_vlan_filtering;
+}
+
/* dsa2.c */
-void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag);
-void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag);
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag);
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag);
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev);
int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
int dsa_broadcast(unsigned long e, void *v);
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
@@ -502,10 +565,6 @@ struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
const struct net_device *br);
/* tag_8021q.c */
-int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info);
-int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info);
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_tag_8021q_vlan_info *info);
int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 6ac393cc6ea7..991c2930d631 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -260,11 +260,16 @@ static void dsa_netdev_ops_set(struct net_device *dev,
dev->dsa_ptr->netdev_ops = ops;
}
+/* Keep the master always promiscuous if the tagging protocol requires that
+ * (garbles MAC DA) or if it doesn't support unicast filtering, case in which
+ * it would revert to promiscuous mode as soon as we call dev_uc_add() on it
+ * anyway.
+ */
static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
{
const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
- if (!ops->promisc_on_master)
+ if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master)
return;
ASSERT_RTNL();
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bd78192e0e47..58291df14cdb 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -176,7 +176,7 @@ static int dsa_port_inherit_brport_flags(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
- BR_BCAST_FLOOD;
+ BR_BCAST_FLOOD | BR_PORT_LOCKED;
struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
int flag, err;
@@ -200,7 +200,7 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
{
const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
- BR_BCAST_FLOOD;
+ BR_BCAST_FLOOD | BR_PORT_LOCKED;
int flag, err;
for_each_set_bit(flag, &mask, 32) {
@@ -328,6 +328,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
.tree_index = dp->ds->dst->index,
.sw_index = dp->ds->index,
.port = dp->index,
+ .extack = extack,
};
struct net_device *dev = dp->slave;
struct net_device *brport_dev;
@@ -395,10 +396,17 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
.tree_index = dp->ds->dst->index,
.sw_index = dp->ds->index,
.port = dp->index,
- .bridge = *dp->bridge,
};
int err;
+ /* If the port could not be offloaded to begin with, then
+ * there is nothing to do.
+ */
+ if (!dp->bridge)
+ return;
+
+ info.bridge = *dp->bridge;
+
/* Here the port is already unbridged. Reflect the current configuration
* so that drivers can program their chips accordingly.
*/
@@ -422,7 +430,7 @@ int dsa_port_lag_change(struct dsa_port *dp,
};
bool tx_enabled;
- if (!dp->lag_dev)
+ if (!dp->lag)
return 0;
/* On statically configured aggregates (e.g. loadbalance
@@ -440,27 +448,70 @@ int dsa_port_lag_change(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info);
}
-int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
+static int dsa_port_lag_create(struct dsa_port *dp,
+ struct net_device *lag_dev)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_lag *lag;
+
+ lag = dsa_tree_lag_find(ds->dst, lag_dev);
+ if (lag) {
+ refcount_inc(&lag->refcount);
+ dp->lag = lag;
+ return 0;
+ }
+
+ lag = kzalloc(sizeof(*lag), GFP_KERNEL);
+ if (!lag)
+ return -ENOMEM;
+
+ refcount_set(&lag->refcount, 1);
+ mutex_init(&lag->fdb_lock);
+ INIT_LIST_HEAD(&lag->fdbs);
+ lag->dev = lag_dev;
+ dsa_lag_map(ds->dst, lag);
+ dp->lag = lag;
+
+ return 0;
+}
+
+static void dsa_port_lag_destroy(struct dsa_port *dp)
+{
+ struct dsa_lag *lag = dp->lag;
+
+ dp->lag = NULL;
+ dp->lag_tx_enabled = false;
+
+ if (!refcount_dec_and_test(&lag->refcount))
+ return;
+
+ WARN_ON(!list_empty(&lag->fdbs));
+ dsa_lag_unmap(dp->ds->dst, lag);
+ kfree(lag);
+}
+
+int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netdev_lag_upper_info *uinfo,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_lag_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .lag = lag,
.info = uinfo,
};
struct net_device *bridge_dev;
int err;
- dsa_lag_map(dp->ds->dst, lag);
- dp->lag_dev = lag;
+ err = dsa_port_lag_create(dp, lag_dev);
+ if (err)
+ goto err_lag_create;
+ info.lag = *dp->lag;
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
if (err)
goto err_lag_join;
- bridge_dev = netdev_master_upper_dev_get(lag);
+ bridge_dev = netdev_master_upper_dev_get(lag_dev);
if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
return 0;
@@ -473,12 +524,12 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
err_bridge_join:
dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
err_lag_join:
- dp->lag_dev = NULL;
- dsa_lag_unmap(dp->ds->dst, lag);
+ dsa_port_lag_destroy(dp);
+err_lag_create:
return err;
}
-void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev)
{
struct net_device *br = dsa_port_bridge_dev_get(dp);
@@ -486,17 +537,16 @@ void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
dsa_port_pre_bridge_leave(dp, br);
}
-void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
+void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev)
{
struct net_device *br = dsa_port_bridge_dev_get(dp);
struct dsa_notifier_lag_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .lag = lag,
};
int err;
- if (!dp->lag_dev)
+ if (!dp->lag)
return;
/* Port might have been part of a LAG that in turn was
@@ -505,16 +555,15 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
if (br)
dsa_port_bridge_leave(dp, br);
- dp->lag_tx_enabled = false;
- dp->lag_dev = NULL;
+ info.lag = *dp->lag;
+
+ dsa_port_lag_destroy(dp);
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
if (err)
dev_err(dp->ds->dev,
"port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n",
dp->index, ERR_PTR(err));
-
- dsa_lag_unmap(dp->ds->dst, lag);
}
/* Must be called under rcu_read_lock() */
@@ -750,8 +799,19 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
.port = dp->index,
.addr = addr,
.vid = vid,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ /* Refcounting takes bridge.num as a key, and should be global for all
+ * bridges in the absence of FDB isolation, and per bridge otherwise.
+ * Force the bridge.num to zero here in the absence of FDB isolation.
+ */
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
}
@@ -763,48 +823,154 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
.port = dp->index,
.addr = addr,
.vid = vid,
-
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
-int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+static int dsa_port_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
.addr = addr,
.vid = vid,
+ .db = db,
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
+}
+
+int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
};
+
+ return dsa_port_host_fdb_add(dp, addr, vid, db);
+}
+
+int dsa_port_bridge_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ };
int err;
- err = dev_uc_add(cpu_dp->master, addr);
- if (err)
- return err;
+ /* Avoid a call to __dev_set_promiscuity() on the master, which
+ * requires rtnl_lock(), since we can't guarantee that is held here,
+ * and we can't take it either.
+ */
+ if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_add(cpu_dp->master, addr);
+ if (err)
+ return err;
+ }
- return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
+ return dsa_port_host_fdb_add(dp, addr, vid, db);
}
-int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+static int dsa_port_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
.addr = addr,
.vid = vid,
+ .db = db,
};
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
+}
+
+int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_fdb_del(dp, addr, vid, db);
+}
+
+int dsa_port_bridge_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ };
int err;
- err = dev_uc_del(cpu_dp->master, addr);
- if (err)
- return err;
+ if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_del(cpu_dp->master, addr);
+ if (err)
+ return err;
+ }
- return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
+ return dsa_port_host_fdb_del(dp, addr, vid, db);
+}
+
+int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_notifier_lag_fdb_info info = {
+ .lag = dp->lag,
+ .addr = addr,
+ .vid = vid,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_ADD, &info);
+}
+
+int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_notifier_lag_fdb_info info = {
+ .lag = dp->lag,
+ .addr = addr,
+ .vid = vid,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_DEL, &info);
}
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
@@ -825,8 +991,15 @@ int dsa_port_mdb_add(const struct dsa_port *dp,
.sw_index = dp->ds->index,
.port = dp->index,
.mdb = mdb,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
}
@@ -837,45 +1010,106 @@ int dsa_port_mdb_del(const struct dsa_port *dp,
.sw_index = dp->ds->index,
.port = dp->index,
.mdb = mdb,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
}
-int dsa_port_host_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_notifier_mdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
.mdb = mdb,
+ .db = db,
};
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
+}
+
+int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_mdb_add(dp, mdb, db);
+}
+
+int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ };
int err;
err = dev_mc_add(cpu_dp->master, mdb->addr);
if (err)
return err;
- return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
+ return dsa_port_host_mdb_add(dp, mdb, db);
}
-int dsa_port_host_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_notifier_mdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
.mdb = mdb,
+ .db = db,
};
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
+}
+
+int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_mdb_del(dp, mdb, db);
+}
+
+int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ };
int err;
err = dev_mc_del(cpu_dp->master, mdb->addr);
if (err)
return err;
- return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
+ return dsa_port_host_mdb_del(dp, mdb, db);
}
int dsa_port_vlan_add(struct dsa_port *dp,
@@ -904,6 +1138,48 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
+int dsa_port_host_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_notifier_vlan_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vlan = vlan,
+ .extack = extack,
+ };
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ int err;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_ADD, &info);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ vlan_vid_add(cpu_dp->master, htons(ETH_P_8021Q), vlan->vid);
+
+ return err;
+}
+
+int dsa_port_host_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_notifier_vlan_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vlan = vlan,
+ };
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ int err;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_DEL, &info);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ vlan_vid_del(cpu_dp->master, htons(ETH_P_8021Q), vlan->vid);
+
+ return err;
+}
+
int dsa_port_mrp_add(const struct dsa_port *dp,
const struct switchdev_obj_mrp *mrp)
{
@@ -1011,6 +1287,20 @@ static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
}
}
+static struct phylink_pcs *
+dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->phylink_mac_select_pcs)
+ pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface);
+
+ return pcs;
+}
+
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
@@ -1077,6 +1367,7 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
+ .mac_select_pcs = dsa_port_phylink_mac_select_pcs,
.mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
.mac_config = dsa_port_phylink_mac_config,
.mac_an_restart = dsa_port_phylink_mac_an_restart,
@@ -1194,7 +1485,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
dp->pl_config.dev = ds->dev;
dp->pl_config.type = PHYLINK_DEV;
- dp->pl_config.pcs_poll = ds->pcs_poll;
err = dsa_port_phylink_create(dp);
if (err)
@@ -1258,63 +1548,6 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
dsa_port_setup_phy_of(dp, false);
}
-int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data)
-{
- struct phy_device *phydev;
- int ret = -EOPNOTSUPP;
-
- if (of_phy_is_fixed_link(dp->dn))
- return ret;
-
- phydev = dsa_port_get_phy_device(dp);
- if (IS_ERR_OR_NULL(phydev))
- return ret;
-
- ret = phy_ethtool_get_strings(phydev, data);
- put_device(&phydev->mdio.dev);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings);
-
-int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data)
-{
- struct phy_device *phydev;
- int ret = -EOPNOTSUPP;
-
- if (of_phy_is_fixed_link(dp->dn))
- return ret;
-
- phydev = dsa_port_get_phy_device(dp);
- if (IS_ERR_OR_NULL(phydev))
- return ret;
-
- ret = phy_ethtool_get_stats(phydev, NULL, data);
- put_device(&phydev->mdio.dev);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats);
-
-int dsa_port_get_phy_sset_count(struct dsa_port *dp)
-{
- struct phy_device *phydev;
- int ret = -EOPNOTSUPP;
-
- if (of_phy_is_fixed_link(dp->dn))
- return ret;
-
- phydev = dsa_port_get_phy_device(dp);
- if (IS_ERR_OR_NULL(phydev))
- return ret;
-
- ret = phy_ethtool_get_sset_count(phydev);
- put_device(&phydev->mdio.dev);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
-
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
{
struct dsa_switch *ds = dp->ds;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2f6caf5d037e..a61a7c54af20 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -23,6 +23,114 @@
#include "dsa_priv.h"
+static void dsa_slave_standalone_event_work(struct work_struct *work)
+{
+ struct dsa_standalone_event_work *standalone_work =
+ container_of(work, struct dsa_standalone_event_work, work);
+ const unsigned char *addr = standalone_work->addr;
+ struct net_device *dev = standalone_work->dev;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_mdb mdb;
+ struct dsa_switch *ds = dp->ds;
+ u16 vid = standalone_work->vid;
+ int err;
+
+ switch (standalone_work->event) {
+ case DSA_UC_ADD:
+ err = dsa_port_standalone_host_fdb_add(dp, addr, vid);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to add %pM vid %d to fdb: %d\n",
+ dp->index, addr, vid, err);
+ break;
+ }
+ break;
+
+ case DSA_UC_DEL:
+ err = dsa_port_standalone_host_fdb_del(dp, addr, vid);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to delete %pM vid %d from fdb: %d\n",
+ dp->index, addr, vid, err);
+ }
+
+ break;
+ case DSA_MC_ADD:
+ ether_addr_copy(mdb.addr, addr);
+ mdb.vid = vid;
+
+ err = dsa_port_standalone_host_mdb_add(dp, &mdb);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to add %pM vid %d to mdb: %d\n",
+ dp->index, addr, vid, err);
+ break;
+ }
+ break;
+ case DSA_MC_DEL:
+ ether_addr_copy(mdb.addr, addr);
+ mdb.vid = vid;
+
+ err = dsa_port_standalone_host_mdb_del(dp, &mdb);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to delete %pM vid %d from mdb: %d\n",
+ dp->index, addr, vid, err);
+ }
+
+ break;
+ }
+
+ kfree(standalone_work);
+}
+
+static int dsa_slave_schedule_standalone_work(struct net_device *dev,
+ enum dsa_standalone_event event,
+ const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_standalone_event_work *standalone_work;
+
+ standalone_work = kzalloc(sizeof(*standalone_work), GFP_ATOMIC);
+ if (!standalone_work)
+ return -ENOMEM;
+
+ INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work);
+ standalone_work->event = event;
+ standalone_work->dev = dev;
+
+ ether_addr_copy(standalone_work->addr, addr);
+ standalone_work->vid = vid;
+
+ dsa_schedule_work(&standalone_work->work);
+
+ return 0;
+}
+
+static int dsa_slave_sync_uc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
+}
+
+static int dsa_slave_unsync_uc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0);
+}
+
+static int dsa_slave_sync_mc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0);
+}
+
+static int dsa_slave_unsync_mc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0);
+}
+
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -67,6 +175,7 @@ static int dsa_slave_open(struct net_device *dev)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int err;
err = dev_open(master, NULL);
@@ -75,38 +184,30 @@ static int dsa_slave_open(struct net_device *dev)
goto out;
}
- if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
- err = dev_uc_add(master, dev->dev_addr);
- if (err < 0)
+ if (dsa_switch_supports_uc_filtering(ds)) {
+ err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0);
+ if (err)
goto out;
}
- if (dev->flags & IFF_ALLMULTI) {
- err = dev_set_allmulti(master, 1);
- if (err < 0)
- goto del_unicast;
- }
- if (dev->flags & IFF_PROMISC) {
- err = dev_set_promiscuity(master, 1);
+ if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
+ err = dev_uc_add(master, dev->dev_addr);
if (err < 0)
- goto clear_allmulti;
+ goto del_host_addr;
}
err = dsa_port_enable_rt(dp, dev->phydev);
if (err)
- goto clear_promisc;
+ goto del_unicast;
return 0;
-clear_promisc:
- if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, -1);
-clear_allmulti:
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(master, -1);
del_unicast:
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
+del_host_addr:
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
out:
return err;
}
@@ -115,68 +216,129 @@ static int dsa_slave_close(struct net_device *dev)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
dsa_port_disable_rt(dp);
- dev_mc_unsync(master, dev);
- dev_uc_unsync(master, dev);
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(master, -1);
- if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, -1);
-
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+
return 0;
}
+/* Keep flooding enabled towards this port's CPU port as long as it serves at
+ * least one port in the tree that requires it.
+ */
+static void dsa_port_manage_cpu_flood(struct dsa_port *dp)
+{
+ struct switchdev_brport_flags flags = {
+ .mask = BR_FLOOD | BR_MCAST_FLOOD,
+ };
+ struct dsa_switch_tree *dst = dp->ds->dst;
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_port *other_dp;
+ int err;
+
+ list_for_each_entry(other_dp, &dst->ports, list) {
+ if (!dsa_port_is_user(other_dp))
+ continue;
+
+ if (other_dp->cpu_dp != cpu_dp)
+ continue;
+
+ if (other_dp->slave->flags & IFF_ALLMULTI)
+ flags.val |= BR_MCAST_FLOOD;
+ if (other_dp->slave->flags & IFF_PROMISC)
+ flags.val |= BR_FLOOD;
+ }
+
+ err = dsa_port_pre_bridge_flags(dp, flags, NULL);
+ if (err)
+ return;
+
+ dsa_port_bridge_flags(cpu_dp, flags, NULL);
+}
+
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
struct net_device *master = dsa_slave_to_master(dev);
- if (dev->flags & IFF_UP) {
- if (change & IFF_ALLMULTI)
- dev_set_allmulti(master,
- dev->flags & IFF_ALLMULTI ? 1 : -1);
- if (change & IFF_PROMISC)
- dev_set_promiscuity(master,
- dev->flags & IFF_PROMISC ? 1 : -1);
- }
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(master,
+ dev->flags & IFF_ALLMULTI ? 1 : -1);
+ if (change & IFF_PROMISC)
+ dev_set_promiscuity(master,
+ dev->flags & IFF_PROMISC ? 1 : -1);
+
+ if (dsa_switch_supports_uc_filtering(ds) &&
+ dsa_switch_supports_mc_filtering(ds))
+ dsa_port_manage_cpu_flood(dp);
}
static void dsa_slave_set_rx_mode(struct net_device *dev)
{
struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
dev_mc_sync(master, dev);
dev_uc_sync(master, dev);
+ if (dsa_switch_supports_mc_filtering(ds))
+ __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc);
+ if (dsa_switch_supports_uc_filtering(ds))
+ __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc);
}
static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
{
struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
struct sockaddr *addr = a;
int err;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ /* If the port is down, the address isn't synced yet to hardware or
+ * to the DSA master, so there is nothing to change.
+ */
if (!(dev->flags & IFF_UP))
- goto out;
+ goto out_change_dev_addr;
+
+ if (dsa_switch_supports_uc_filtering(ds)) {
+ err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0);
+ if (err)
+ return err;
+ }
if (!ether_addr_equal(addr->sa_data, master->dev_addr)) {
err = dev_uc_add(master, addr->sa_data);
if (err < 0)
- return err;
+ goto del_unicast;
}
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
-out:
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+
+out_change_dev_addr:
eth_hw_addr_set(dev, addr->sa_data);
return 0;
+
+del_unicast:
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0);
+
+ return err;
}
struct dsa_slave_dump_ctx {
@@ -348,9 +510,8 @@ static int dsa_slave_vlan_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct switchdev_obj_port_vlan vlan;
+ struct switchdev_obj_port_vlan *vlan;
int err;
if (dsa_port_skip_vlan_configuration(dp)) {
@@ -358,14 +519,14 @@ static int dsa_slave_vlan_add(struct net_device *dev,
return 0;
}
- vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
/* Deny adding a bridge VLAN when there is already an 802.1Q upper with
* the same VID.
*/
if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) {
rcu_read_lock();
- err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan);
+ err = dsa_slave_vlan_check_for_8021q_uppers(dev, vlan);
rcu_read_unlock();
if (err) {
NL_SET_ERR_MSG_MOD(extack,
@@ -374,21 +535,36 @@ static int dsa_slave_vlan_add(struct net_device *dev,
}
}
- err = dsa_port_vlan_add(dp, &vlan, extack);
- if (err)
- return err;
+ return dsa_port_vlan_add(dp, vlan, extack);
+}
- /* We need the dedicated CPU port to be a member of the VLAN as well.
- * Even though drivers often handle CPU membership in special ways,
+/* Offload a VLAN installed on the bridge or on a foreign interface by
+ * installing it as a VLAN towards the CPU port.
+ */
+static int dsa_slave_host_vlan_add(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_vlan vlan;
+
+ /* Do nothing if this is a software bridge */
+ if (!dp->bridge)
+ return -EOPNOTSUPP;
+
+ if (dsa_port_skip_vlan_configuration(dp)) {
+ NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
+ return 0;
+ }
+
+ vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+ /* Even though drivers often handle CPU membership in special ways,
* it doesn't make sense to program a PVID, so clear this flag.
*/
vlan.flags &= ~BRIDGE_VLAN_INFO_PVID;
- err = dsa_port_vlan_add(dp->cpu_dp, &vlan, extack);
- if (err)
- return err;
-
- return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid);
+ return dsa_port_host_vlan_add(dp, &vlan, extack);
}
static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
@@ -412,13 +588,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
return -EOPNOTSUPP;
- err = dsa_port_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
- return -EOPNOTSUPP;
-
- err = dsa_slave_vlan_add(dev, obj, extack);
+ if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+ err = dsa_slave_vlan_add(dev, obj, extack);
+ else
+ err = dsa_slave_host_vlan_add(dev, obj, extack);
break;
case SWITCHDEV_OBJ_ID_MRP:
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
@@ -444,26 +620,33 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
static int dsa_slave_vlan_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan *vlan;
- int err;
if (dsa_port_skip_vlan_configuration(dp))
return 0;
vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
- /* Do not deprogram the CPU port as it may be shared with other user
- * ports which can be members of this VLAN as well.
- */
- err = dsa_port_vlan_del(dp, vlan);
- if (err)
- return err;
+ return dsa_port_vlan_del(dp, vlan);
+}
- vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid);
+static int dsa_slave_host_vlan_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_vlan *vlan;
- return 0;
+ /* Do nothing if this is a software bridge */
+ if (!dp->bridge)
+ return -EOPNOTSUPP;
+
+ if (dsa_port_skip_vlan_configuration(dp))
+ return 0;
+
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+ return dsa_port_host_vlan_del(dp, vlan);
}
static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
@@ -486,13 +669,13 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
return -EOPNOTSUPP;
- err = dsa_port_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
- return -EOPNOTSUPP;
-
- err = dsa_slave_vlan_del(dev, obj);
+ if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+ err = dsa_slave_vlan_del(dev, obj);
+ else
+ err = dsa_slave_host_vlan_del(dev, obj);
break;
case SWITCHDEV_OBJ_ID_MRP:
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
@@ -1347,7 +1530,6 @@ static int dsa_slave_get_ts_info(struct net_device *dev,
static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
@@ -1367,7 +1549,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
}
/* And CPU port... */
- ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &extack);
+ ret = dsa_port_host_vlan_add(dp, &vlan, &extack);
if (ret) {
if (extack._msg)
netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index,
@@ -1375,13 +1557,12 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
return ret;
}
- return vlan_vid_add(master, proto, vid);
+ return 0;
}
static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.vid = vid,
@@ -1390,16 +1571,11 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
};
int err;
- /* Do not deprogram the CPU port as it may be shared with other user
- * ports which can be members of this VLAN as well.
- */
err = dsa_port_vlan_del(dp, &vlan);
if (err)
return err;
- vlan_vid_del(master, proto, vid);
-
- return 0;
+ return dsa_port_host_vlan_del(dp, &vlan);
}
static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
@@ -1934,6 +2110,8 @@ int dsa_slave_create(struct dsa_port *port)
else
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
+ if (dsa_switch_supports_uc_filtering(ds))
+ slave_dev->priv_flags |= IFF_UNICAST_FLT;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
if (ds->ops->port_max_mtu)
slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
@@ -2120,7 +2298,7 @@ dsa_slave_lag_changeupper(struct net_device *dev,
continue;
dp = dsa_slave_to_port(lower);
- if (!dp->lag_dev)
+ if (!dp->lag)
/* Software LAG */
continue;
@@ -2149,7 +2327,7 @@ dsa_slave_lag_prechangeupper(struct net_device *dev,
continue;
dp = dsa_slave_to_port(lower);
- if (!dp->lag_dev)
+ if (!dp->lag)
/* Software LAG */
continue;
@@ -2359,43 +2537,40 @@ static void
dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
{
struct switchdev_notifier_fdb_info info = {};
- struct dsa_switch *ds = switchdev_work->ds;
- struct dsa_port *dp;
-
- if (!dsa_is_user_port(ds, switchdev_work->port))
- return;
info.addr = switchdev_work->addr;
info.vid = switchdev_work->vid;
info.offloaded = true;
- dp = dsa_to_port(ds, switchdev_work->port);
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
- dp->slave, &info.info, NULL);
+ switchdev_work->orig_dev, &info.info, NULL);
}
static void dsa_slave_switchdev_event_work(struct work_struct *work)
{
struct dsa_switchdev_event_work *switchdev_work =
container_of(work, struct dsa_switchdev_event_work, work);
- struct dsa_switch *ds = switchdev_work->ds;
+ const unsigned char *addr = switchdev_work->addr;
+ struct net_device *dev = switchdev_work->dev;
+ u16 vid = switchdev_work->vid;
+ struct dsa_switch *ds;
struct dsa_port *dp;
int err;
- dp = dsa_to_port(ds, switchdev_work->port);
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
if (switchdev_work->host_addr)
- err = dsa_port_host_fdb_add(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_bridge_host_fdb_add(dp, addr, vid);
+ else if (dp->lag)
+ err = dsa_port_lag_fdb_add(dp, addr, vid);
else
- err = dsa_port_fdb_add(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_fdb_add(dp, addr, vid);
if (err) {
dev_err(ds->dev,
"port %d failed to add %pM vid %d to fdb: %d\n",
- dp->index, switchdev_work->addr,
- switchdev_work->vid, err);
+ dp->index, addr, vid, err);
break;
}
dsa_fdb_offload_notify(switchdev_work);
@@ -2403,16 +2578,15 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
case SWITCHDEV_FDB_DEL_TO_DEVICE:
if (switchdev_work->host_addr)
- err = dsa_port_host_fdb_del(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_bridge_host_fdb_del(dp, addr, vid);
+ else if (dp->lag)
+ err = dsa_port_lag_fdb_del(dp, addr, vid);
else
- err = dsa_port_fdb_del(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_fdb_del(dp, addr, vid);
if (err) {
dev_err(ds->dev,
"port %d failed to delete %pM vid %d from fdb: %d\n",
- dp->index, switchdev_work->addr,
- switchdev_work->vid, err);
+ dp->index, addr, vid, err);
}
break;
@@ -2450,19 +2624,17 @@ static int dsa_slave_fdb_event(struct net_device *dev,
if (ctx && ctx != dp)
return 0;
- if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
- return -EOPNOTSUPP;
+ if (switchdev_fdb_is_dynamically_learned(fdb_info)) {
+ if (dsa_port_offloads_bridge_port(dp, orig_dev))
+ return 0;
- if (dsa_slave_dev_check(orig_dev) &&
- switchdev_fdb_is_dynamically_learned(fdb_info))
- return 0;
-
- /* FDB entries learned by the software bridge should be installed as
- * host addresses only if the driver requests assisted learning.
- */
- if (switchdev_fdb_is_dynamically_learned(fdb_info) &&
- !ds->assisted_learning_on_cpu_port)
- return 0;
+ /* FDB entries learned by the software bridge or by foreign
+ * bridge ports should be installed as host addresses only if
+ * the driver requests assisted learning.
+ */
+ if (!ds->assisted_learning_on_cpu_port)
+ return 0;
+ }
/* Also treat FDB entries on foreign interfaces bridged with us as host
* addresses.
@@ -2470,6 +2642,18 @@ static int dsa_slave_fdb_event(struct net_device *dev,
if (dsa_foreign_dev_check(dev, orig_dev))
host_addr = true;
+ /* Check early that we're not doing work in vain.
+ * Host addresses on LAG ports still require regular FDB ops,
+ * since the CPU port isn't in a LAG.
+ */
+ if (dp->lag && !host_addr) {
+ if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del)
+ return -EOPNOTSUPP;
+ } else {
+ if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
+ }
+
switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
if (!switchdev_work)
return -ENOMEM;
@@ -2480,10 +2664,9 @@ static int dsa_slave_fdb_event(struct net_device *dev,
host_addr ? " as host address" : "");
INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
- switchdev_work->ds = ds;
- switchdev_work->port = dp->index;
switchdev_work->event = event;
switchdev_work->dev = dev;
+ switchdev_work->orig_dev = orig_dev;
ether_addr_copy(switchdev_work->addr, fdb_info->addr);
switchdev_work->vid = fdb_info->vid;
@@ -2512,8 +2695,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
dsa_slave_dev_check,
dsa_foreign_dev_check,
- dsa_slave_fdb_event,
- NULL);
+ dsa_slave_fdb_event);
return notifier_from_errno(err);
default:
return NOTIFY_DONE;
@@ -2530,14 +2712,16 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
- err = switchdev_handle_port_obj_add(dev, ptr,
- dsa_slave_dev_check,
- dsa_slave_port_obj_add);
+ err = switchdev_handle_port_obj_add_foreign(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_port_obj_add);
return notifier_from_errno(err);
case SWITCHDEV_PORT_OBJ_DEL:
- err = switchdev_handle_port_obj_del(dev, ptr,
- dsa_slave_dev_check,
- dsa_slave_port_obj_del);
+ err = switchdev_handle_port_obj_del_foreign(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_port_obj_del);
return notifier_from_errno(err);
case SWITCHDEV_PORT_ATTR_SET:
err = switchdev_handle_port_attr_set(dev, ptr,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 4866b58649e4..d25cd1da3eb3 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -96,7 +96,8 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
return -EOPNOTSUPP;
err = ds->ops->port_bridge_join(ds, info->port, info->bridge,
- &info->tx_fwd_offload);
+ &info->tx_fwd_offload,
+ info->extack);
if (err)
return err;
}
@@ -105,12 +106,13 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
ds->ops->crosschip_bridge_join) {
err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
info->sw_index,
- info->port, info->bridge);
+ info->port, info->bridge,
+ info->extack);
if (err)
return err;
}
- return dsa_tag_8021q_bridge_join(ds, info);
+ return 0;
}
static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
@@ -186,7 +188,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
return err;
}
- return dsa_tag_8021q_bridge_leave(ds, info);
+ return 0;
}
/* Matches for all upstream-facing ports (the CPU port and all upstream-facing
@@ -211,20 +213,22 @@ static bool dsa_port_host_address_match(struct dsa_port *dp,
}
static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list,
- const unsigned char *addr,
- u16 vid)
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct dsa_mac_addr *a;
list_for_each_entry(a, addr_list, list)
- if (ether_addr_equal(a->addr, addr) && a->vid == vid)
+ if (ether_addr_equal(a->addr, addr) && a->vid == vid &&
+ dsa_db_equal(&a->db, &db))
return a;
return NULL;
}
static int dsa_port_do_mdb_add(struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -233,11 +237,11 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_mdb_add(ds, port, mdb);
+ return ds->ops->port_mdb_add(ds, port, mdb, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+ a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (a) {
refcount_inc(&a->refcount);
goto out;
@@ -249,7 +253,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
goto out;
}
- err = ds->ops->port_mdb_add(ds, port, mdb);
+ err = ds->ops->port_mdb_add(ds, port, mdb, db);
if (err) {
kfree(a);
goto out;
@@ -257,6 +261,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
ether_addr_copy(a->addr, mdb->addr);
a->vid = mdb->vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->mdbs);
@@ -267,7 +272,8 @@ out:
}
static int dsa_port_do_mdb_del(struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -276,11 +282,11 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_mdb_del(ds, port, mdb);
+ return ds->ops->port_mdb_del(ds, port, mdb, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+ a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (!a) {
err = -ENOENT;
goto out;
@@ -289,7 +295,7 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
if (!refcount_dec_and_test(&a->refcount))
goto out;
- err = ds->ops->port_mdb_del(ds, port, mdb);
+ err = ds->ops->port_mdb_del(ds, port, mdb, db);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -305,7 +311,7 @@ out:
}
static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+ u16 vid, struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -314,11 +320,11 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_fdb_add(ds, port, addr, vid);
+ return ds->ops->port_fdb_add(ds, port, addr, vid, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+ a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (a) {
refcount_inc(&a->refcount);
goto out;
@@ -330,7 +336,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
goto out;
}
- err = ds->ops->port_fdb_add(ds, port, addr, vid);
+ err = ds->ops->port_fdb_add(ds, port, addr, vid, db);
if (err) {
kfree(a);
goto out;
@@ -338,6 +344,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
ether_addr_copy(a->addr, addr);
a->vid = vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->fdbs);
@@ -348,7 +355,7 @@ out:
}
static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+ u16 vid, struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -357,11 +364,11 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_fdb_del(ds, port, addr, vid);
+ return ds->ops->port_fdb_del(ds, port, addr, vid, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+ a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (!a) {
err = -ENOENT;
goto out;
@@ -370,7 +377,7 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
if (!refcount_dec_and_test(&a->refcount))
goto out;
- err = ds->ops->port_fdb_del(ds, port, addr, vid);
+ err = ds->ops->port_fdb_del(ds, port, addr, vid, db);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -385,6 +392,77 @@ out:
return err;
}
+static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
+ if (a) {
+ refcount_inc(&a->refcount);
+ goto out;
+ }
+
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db);
+ if (err) {
+ kfree(a);
+ goto out;
+ }
+
+ ether_addr_copy(a->addr, addr);
+ a->vid = vid;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &lag->fdbs);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+
+ return err;
+}
+
+static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!refcount_dec_and_test(&a->refcount))
+ goto out;
+
+ err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db);
+ if (err) {
+ refcount_set(&a->refcount, 1);
+ goto out;
+ }
+
+ list_del(&a->list);
+ kfree(a);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+
+ return err;
+}
+
static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
@@ -397,7 +475,8 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
dsa_switch_for_each_port(dp, ds) {
if (dsa_port_host_address_match(dp, info->sw_index,
info->port)) {
- err = dsa_port_do_fdb_add(dp, info->addr, info->vid);
+ err = dsa_port_do_fdb_add(dp, info->addr, info->vid,
+ info->db);
if (err)
break;
}
@@ -418,7 +497,8 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
dsa_switch_for_each_port(dp, ds) {
if (dsa_port_host_address_match(dp, info->sw_index,
info->port)) {
- err = dsa_port_do_fdb_del(dp, info->addr, info->vid);
+ err = dsa_port_do_fdb_del(dp, info->addr, info->vid,
+ info->db);
if (err)
break;
}
@@ -436,7 +516,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return dsa_port_do_fdb_add(dp, info->addr, info->vid);
+ return dsa_port_do_fdb_add(dp, info->addr, info->vid, info->db);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
@@ -448,7 +528,43 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return dsa_port_do_fdb_del(dp, info->addr, info->vid);
+ return dsa_port_do_fdb_del(dp, info->addr, info->vid, info->db);
+}
+
+static int dsa_switch_lag_fdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_lag_fdb_info *info)
+{
+ struct dsa_port *dp;
+
+ if (!ds->ops->lag_fdb_add)
+ return -EOPNOTSUPP;
+
+ /* Notify switch only if it has a port in this LAG */
+ dsa_switch_for_each_port(dp, ds)
+ if (dsa_port_offloads_lag(dp, info->lag))
+ return dsa_switch_do_lag_fdb_add(ds, info->lag,
+ info->addr, info->vid,
+ info->db);
+
+ return 0;
+}
+
+static int dsa_switch_lag_fdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_lag_fdb_info *info)
+{
+ struct dsa_port *dp;
+
+ if (!ds->ops->lag_fdb_del)
+ return -EOPNOTSUPP;
+
+ /* Notify switch only if it has a port in this LAG */
+ dsa_switch_for_each_port(dp, ds)
+ if (dsa_port_offloads_lag(dp, info->lag))
+ return dsa_switch_do_lag_fdb_del(ds, info->lag,
+ info->addr, info->vid,
+ info->db);
+
+ return 0;
}
static int dsa_switch_lag_change(struct dsa_switch *ds,
@@ -501,7 +617,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (!ds->ops->port_mdb_add)
return -EOPNOTSUPP;
- return dsa_port_do_mdb_add(dp, info->mdb);
+ return dsa_port_do_mdb_add(dp, info->mdb, info->db);
}
static int dsa_switch_mdb_del(struct dsa_switch *ds,
@@ -513,7 +629,7 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds,
if (!ds->ops->port_mdb_del)
return -EOPNOTSUPP;
- return dsa_port_do_mdb_del(dp, info->mdb);
+ return dsa_port_do_mdb_del(dp, info->mdb, info->db);
}
static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
@@ -528,7 +644,7 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
dsa_switch_for_each_port(dp, ds) {
if (dsa_port_host_address_match(dp, info->sw_index,
info->port)) {
- err = dsa_port_do_mdb_add(dp, info->mdb);
+ err = dsa_port_do_mdb_add(dp, info->mdb, info->db);
if (err)
break;
}
@@ -549,7 +665,7 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
dsa_switch_for_each_port(dp, ds) {
if (dsa_port_host_address_match(dp, info->sw_index,
info->port)) {
- err = dsa_port_do_mdb_del(dp, info->mdb);
+ err = dsa_port_do_mdb_del(dp, info->mdb, info->db);
if (err)
break;
}
@@ -558,6 +674,7 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
return err;
}
+/* Port VLANs match on the targeted port and on all DSA ports */
static bool dsa_port_vlan_match(struct dsa_port *dp,
struct dsa_notifier_vlan_info *info)
{
@@ -570,6 +687,126 @@ static bool dsa_port_vlan_match(struct dsa_port *dp,
return false;
}
+/* Host VLANs match on the targeted port's CPU port, and on all DSA ports
+ * (upstream and downstream) of that switch and its upstream switches.
+ */
+static bool dsa_port_host_vlan_match(struct dsa_port *dp,
+ struct dsa_notifier_vlan_info *info)
+{
+ struct dsa_port *targeted_dp, *cpu_dp;
+ struct dsa_switch *targeted_ds;
+
+ targeted_ds = dsa_switch_find(dp->ds->dst->index, info->sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info->port);
+ cpu_dp = targeted_dp->cpu_dp;
+
+ if (dsa_switch_is_upstream_of(dp->ds, targeted_ds))
+ return dsa_port_is_dsa(dp) || dp == cpu_dp;
+
+ return false;
+}
+
+static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_vlan *v;
+
+ list_for_each_entry(v, vlan_list, list)
+ if (v->vid == vlan->vid)
+ return v;
+
+ return NULL;
+}
+
+static int dsa_port_do_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ struct dsa_vlan *v;
+ int err = 0;
+
+ /* No need to bother with refcounting for user ports. */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_vlan_add(ds, port, vlan, extack);
+
+ /* No need to propagate on shared ports the existing VLANs that were
+ * re-notified after just the flags have changed. This would cause a
+ * refcount bump which we need to avoid, since it unbalances the
+ * additions with the deletions.
+ */
+ if (vlan->changed)
+ return 0;
+
+ mutex_lock(&dp->vlans_lock);
+
+ v = dsa_vlan_find(&dp->vlans, vlan);
+ if (v) {
+ refcount_inc(&v->refcount);
+ goto out;
+ }
+
+ v = kzalloc(sizeof(*v), GFP_KERNEL);
+ if (!v) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = ds->ops->port_vlan_add(ds, port, vlan, extack);
+ if (err) {
+ kfree(v);
+ goto out;
+ }
+
+ v->vid = vlan->vid;
+ refcount_set(&v->refcount, 1);
+ list_add_tail(&v->list, &dp->vlans);
+
+out:
+ mutex_unlock(&dp->vlans_lock);
+
+ return err;
+}
+
+static int dsa_port_do_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ struct dsa_vlan *v;
+ int err = 0;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_vlan_del(ds, port, vlan);
+
+ mutex_lock(&dp->vlans_lock);
+
+ v = dsa_vlan_find(&dp->vlans, vlan);
+ if (!v) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!refcount_dec_and_test(&v->refcount))
+ goto out;
+
+ err = ds->ops->port_vlan_del(ds, port, vlan);
+ if (err) {
+ refcount_set(&v->refcount, 1);
+ goto out;
+ }
+
+ list_del(&v->list);
+ kfree(v);
+
+out:
+ mutex_unlock(&dp->vlans_lock);
+
+ return err;
+}
+
static int dsa_switch_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
@@ -581,8 +818,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
dsa_switch_for_each_port(dp, ds) {
if (dsa_port_vlan_match(dp, info)) {
- err = ds->ops->port_vlan_add(ds, dp->index, info->vlan,
- info->extack);
+ err = dsa_port_do_vlan_add(dp, info->vlan,
+ info->extack);
if (err)
return err;
}
@@ -594,15 +831,61 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
static int dsa_switch_vlan_del(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
+ struct dsa_port *dp;
+ int err;
+
if (!ds->ops->port_vlan_del)
return -EOPNOTSUPP;
- if (ds->index == info->sw_index)
- return ds->ops->port_vlan_del(ds, info->port, info->vlan);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_vlan_match(dp, info)) {
+ err = dsa_port_do_vlan_del(dp, info->vlan);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int dsa_switch_host_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
+{
+ struct dsa_port *dp;
+ int err;
+
+ if (!ds->ops->port_vlan_add)
+ return -EOPNOTSUPP;
+
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_vlan_match(dp, info)) {
+ err = dsa_port_do_vlan_add(dp, info->vlan,
+ info->extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int dsa_switch_host_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
+{
+ struct dsa_port *dp;
+ int err;
+
+ if (!ds->ops->port_vlan_del)
+ return -EOPNOTSUPP;
+
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_vlan_match(dp, info)) {
+ err = dsa_port_do_vlan_del(dp, info->vlan);
+ if (err)
+ return err;
+ }
+ }
- /* Do not deprogram the DSA links as they may be used as conduit
- * for other VLAN members in the fabric.
- */
return 0;
}
@@ -737,6 +1020,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_HOST_FDB_DEL:
err = dsa_switch_host_fdb_del(ds, info);
break;
+ case DSA_NOTIFIER_LAG_FDB_ADD:
+ err = dsa_switch_lag_fdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_LAG_FDB_DEL:
+ err = dsa_switch_lag_fdb_del(ds, info);
+ break;
case DSA_NOTIFIER_LAG_CHANGE:
err = dsa_switch_lag_change(ds, info);
break;
@@ -764,6 +1053,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_VLAN_DEL:
err = dsa_switch_vlan_del(ds, info);
break;
+ case DSA_NOTIFIER_HOST_VLAN_ADD:
+ err = dsa_switch_host_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_HOST_VLAN_DEL:
+ err = dsa_switch_host_vlan_del(ds, info);
+ break;
case DSA_NOTIFIER_MTU:
err = dsa_switch_mtu(ds, info);
break;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 27712a81c967..a786569203f0 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -16,15 +16,11 @@
*
* | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* +-----------+-----+-----------------+-----------+-----------------------+
- * | DIR | VBID| SWITCH_ID | VBID | PORT |
+ * | RSV | VBID| SWITCH_ID | VBID | PORT |
* +-----------+-----+-----------------+-----------+-----------------------+
*
- * DIR - VID[11:10]:
- * Direction flags.
- * * 1 (0b01) for RX VLAN,
- * * 2 (0b10) for TX VLAN.
- * These values make the special VIDs of 0, 1 and 4095 to be left
- * unused by this coding scheme.
+ * RSV - VID[11:10]:
+ * Reserved. Must be set to 3 (0b11).
*
* SWITCH_ID - VID[8:6]:
* Index of switch within DSA tree. Must be between 0 and 7.
@@ -32,18 +28,17 @@
* VBID - { VID[9], VID[5:4] }:
* Virtual bridge ID. If between 1 and 7, packet targets the broadcast
* domain of a bridge. If transmitted as zero, packet targets a single
- * port. Field only valid on transmit, must be ignored on receive.
+ * port.
*
* PORT - VID[3:0]:
* Index of switch port. Must be between 0 and 15.
*/
-#define DSA_8021Q_DIR_SHIFT 10
-#define DSA_8021Q_DIR_MASK GENMASK(11, 10)
-#define DSA_8021Q_DIR(x) (((x) << DSA_8021Q_DIR_SHIFT) & \
- DSA_8021Q_DIR_MASK)
-#define DSA_8021Q_DIR_RX DSA_8021Q_DIR(1)
-#define DSA_8021Q_DIR_TX DSA_8021Q_DIR(2)
+#define DSA_8021Q_RSV_VAL 3
+#define DSA_8021Q_RSV_SHIFT 10
+#define DSA_8021Q_RSV_MASK GENMASK(11, 10)
+#define DSA_8021Q_RSV ((DSA_8021Q_RSV_VAL << DSA_8021Q_RSV_SHIFT) & \
+ DSA_8021Q_RSV_MASK)
#define DSA_8021Q_SWITCH_ID_SHIFT 6
#define DSA_8021Q_SWITCH_ID_MASK GENMASK(8, 6)
@@ -67,34 +62,24 @@
#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \
DSA_8021Q_PORT_MASK)
-u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num)
+u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num)
{
/* The VBID value of 0 is reserved for precise TX, but it is also
* reserved/invalid for the bridge_num, so all is well.
*/
- return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num);
+ return DSA_8021Q_RSV | DSA_8021Q_VBID(bridge_num);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
-
-/* Returns the VID to be inserted into the frame from xmit for switch steering
- * instructions on egress. Encodes switch ID and port ID.
- */
-u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp)
-{
- return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
- DSA_8021Q_PORT(dp->index);
-}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_vid);
/* Returns the VID that will be installed as pvid for this switch port, sent as
* tagged egress towards the CPU port and decoded by the rcv function.
*/
-u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp)
+u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp)
{
- return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
+ return DSA_8021Q_RSV | DSA_8021Q_SWITCH_ID(dp->ds->index) |
DSA_8021Q_PORT(dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_standalone_vid);
/* Returns the decoded switch ID from the RX VID. */
int dsa_8021q_rx_switch_id(u16 vid)
@@ -110,21 +95,20 @@ int dsa_8021q_rx_source_port(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
-bool vid_is_dsa_8021q_rxvlan(u16 vid)
+/* Returns the decoded VBID from the RX VID. */
+static int dsa_tag_8021q_rx_vbid(u16 vid)
{
- return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
-}
-EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_rxvlan);
+ u16 vbid_hi = (vid & DSA_8021Q_VBID_HI_MASK) >> DSA_8021Q_VBID_HI_SHIFT;
+ u16 vbid_lo = (vid & DSA_8021Q_VBID_LO_MASK) >> DSA_8021Q_VBID_LO_SHIFT;
-bool vid_is_dsa_8021q_txvlan(u16 vid)
-{
- return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX;
+ return (vbid_hi << 2) | vbid_lo;
}
-EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_txvlan);
bool vid_is_dsa_8021q(u16 vid)
{
- return vid_is_dsa_8021q_rxvlan(vid) || vid_is_dsa_8021q_txvlan(vid);
+ u16 rsv = (vid & DSA_8021Q_RSV_MASK) >> DSA_8021Q_RSV_SHIFT;
+
+ return rsv == DSA_8021Q_RSV_VAL;
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
@@ -242,12 +226,8 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
u16 flags = 0;
if (dsa_port_is_user(dp))
- flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-
- if (vid_is_dsa_8021q_rxvlan(info->vid) &&
- dsa_8021q_rx_switch_id(info->vid) == ds->index &&
- dsa_8021q_rx_source_port(info->vid) == dp->index)
- flags |= BRIDGE_VLAN_INFO_PVID;
+ flags |= BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_PVID;
err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid,
flags);
@@ -279,162 +259,78 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
return 0;
}
-/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
- * front-panel switch port (here swp0).
+/* There are 2 ways of offloading tag_8021q VLANs.
*
- * Port identification through VLAN (802.1Q) tags has different requirements
- * for it to work effectively:
- * - On RX (ingress from network): each front-panel port must have a pvid
- * that uniquely identifies it, and the egress of this pvid must be tagged
- * towards the CPU port, so that software can recover the source port based
- * on the VID in the frame. But this would only work for standalone ports;
- * if bridged, this VLAN setup would break autonomous forwarding and would
- * force all switched traffic to pass through the CPU. So we must also make
- * the other front-panel ports members of this VID we're adding, albeit
- * we're not making it their PVID (they'll still have their own).
- * - On TX (ingress from CPU and towards network) we are faced with a problem.
- * If we were to tag traffic (from within DSA) with the port's pvid, all
- * would be well, assuming the switch ports were standalone. Frames would
- * have no choice but to be directed towards the correct front-panel port.
- * But because we also want the RX VLAN to not break bridging, then
- * inevitably that means that we have to give them a choice (of what
- * front-panel port to go out on), and therefore we cannot steer traffic
- * based on the RX VID. So what we do is simply install one more VID on the
- * front-panel and CPU ports, and profit off of the fact that steering will
- * work just by virtue of the fact that there is only one other port that's
- * a member of the VID we're tagging the traffic with - the desired one.
+ * One is to use a hardware TCAM to push the port's standalone VLAN into the
+ * frame when forwarding it to the CPU, as an egress modification rule on the
+ * CPU port. This is preferable because it has no side effects for the
+ * autonomous forwarding path, and accomplishes tag_8021q's primary goal of
+ * identifying the source port of each packet based on VLAN ID.
*
- * So at the end, each front-panel port will have one RX VID (also the PVID),
- * the RX VID of all other front-panel ports that are in the same bridge, and
- * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all
- * front-panel ports, and on top of that, is also tagged-input and
- * tagged-output (VLAN trunk).
+ * The other is to commit the tag_8021q VLAN as a PVID to the VLAN table, and
+ * to configure the port as VLAN-unaware. This is less preferable because
+ * unique source port identification can only be done for standalone ports;
+ * under a VLAN-unaware bridge, all ports share the same tag_8021q VLAN as
+ * PVID, and under a VLAN-aware bridge, packets received by software will not
+ * have tag_8021q VLANs appended, just bridge VLANs.
*
- * CPU port CPU port
- * +-------------+-----+-------------+ +-------------+-----+-------------+
- * | RX VID | | | | TX VID | | |
- * | of swp0 | | | | of swp0 | | |
- * | +-----+ | | +-----+ |
- * | ^ T | | | Tagged |
- * | | | | | ingress |
- * | +-------+---+---+-------+ | | +-----------+ |
- * | | | | | | | | Untagged |
- * | | U v U v U v | | v egress |
- * | +-----+ +-----+ +-----+ +-----+ | | +-----+ +-----+ +-----+ +-----+ |
- * | | | | | | | | | | | | | | | | | | | |
- * | |PVID | | | | | | | | | | | | | | | | | |
- * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+
- * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3
+ * For tag_8021q implementations of the second type, this method is used to
+ * replace the standalone tag_8021q VLAN of a port with the tag_8021q VLAN to
+ * be used for VLAN-unaware bridging.
*/
-static bool
-dsa_port_tag_8021q_bridge_match(struct dsa_port *dp,
- struct dsa_notifier_bridge_info *info)
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge)
{
- /* Don't match on self */
- if (dp->ds->dst->index == info->tree_index &&
- dp->ds->index == info->sw_index &&
- dp->index == info->port)
- return false;
-
- if (dsa_port_is_user(dp))
- return dsa_port_offloads_bridge(dp, &info->bridge);
-
- return false;
-}
-
-int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
-{
- struct dsa_switch *targeted_ds;
- struct dsa_port *targeted_dp;
- struct dsa_port *dp;
- u16 targeted_rx_vid;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 standalone_vid, bridge_vid;
int err;
- if (!ds->tag_8021q_ctx)
- return 0;
-
- targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info->port);
- targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
-
- dsa_switch_for_each_port(dp, ds) {
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
-
- if (!dsa_port_tag_8021q_bridge_match(dp, info))
- continue;
+ /* Delete the standalone VLAN of the port and replace it with a
+ * bridging VLAN
+ */
+ standalone_vid = dsa_tag_8021q_standalone_vid(dp);
+ bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num);
- /* Install the RX VID of the targeted port in our VLAN table */
- err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true);
- if (err)
- return err;
+ err = dsa_port_tag_8021q_vlan_add(dp, bridge_vid, true);
+ if (err)
+ return err;
- /* Install our RX VID into the targeted port's VLAN table */
- err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true);
- if (err)
- return err;
- }
+ dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false);
return 0;
}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join);
-int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
+void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge)
{
- struct dsa_switch *targeted_ds;
- struct dsa_port *targeted_dp;
- struct dsa_port *dp;
- u16 targeted_rx_vid;
-
- if (!ds->tag_8021q_ctx)
- return 0;
-
- targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info->port);
- targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
-
- dsa_switch_for_each_port(dp, ds) {
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
-
- if (!dsa_port_tag_8021q_bridge_match(dp, info))
- continue;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 standalone_vid, bridge_vid;
+ int err;
- /* Remove the RX VID of the targeted port from our VLAN table */
- dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true);
+ /* Delete the bridging VLAN of the port and replace it with a
+ * standalone VLAN
+ */
+ standalone_vid = dsa_tag_8021q_standalone_vid(dp);
+ bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num);
- /* Remove our RX VID from the targeted port's VLAN table */
- dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true);
+ err = dsa_port_tag_8021q_vlan_add(dp, standalone_vid, false);
+ if (err) {
+ dev_err(ds->dev,
+ "Failed to delete tag_8021q standalone VLAN %d from port %d: %pe\n",
+ standalone_vid, port, ERR_PTR(err));
}
- return 0;
-}
-
-int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
-{
- u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num);
-
- return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid,
- true);
+ dsa_port_tag_8021q_vlan_del(dp, bridge_vid, true);
}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_leave);
-void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
-{
- u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num);
-
- dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid, true);
-}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload);
-
-/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */
+/* Set up a port's standalone tag_8021q VLAN */
static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
{
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 vid = dsa_tag_8021q_standalone_vid(dp);
struct net_device *master;
int err;
@@ -446,30 +342,16 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
master = dp->cpu_dp->master;
- /* Add this user port's RX VID to the membership list of all others
- * (including itself). This is so that bridging will not be hindered.
- * L2 forwarding rules still take precedence when there are no VLAN
- * restrictions, so there are no concerns about leaking traffic.
- */
- err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false);
+ err = dsa_port_tag_8021q_vlan_add(dp, vid, false);
if (err) {
dev_err(ds->dev,
- "Failed to apply RX VID %d to port %d: %pe\n",
- rx_vid, port, ERR_PTR(err));
+ "Failed to apply standalone VID %d to port %d: %pe\n",
+ vid, port, ERR_PTR(err));
return err;
}
- /* Add @rx_vid to the master's RX filter. */
- vlan_vid_add(master, ctx->proto, rx_vid);
-
- /* Finally apply the TX VID on this port and on the CPU port */
- err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false);
- if (err) {
- dev_err(ds->dev,
- "Failed to apply TX VID %d on port %d: %pe\n",
- tx_vid, port, ERR_PTR(err));
- return err;
- }
+ /* Add the VLAN to the master's RX filter. */
+ vlan_vid_add(master, ctx->proto, vid);
return err;
}
@@ -478,8 +360,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
{
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 vid = dsa_tag_8021q_standalone_vid(dp);
struct net_device *master;
/* The CPU port is implicitly configured by
@@ -490,11 +371,9 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
master = dp->cpu_dp->master;
- dsa_port_tag_8021q_vlan_del(dp, rx_vid, false);
+ dsa_port_tag_8021q_vlan_del(dp, vid, false);
- vlan_vid_del(master, ctx->proto, rx_vid);
-
- dsa_port_tag_8021q_vlan_del(dp, tx_vid, false);
+ vlan_vid_del(master, ctx->proto, vid);
}
static int dsa_tag_8021q_setup(struct dsa_switch *ds)
@@ -573,23 +452,57 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id)
+struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
+ int vbid)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_port *dp;
+
+ if (WARN_ON(!vbid))
+ return NULL;
+
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (!dp->bridge)
+ continue;
+
+ if (dp->stp_state != BR_STATE_LEARNING &&
+ dp->stp_state != BR_STATE_FORWARDING)
+ continue;
+
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
+ if (dsa_port_bridge_num_get(dp) == vbid)
+ return dp->slave;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid);
+
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
+ int *vbid)
{
u16 vid, tci;
- skb_push_rcsum(skb, ETH_HLEN);
if (skb_vlan_tag_present(skb)) {
tci = skb_vlan_tag_get(skb);
__vlan_hwaccel_clear_tag(skb);
} else {
+ skb_push_rcsum(skb, ETH_HLEN);
__skb_vlan_pop(skb, &tci);
+ skb_pull_rcsum(skb, ETH_HLEN);
}
- skb_pull_rcsum(skb, ETH_HLEN);
vid = tci & VLAN_VID_MASK;
*source_port = dsa_8021q_rx_source_port(vid);
*switch_id = dsa_8021q_rx_switch_id(vid);
+
+ if (vbid)
+ *vbid = dsa_tag_8021q_rx_vbid(vid);
+
skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 8abf39dcac64..e4b6e3f2a3db 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -127,6 +127,7 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *br_dev;
u8 tag_dev, tag_port;
enum dsa_cmd cmd;
u8 *dsa_header;
@@ -149,7 +150,16 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
tag_port = dp->index;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ br_dev = dsa_port_bridge_dev_get(dp);
+
+ /* If frame is already 802.1Q tagged, we can convert it to a DSA
+ * tag (avoiding a memmove), but only if the port is standalone
+ * (in which case we always send FROM_CPU) or if the port's
+ * bridge has VLAN filtering enabled (in which case the CPU port
+ * will be a member of the VLAN).
+ */
+ if (skb->protocol == htons(ETH_P_8021Q) &&
+ (!br_dev || br_vlan_enabled(br_dev))) {
if (extra) {
skb_push(skb, extra);
dsa_alloc_etype_header(skb, extra);
@@ -166,10 +176,9 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
dsa_header[2] &= ~0x10;
}
} else {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
u16 vid;
- vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
+ vid = br_dev ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
skb_push(skb, DSA_HLEN + extra);
dsa_alloc_etype_header(skb, DSA_HLEN + extra);
@@ -246,12 +255,14 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
if (trunk) {
struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_lag *lag;
/* The exact source port is not available in the tag,
* so we inject the frame directly on the upper
* team/bond.
*/
- skb->dev = dsa_lag_dev(cpu_dp->dst, source_port);
+ lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1);
+ skb->dev = lag ? lag->dev : NULL;
} else {
skb->dev = dsa_master_find_slave(dev, source_device,
source_port);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index cb548188f813..98d7d7120bab 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -77,7 +77,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
- __be16 *lan9303_tag;
u16 lan9303_tag1;
unsigned int source_port;
@@ -87,14 +86,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- lan9303_tag = dsa_etype_header_pos_rx(skb);
-
- if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
- return NULL;
+ if (skb_vlan_tag_present(skb)) {
+ lan9303_tag1 = skb_vlan_tag_get(skb);
+ __vlan_hwaccel_clear_tag(skb);
+ } else {
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &lan9303_tag1);
+ skb_pull_rcsum(skb, ETH_HLEN);
}
- lan9303_tag1 = ntohs(lan9303_tag[1]);
source_port = lan9303_tag1 & 0x3;
skb->dev = dsa_master_find_slave(dev, 0, source_port);
@@ -103,13 +103,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* remove the special VLAN tag between the MAC addresses
- * and the current ethertype field.
- */
- skb_pull_rcsum(skb, 2 + 2);
-
- dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
-
if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 68982b2789a5..37ccf00404ea 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -32,6 +32,13 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
if (!xmit_work_fn || !xmit_worker)
return NULL;
+ /* PTP over IP packets need UDP checksumming. We may have inherited
+ * NETIF_F_HW_CSUM from the DSA master, but these packets are not sent
+ * through the DSA master, so calculate the checksum here.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
if (!xmit_work)
return NULL;
@@ -55,7 +62,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
struct ethhdr *hdr = eth_hdr(skb);
if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest))
@@ -70,7 +77,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
{
int src_port, switch_id;
- dsa_8021q_rcv(skb, &src_port, &switch_id);
+ dsa_8021q_rcv(skb, &src_port, &switch_id, NULL);
skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
if (!skb->dev)
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
index 02686ad4045d..a593ead7ff26 100644
--- a/net/dsa/tag_rtl8_4.c
+++ b/net/dsa/tag_rtl8_4.c
@@ -7,13 +7,8 @@
* NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
* named tag_rtl8_4.
*
- * This tag header has the following format:
+ * This tag has the following format:
*
- * -------------------------------------------
- * | MAC DA | MAC SA | 8 byte tag | Type | ...
- * -------------------------------------------
- * _______________/ \______________________________________
- * / \
* 0 7|8 15
* |-----------------------------------+-----------------------------------|---
* | (16-bit) | ^
@@ -58,6 +53,24 @@
* TX/RX | TX (switch->CPU): port number the packet was received on
* | RX (CPU->switch): forwarding port mask (if ALLOW=0)
* | allowance port mask (if ALLOW=1)
+ *
+ * The tag can be positioned before Ethertype, using tag "rtl8_4":
+ *
+ * +--------+--------+------------+------+-----
+ * | MAC DA | MAC SA | 8 byte tag | Type | ...
+ * +--------+--------+------------+------+-----
+ *
+ * The tag can also appear between the end of the payload and before the CRC,
+ * using tag "rtl8_4t":
+ *
+ * +--------+--------+------+-----+---------+------------+-----+
+ * | MAC DA | MAC SA | TYPE | ... | payload | 8-byte tag | CRC |
+ * +--------+--------+------+-----+---------+------------+-----+
+ *
+ * The added bytes after the payload will break most checksums, either in
+ * software or hardware. To avoid this issue, if the checksum is still pending,
+ * this tagger checksums the packet in software before adding the tag.
+ *
*/
#include <linux/bitfield.h>
@@ -84,87 +97,133 @@
#define RTL8_4_TX GENMASK(3, 0)
#define RTL8_4_RX GENMASK(10, 0)
-static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
- struct net_device *dev)
+static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev,
+ void *tag)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- __be16 *tag;
-
- skb_push(skb, RTL8_4_TAG_LEN);
-
- dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
- tag = dsa_etype_header_pos_tx(skb);
+ __be16 tag16[RTL8_4_TAG_LEN / 2];
/* Set Realtek EtherType */
- tag[0] = htons(ETH_P_REALTEK);
+ tag16[0] = htons(ETH_P_REALTEK);
/* Set Protocol; zero REASON */
- tag[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB));
+ tag16[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB));
/* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */
- tag[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1));
+ tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1));
/* Zero ALLOW; set RX (CPU->switch) forwarding port mask */
- tag[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index)));
+ tag16[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index)));
+
+ memcpy(tag, tag16, RTL8_4_TAG_LEN);
+}
+
+static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ skb_push(skb, RTL8_4_TAG_LEN);
+
+ dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
+
+ rtl8_4_write_tag(skb, dev, dsa_etype_header_pos_tx(skb));
return skb;
}
-static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
- struct net_device *dev)
+static struct sk_buff *rtl8_4t_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Calculate the checksum here if not done yet as trailing tags will
+ * break either software or hardware based checksum
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
+ rtl8_4_write_tag(skb, dev, skb_put(skb, RTL8_4_TAG_LEN));
+
+ return skb;
+}
+
+static int rtl8_4_read_tag(struct sk_buff *skb, struct net_device *dev,
+ void *tag)
{
- __be16 *tag;
+ __be16 tag16[RTL8_4_TAG_LEN / 2];
u16 etype;
u8 reason;
u8 proto;
u8 port;
- if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
- return NULL;
-
- tag = dsa_etype_header_pos_rx(skb);
+ memcpy(tag16, tag, RTL8_4_TAG_LEN);
/* Parse Realtek EtherType */
- etype = ntohs(tag[0]);
+ etype = ntohs(tag16[0]);
if (unlikely(etype != ETH_P_REALTEK)) {
dev_warn_ratelimited(&dev->dev,
"non-realtek ethertype 0x%04x\n", etype);
- return NULL;
+ return -EPROTO;
}
/* Parse Protocol */
- proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag[1]));
+ proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag16[1]));
if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) {
dev_warn_ratelimited(&dev->dev,
"unknown realtek protocol 0x%02x\n",
proto);
- return NULL;
+ return -EPROTO;
}
/* Parse REASON */
- reason = FIELD_GET(RTL8_4_REASON, ntohs(tag[1]));
+ reason = FIELD_GET(RTL8_4_REASON, ntohs(tag16[1]));
/* Parse TX (switch->CPU) */
- port = FIELD_GET(RTL8_4_TX, ntohs(tag[3]));
+ port = FIELD_GET(RTL8_4_TX, ntohs(tag16[3]));
skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev) {
dev_warn_ratelimited(&dev->dev,
"could not find slave for port %d\n",
port);
- return NULL;
+ return -ENOENT;
}
+ if (reason != RTL8_4_REASON_TRAP)
+ dsa_default_offload_fwd_mark(skb);
+
+ return 0;
+}
+
+static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
+ return NULL;
+
+ if (unlikely(rtl8_4_read_tag(skb, dev, dsa_etype_header_pos_rx(skb))))
+ return NULL;
+
/* Remove tag and recalculate checksum */
skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
- if (reason != RTL8_4_REASON_TRAP)
- dsa_default_offload_fwd_mark(skb);
+ return skb;
+}
+
+static struct sk_buff *rtl8_4t_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ if (skb_linearize(skb))
+ return NULL;
+
+ if (unlikely(rtl8_4_read_tag(skb, dev, skb_tail_pointer(skb) - RTL8_4_TAG_LEN)))
+ return NULL;
+
+ if (pskb_trim_rcsum(skb, skb->len - RTL8_4_TAG_LEN))
+ return NULL;
return skb;
}
+/* Ethertype version */
static const struct dsa_device_ops rtl8_4_netdev_ops = {
.name = "rtl8_4",
.proto = DSA_TAG_PROTO_RTL8_4,
@@ -172,7 +231,28 @@ static const struct dsa_device_ops rtl8_4_netdev_ops = {
.rcv = rtl8_4_tag_rcv,
.needed_headroom = RTL8_4_TAG_LEN,
};
-module_dsa_tag_driver(rtl8_4_netdev_ops);
-MODULE_LICENSE("GPL");
+DSA_TAG_DRIVER(rtl8_4_netdev_ops);
+
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4);
+
+/* Tail version */
+static const struct dsa_device_ops rtl8_4t_netdev_ops = {
+ .name = "rtl8_4t",
+ .proto = DSA_TAG_PROTO_RTL8_4T,
+ .xmit = rtl8_4t_tag_xmit,
+ .rcv = rtl8_4t_tag_rcv,
+ .needed_tailroom = RTL8_4_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(rtl8_4t_netdev_ops);
+
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4T);
+
+static struct dsa_tag_driver *dsa_tag_drivers[] = {
+ &DSA_TAG_DRIVER_NAME(rtl8_4_netdev_ops),
+ &DSA_TAG_DRIVER_NAME(rtl8_4t_netdev_ops),
+};
+module_dsa_tag_drivers(dsa_tag_drivers);
+
+MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 72d5e0ef8dcf..83e4136516b0 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -226,7 +226,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
* TX VLAN that targets the bridge's entire broadcast domain,
* instead of just the specific port.
*/
- tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+ tx_vid = dsa_tag_8021q_bridge_vid(bridge_num);
return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid);
}
@@ -267,7 +267,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
if (skb->offload_fwd_mark)
return sja1105_imprecise_xmit(skb, netdev);
@@ -295,7 +295,7 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
__be32 *tx_trailer;
__be16 *tx_header;
int trailer_pos;
@@ -509,7 +509,7 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
* packet.
*/
static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
- int *switch_id, u16 *vid)
+ int *switch_id, int *vbid, u16 *vid)
{
struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
u16 vlan_tci;
@@ -519,8 +519,8 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
else
vlan_tci = ntohs(hdr->h_vlan_TCI);
- if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK))
- return dsa_8021q_rcv(skb, source_port, switch_id);
+ if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK))
+ return dsa_8021q_rcv(skb, source_port, switch_id, vbid);
/* Try our best with imprecise RX */
*vid = vlan_tci & VLAN_VID_MASK;
@@ -529,7 +529,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1;
+ int source_port = -1, switch_id = -1, vbid = -1;
struct sja1105_meta meta = {0};
struct ethhdr *hdr;
bool is_link_local;
@@ -542,7 +542,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
if (sja1105_skb_has_tag_8021q(skb)) {
/* Normal traffic path. */
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
} else if (is_link_local) {
/* Management traffic path. Switch embeds the switch ID and
* port ID into bytes of the destination MAC, courtesy of
@@ -561,7 +561,9 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- if (source_port == -1 || switch_id == -1)
+ if (vbid >= 1)
+ skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
+ else if (source_port == -1 || switch_id == -1)
skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
else
skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
@@ -686,7 +688,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1;
+ int source_port = -1, switch_id = -1, vbid = -1;
bool host_only = false;
u16 vid = 0;
@@ -700,9 +702,11 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
/* Packets with in-band control extensions might still have RX VLANs */
if (likely(sja1105_skb_has_tag_8021q(skb)))
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
- if (source_port == -1 || switch_id == -1)
+ if (vbid >= 1)
+ skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
+ else if (source_port == -1 || switch_id == -1)
skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
else
skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 75856db299e9..29d01662a48b 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -363,7 +363,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_BUF_LEN + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index 18a5035d3bee..9f33c9689b56 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -54,7 +54,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */
nla_total_size(sizeof(u32)) + /* _RINGS_TX */
nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */
- nla_total_size(sizeof(u8)); /* _RINGS_TCP_DATA_SPLIT */
+ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */
+ nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -91,7 +92,9 @@ static int rings_fill_reply(struct sk_buff *skb,
(nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) ||
(kr->tcp_data_split &&
(nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
- kr->tcp_data_split))))
+ kr->tcp_data_split))) ||
+ (kr->cqe_size &&
+ (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))))
return -EMSGSIZE;
return 0;
@@ -119,6 +122,7 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
};
int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
@@ -159,6 +163,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod);
ethnl_update_u32(&kernel_ringparam.rx_buf_len,
tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod);
+ ethnl_update_u32(&kernel_ringparam.cqe_size,
+ tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
ret = 0;
if (!mod)
goto out_ops;
@@ -190,6 +196,15 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
goto out_ops;
}
+ if (kernel_ringparam.cqe_size &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_CQE_SIZE],
+ "setting cqe size not supported");
+ goto out_ops;
+ }
+
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
if (ret < 0)
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b3c6ffa1894d..584e21788799 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -20,6 +20,13 @@
#include "hsr_framereg.h"
#include "hsr_netlink.h"
+#ifdef CONFIG_LOCKDEP
+int lockdep_hsr_is_held(spinlock_t *lock)
+{
+ return lockdep_is_held(lock);
+}
+#endif
+
u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr)
{
u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed);
@@ -27,11 +34,12 @@ u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr)
return reciprocal_scale(hash, hsr->hash_buckets);
}
-struct hsr_node *hsr_node_get_first(struct hlist_head *head)
+struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock)
{
struct hlist_node *first;
- first = rcu_dereference(hlist_first_rcu(head));
+ first = rcu_dereference_bh_check(hlist_first_rcu(head),
+ lockdep_hsr_is_held(lock));
if (first)
return hlist_entry(first, struct hsr_node, mac_list);
@@ -59,7 +67,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
struct hsr_node *node;
- node = hsr_node_get_first(&hsr->self_node_db);
+ node = hsr_node_get_first(&hsr->self_node_db, &hsr->list_lock);
if (!node) {
WARN_ONCE(1, "HSR: No self node\n");
return false;
@@ -106,7 +114,7 @@ int hsr_create_self_node(struct hsr_priv *hsr,
ether_addr_copy(node->macaddress_B, addr_b);
spin_lock_bh(&hsr->list_lock);
- oldnode = hsr_node_get_first(self_node_db);
+ oldnode = hsr_node_get_first(self_node_db, &hsr->list_lock);
if (oldnode) {
hlist_replace_rcu(&oldnode->mac_list, &node->mac_list);
spin_unlock_bh(&hsr->list_lock);
@@ -125,7 +133,7 @@ void hsr_del_self_node(struct hsr_priv *hsr)
struct hsr_node *node;
spin_lock_bh(&hsr->list_lock);
- node = hsr_node_get_first(self_node_db);
+ node = hsr_node_get_first(self_node_db, &hsr->list_lock);
if (node) {
hlist_del_rcu(&node->mac_list);
kfree_rcu(node, rcu_head);
@@ -191,7 +199,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
spin_lock_bh(&hsr->list_lock);
hlist_for_each_entry_rcu(node, node_db, mac_list,
- lockdep_is_held(&hsr->list_lock)) {
+ lockdep_hsr_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
@@ -597,7 +605,8 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
hash = hsr_mac_hash(hsr, addr);
if (!_pos) {
- node = hsr_node_get_first(&hsr->node_db[hash]);
+ node = hsr_node_get_first(&hsr->node_db[hash],
+ &hsr->list_lock);
if (node)
ether_addr_copy(addr, node->macaddress_A);
return node;
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index d7cce6b161e3..f3762e9e42b5 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -28,8 +28,14 @@ struct hsr_frame_info {
bool is_from_san;
};
+#ifdef CONFIG_LOCKDEP
+int lockdep_hsr_is_held(spinlock_t *lock);
+#else
+#define lockdep_hsr_is_held(lock) 1
+#endif
+
u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr);
-struct hsr_node *hsr_node_get_first(struct hlist_head *head);
+struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock);
void hsr_del_self_node(struct hsr_priv *hsr);
void hsr_del_nodes(struct hlist_head *node_db);
struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index ca556bda3467..b158ba409f9a 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -45,22 +45,6 @@
/* PRP V1 life redundancy box MAC address */
#define PRP_TLV_REDBOX_MAC 30
-/* HSR Tag.
- * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
- * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest,
- * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr,
- * encapsulated protocol } instead.
- *
- * Field names as defined in the IEC:2010 standard for HSR.
- */
-struct hsr_tag {
- __be16 path_and_LSDU_size;
- __be16 sequence_nr;
- __be16 encap_proto;
-} __packed;
-
-#define HSR_HLEN 6
-
#define HSR_V1_SUP_LSDUSIZE 52
#define HSR_HSIZE_SHIFT 8
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index be6f06adefe0..a91283d1e5bf 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -130,6 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
goto err;
fq->q.stamp = skb->tstamp;
+ fq->q.mono_delivery_time = skb->mono_delivery_time;
if (frag_type == LOWPAN_DISPATCH_FRAG1)
fq->q.flags |= INET_FRAG_FIRST_IN;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c465bac1eb0..72fde2888ad2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1376,8 +1376,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
}
ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_segment))
+ if (likely(ops && ops->callbacks.gso_segment)) {
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
if (IS_ERR_OR_NULL(segs))
goto out;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4db0325f6e1a..2d0c05ca9c6f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -293,7 +293,7 @@ static int arp_constructor(struct neighbour *neigh)
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
dst_link_failure(skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
}
/* Create and send an arp packet. */
@@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
return err;
}
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
+ if ((neigh->nud_state & NUD_VALID) && !force) {
+ neigh_release(neigh);
+ return 0;
+ }
+
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
@@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
if (!dev)
return -EINVAL;
}
- return arp_invalidate(dev, ip);
+ return arp_invalidate(dev, ip, true);
}
/*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index fba2bffd65f7..53a6b14dc50a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -104,6 +104,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_FLAGS] = { .type = NLA_U32 },
[IFA_RT_PRIORITY] = { .type = NLA_U32 },
[IFA_TARGET_NETNSID] = { .type = NLA_S32 },
+ [IFA_PROTO] = { .type = NLA_U8 },
};
struct inet_fill_args {
@@ -889,6 +890,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (tb[IFA_RT_PRIORITY])
ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+ if (tb[IFA_PROTO])
+ ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -1625,6 +1629,7 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(1) /* IFA_PROTO */
+ nla_total_size(4) /* IFA_RT_PRIORITY */
+ nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
@@ -1699,6 +1704,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
(ifa->ifa_label[0] &&
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+ (ifa->ifa_proto &&
+ nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
(ifa->ifa_rt_priority &&
nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 851f542928a3..70e6c87fbe3d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -446,6 +446,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ unsigned int allocsz;
/* this is non-NULL only with TCP/UDP Encapsulation */
if (x->encap) {
@@ -455,6 +456,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
+ allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+ if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -671,7 +676,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index d87f02a6e934..935026f4c807 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -110,8 +110,7 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
+ return skb_eth_gso_segment(skb, features, htons(ETH_P_IP));
}
static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
@@ -160,6 +159,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
}
+ if (proto == IPPROTO_IPV6)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
+
__skb_pull(skb, skb_transport_offset(skb));
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 54811728d906..7408051632ac 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -437,6 +437,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (net->ipv4.fib_has_custom_local_routes ||
fib4_has_custom_rules(net))
goto full_check;
+ /* Within the same container, it is regarded as a martian source,
+ * and the same host but different containers are not.
+ */
if (inet_lookup_ifaddr_rcu(net, src))
return -EINVAL;
@@ -1121,9 +1124,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0);
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
+ }
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -1137,6 +1142,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
if (ifa->ifa_prefixlen < 31) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
+ arp_invalidate(dev, prefix | ~mask, false);
}
}
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a63014b54809..f9b9e26c32c1 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -17,10 +17,9 @@ struct fib_alias {
u8 fa_slen;
u32 tb_id;
s16 fa_default;
- u8 offload:1,
- trap:1,
- offload_failed:1,
- unused:5;
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c9c4f2f66b38..c5a29703185a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -526,9 +526,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.dst_len = dst_len;
fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c05cd105e95e..2af2b99e0bea 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1052,19 +1052,23 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
- fa_match->offload_failed == fri->offload_failed)
+ /* These are paired with the WRITE_ONCE() happening in this function.
+ * The reason is that we are only protected by RCU at this point.
+ */
+ if (READ_ONCE(fa_match->offload) == fri->offload &&
+ READ_ONCE(fa_match->trap) == fri->trap &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload = fri->offload;
- fa_match->trap = fri->trap;
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
- fa_match->offload_failed == fri->offload_failed)
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload_failed = fri->offload_failed;
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
if (!net->ipv4.sysctl_fib_notify_on_flag_change)
goto out;
@@ -2306,9 +2310,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.dst_len = KEYLENGTH - fa->fa_slen;
fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 341096807100..63948f6aeca0 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -572,6 +572,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
skb_mark_not_on_list(head);
head->prev = NULL;
head->tstamp = q->stamp;
+ head->mono_delivery_time = q->mono_delivery_time;
}
EXPORT_SYMBOL(inet_frag_reasm_finish);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 00ec819f949b..92ba3350274b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -79,7 +79,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
if (unlikely(opt->optlen))
ip_forward_options(skb);
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fad803d2d711..fb153569889e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -349,6 +349,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->iif = dev->ifindex;
qp->q.stamp = skb->tstamp;
+ qp->q.mono_delivery_time = skb->mono_delivery_time;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d94f9f7e60c3..95f7bb052784 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -226,6 +226,7 @@ resubmit:
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ skb_clear_delivery_time(skb);
__skb_pull(skb, skb_network_header_len(skb));
rcu_read_lock();
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0c0574eb5f5b..00b4bf26fd93 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -233,7 +233,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
}
@@ -317,7 +317,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
case NET_XMIT_CN:
return __ip_finish_output(net, sk, skb) ? : ret;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
@@ -337,7 +337,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
case NET_XMIT_SUCCESS:
break;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
@@ -536,7 +536,7 @@ packet_routed:
no_route:
rcu_read_unlock();
IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES);
return -EHOSTUNREACH;
}
EXPORT_SYMBOL(__ip_queue_xmit);
@@ -761,6 +761,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
struct sk_buff *skb2;
+ bool mono_delivery_time = skb->mono_delivery_time;
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
@@ -852,7 +853,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
}
}
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, skb);
if (!err)
@@ -908,7 +909,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, skb2);
if (err)
goto fail;
@@ -991,7 +992,7 @@ static int __ip_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1727,6 +1728,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ nskb->mono_delivery_time = !!transmit_time;
ip_push_pending_frames(sk, &fl4);
}
out:
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index bcf7bc71cb56..3ee947557b88 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ return NULL;
}
read_lock_bh(&ping_table.lock);
@@ -221,7 +227,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
}
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
- sk->sk_bound_dev_if != inet_sdif(skb))
+ sk->sk_bound_dev_if != sdif)
continue;
sock_hold(sk);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 202d6b1fff43..f444f5983405 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3400,8 +3400,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
- fri.offload = fa->offload;
- fri.trap = fa->trap;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
break;
}
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1cae27b5dcd8..ad80d180b60b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1272,6 +1272,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ONE,
},
{
+ .procname = "tcp_tso_rtt_log",
+ .data = &init_net.ipv4.sysctl_tcp_tso_rtt_log,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
+ {
.procname = "tcp_min_rtt_wlen",
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 760e8221d321..cf18fbcbf123 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -688,7 +688,8 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
return skb->len < size_goal &&
sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
!tcp_rtx_queue_empty(sk) &&
- refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
+ tcp_skb_can_collapse_to(skb);
}
void tcp_push(struct sock *sk, int flags, int mss_now,
@@ -1683,11 +1684,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (!copied)
copied = used;
break;
- } else if (used <= len) {
- seq += used;
- copied += used;
- offset += used;
}
+ if (WARN_ON_ONCE(used > len))
+ used = len;
+ seq += used;
+ copied += used;
+ offset += used;
+
/* If recv_actor drops the lock (e.g. TCP splice
* receive) the skb pointer might be invalid when
* getting here: tcp_collapse might have deleted it
@@ -4431,6 +4434,73 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke
}
EXPORT_SYMBOL(tcp_md5_hash_key);
+/* Called with rcu_read_lock() */
+enum skb_drop_reason
+tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif)
+{
+ /*
+ * This gets called for each TCP segment that arrives
+ * so we want to be efficient.
+ * We have 3 drop cases:
+ * o No MD5 hash and one expected.
+ * o MD5 hash and we're not expecting one.
+ * o MD5 hash and its wrong.
+ */
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int genhash, l3index;
+ u8 newhash[16];
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return SKB_NOT_DROPPED_YET;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return SKB_DROP_REASON_TCP_MD5NOTFOUND;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
+ }
+
+ /* check the signature */
+ genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected,
+ NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
+ if (family == AF_INET) {
+ net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
+ saddr, ntohs(th->source),
+ daddr, ntohs(th->dest),
+ genhash ? " tcp_v4_calc_md5_hash failed"
+ : "", l3index);
+ } else {
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
+ genhash ? "failed" : "mismatch",
+ saddr, ntohs(th->source),
+ daddr, ntohs(th->dest), l3index);
+ }
+ return SKB_DROP_REASON_TCP_MD5FAILURE;
+ }
+ return SKB_NOT_DROPPED_YET;
+}
+EXPORT_SYMBOL(tcp_inbound_md5_hash);
+
#endif
void tcp_done(struct sock *sk)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 92e65d56dc2c..2088f93fa37b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4684,10 +4684,16 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
return res;
}
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason reason)
{
sk_drops_add(sk, skb);
- __kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
+}
+
+static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+{
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
}
/* This one checks to see if we can put data from the
@@ -4773,7 +4779,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
sk->sk_data_ready(sk);
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
return;
}
@@ -4836,7 +4842,8 @@ coalesce_done:
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb,
+ SKB_DROP_REASON_TCP_OFOMERGE);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
@@ -4855,7 +4862,8 @@ coalesce_done:
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop_reason(sk, skb1,
+ SKB_DROP_REASON_TCP_OFOMERGE);
goto merge_right;
}
} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4883,7 +4891,7 @@ merge_right:
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE);
}
/* If there is no skb after us, we are the last_skb ! */
if (!skb1)
@@ -4982,6 +4990,7 @@ void tcp_data_ready(struct sock *sk)
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ enum skb_drop_reason reason;
bool fragstolen;
int eaten;
@@ -5000,6 +5009,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
tp->rx_opt.dsack = 0;
/* Queue data for delivery to the user.
@@ -5008,6 +5018,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
if (tcp_receive_window(tp) == 0) {
+ reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
}
@@ -5017,6 +5028,7 @@ queue_and_out:
if (skb_queue_len(&sk->sk_receive_queue) == 0)
sk_forced_mem_schedule(sk, skb->truesize);
else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ reason = SKB_DROP_REASON_PROTO_MEM;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
sk->sk_data_ready(sk);
goto drop;
@@ -5053,6 +5065,7 @@ queue_and_out:
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
tcp_rcv_spurious_retrans(sk, skb);
/* A retransmit, 2nd most common case. Force an immediate ack. */
+ reason = SKB_DROP_REASON_TCP_OLD_DATA;
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -5060,13 +5073,16 @@ out_of_window:
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_schedule_ack(sk);
drop:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
return;
}
/* Out of window. F.e. zero window probe. */
- if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ if (!before(TCP_SKB_CB(skb)->seq,
+ tp->rcv_nxt + tcp_receive_window(tp))) {
+ reason = SKB_DROP_REASON_TCP_OVERWINDOW;
goto out_of_window;
+ }
if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
/* Partial packet, seq < rcv_next < end_seq */
@@ -5076,6 +5092,7 @@ drop:
* remembering D-SACK for its head made in previous line.
*/
if (!tcp_receive_window(tp)) {
+ reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
}
@@ -5781,6 +5798,7 @@ discard:
*/
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct tcphdr *th = (const struct tcphdr *)skb->data;
struct tcp_sock *tp = tcp_sk(sk);
unsigned int len = skb->len;
@@ -5869,6 +5887,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
return;
} else { /* Header too small */
+ reason = SKB_DROP_REASON_PKT_TOO_SMALL;
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
@@ -5924,8 +5943,10 @@ slow_path:
if (len < (th->doff << 2) || tcp_checksum_complete(skb))
goto csum_error;
- if (!th->ack && !th->rst && !th->syn)
+ if (!th->ack && !th->rst && !th->syn) {
+ reason = SKB_DROP_REASON_TCP_FLAGS;
goto discard;
+ }
/*
* Standard slow path.
@@ -5951,12 +5972,13 @@ step5:
return;
csum_error:
+ reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
}
EXPORT_SYMBOL(tcp_rcv_established);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6873f46fc8ba..f9cec624068d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1409,72 +1409,6 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
#endif
-/* Called with rcu_read_lock() */
-static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb,
- int dif, int sdif)
-{
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * This gets called for each TCP segment that arrives
- * so we want to be efficient.
- * We have 3 drop cases:
- * o No MD5 hash and one expected.
- * o MD5 hash and we're not expecting one.
- * o MD5 hash and its wrong.
- */
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- const union tcp_md5_addr *addr;
- unsigned char newhash[16];
- int genhash, l3index;
-
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- addr = (union tcp_md5_addr *)&iph->saddr;
- hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* Okay, so this is hash_expected and hash_location -
- * so we need to calculate the checksum.
- */
- genhash = tcp_v4_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
- &iph->saddr, ntohs(th->source),
- &iph->daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed"
- : "", l3index);
- return true;
- }
- return false;
-#endif
- return false;
-}
-
static void tcp_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -1704,6 +1638,7 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
*/
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
struct sock *rsk;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1726,6 +1661,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (tcp_checksum_complete(skb))
goto csum_err;
@@ -1753,7 +1689,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
reset:
tcp_v4_send_reset(rsk, skb);
discard:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
@@ -1762,6 +1698,7 @@ discard:
return 0;
csum_err:
+ reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -1807,7 +1744,8 @@ int tcp_v4_early_demux(struct sk_buff *skb)
return 0;
}
-bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
u32 limit, tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
@@ -1833,6 +1771,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
trace_tcp_bad_csum(skb);
+ *reason = SKB_DROP_REASON_TCP_CSUM;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
return true;
@@ -1921,6 +1860,7 @@ no_coalesce:
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
+ *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
return true;
}
@@ -1971,13 +1911,13 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ enum skb_drop_reason drop_reason;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
- int drop_reason;
int ret;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -2025,7 +1965,10 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &iph->saddr, &iph->daddr,
+ AF_INET, dif, sdif);
+ if (unlikely(drop_reason)) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -2057,6 +2000,8 @@ process:
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ } else {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -2092,10 +2037,14 @@ process:
}
}
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_and_relse;
+ }
- if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
+ drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr,
+ &iph->daddr, AF_INET, dif, sdif);
+ if (drop_reason)
goto discard_and_relse;
nf_reset_ct(skb);
@@ -2124,7 +2073,7 @@ process:
if (!sock_owned_by_user(sk)) {
ret = tcp_v4_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -2166,6 +2115,7 @@ discard_and_relse:
do_time_wait:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -3187,6 +3137,7 @@ static int __net_init tcp_sk_init(struct net *net)
/* rfc5961 challenge ack rate limiting */
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
net->ipv4.sysctl_tcp_min_tso_segs = 2;
+ net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */
net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
net->ipv4.sysctl_tcp_autocorking = 1;
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e76bf1e9251e..81aaa7da3e8c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1253,7 +1253,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp = tcp_sk(sk);
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
if (clone_it) {
oskb = skb;
@@ -1589,7 +1589,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
skb_split(skb, buff, len);
- buff->tstamp = skb->tstamp;
+ skb_set_delivery_time(buff, skb->tstamp, true);
tcp_fragment_tstamp(skb, buff);
old_factor = tcp_skb_pcount(skb);
@@ -1951,25 +1951,34 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
}
/* Return how many segs we'd like on a TSO packet,
- * to send one TSO packet per ms
+ * depending on current pacing rate, and how close the peer is.
+ *
+ * Rationale is:
+ * - For close peers, we rather send bigger packets to reduce
+ * cpu costs, because occasional losses will be repaired fast.
+ * - For long distance/rtt flows, we would like to get ACK clocking
+ * with 1 ACK per ms.
+ *
+ * Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting
+ * in bigger TSO bursts. We we cut the RTT-based allowance in half
+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance
+ * is below 1500 bytes after 6 * ~500 usec = 3ms.
*/
static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
int min_tso_segs)
{
- u32 bytes, segs;
+ unsigned long bytes;
+ u32 r;
- bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- sk->sk_gso_max_size);
+ bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
- /* Goal is to send at least one packet per ms,
- * not one big TSO packet every 100 ms.
- * This preserves ACK clocking and is consistent
- * with tcp_tso_should_defer() heuristic.
- */
- segs = max_t(u32, bytes / mss_now, min_tso_segs);
+ r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
+ if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
+ bytes += sk->sk_gso_max_size >> r;
+
+ bytes = min_t(unsigned long, bytes, sk->sk_gso_max_size);
- return segs;
+ return max_t(u32, bytes / mss_now, min_tso_segs);
}
/* Return the number of segments we want in the skb we are transmitting.
@@ -2616,7 +2625,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
goto repair; /* Skip network transmission */
@@ -3541,11 +3551,12 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
- skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
+ skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
+ true);
else
#endif
{
- skb->skb_mstamp_ns = now;
+ skb_set_delivery_time(skb, now, true);
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3594,7 +3605,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
synack_type, &opts);
- skb->skb_mstamp_ns = now;
+ skb_set_delivery_time(skb, now, true);
tcp_add_tx_delay(skb, tp);
return skb;
@@ -3771,7 +3782,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
- syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
+ skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index b91003538d87..bc3a043a5d5c 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
- if (node->dev != dev)
+ if (list_entry_is_head(node, &info->shared->devices, list))
return;
list_del(&node->list);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 02d31d4fcab3..b22504176588 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -400,16 +400,16 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
/* We refer to the device */
dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
- if (dev != blackhole_netdev) {
- if (snmp6_alloc_dev(ndev) < 0) {
- netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
- __func__);
- neigh_parms_release(&nd_tbl, ndev->nd_parms);
- dev_put_track(dev, &ndev->dev_tracker);
- kfree(ndev);
- return ERR_PTR(err);
- }
+ if (snmp6_alloc_dev(ndev) < 0) {
+ netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+ __func__);
+ neigh_parms_release(&nd_tbl, ndev->nd_parms);
+ dev_put_track(dev, &ndev->dev_tracker);
+ kfree(ndev);
+ return ERR_PTR(err);
+ }
+ if (dev != blackhole_netdev) {
if (snmp6_register_dev(ndev) < 0) {
netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
__func__, dev->name);
@@ -1115,6 +1115,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
ifa->prefix_len = cfg->plen;
ifa->rt_priority = cfg->rt_priority;
ifa->flags = cfg->ifa_flags;
+ ifa->ifa_proto = cfg->ifa_proto;
/* No need to add the TENTATIVE flag for addresses with NODAD */
if (!(cfg->ifa_flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
@@ -1836,8 +1837,8 @@ out:
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- u32 banned_flags)
+static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ u32 banned_flags)
{
struct inet6_ifaddr *ifp;
int err = -EADDRNOTAVAIL;
@@ -2593,6 +2594,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
.valid_lft = valid_lft,
.preferred_lft = prefered_lft,
.scope = addr_type & IPV6_ADDR_SCOPE_MASK,
+ .ifa_proto = IFAPROT_KERNEL_RA
};
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -3077,7 +3079,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
}
static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
- int plen, int scope)
+ int plen, int scope, u8 proto)
{
struct inet6_ifaddr *ifp;
struct ifa6_config cfg = {
@@ -3086,7 +3088,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
.ifa_flags = IFA_F_PERMANENT,
.valid_lft = INFINITY_LIFE_TIME,
.preferred_lft = INFINITY_LIFE_TIME,
- .scope = scope
+ .scope = scope,
+ .ifa_proto = proto
};
ifp = ipv6_add_addr(idev, &cfg, true, NULL);
@@ -3131,7 +3134,7 @@ static void add_v4_addrs(struct inet6_dev *idev)
}
if (addr.s6_addr32[3]) {
- add_addr(idev, &addr, plen, scope);
+ add_addr(idev, &addr, plen, scope, IFAPROT_UNSPEC);
addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags,
GFP_KERNEL);
return;
@@ -3154,7 +3157,8 @@ static void add_v4_addrs(struct inet6_dev *idev)
flag |= IFA_HOST;
}
- add_addr(idev, &addr, plen, flag);
+ add_addr(idev, &addr, plen, flag,
+ IFAPROT_UNSPEC);
addrconf_prefix_route(&addr, plen, 0, idev->dev,
0, pflags, GFP_KERNEL);
}
@@ -3177,7 +3181,7 @@ static void init_loopback(struct net_device *dev)
return;
}
- add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
+ add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFAPROT_KERNEL_LO);
}
void addrconf_add_linklocal(struct inet6_dev *idev,
@@ -3189,7 +3193,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
.ifa_flags = flags | IFA_F_PERMANENT,
.valid_lft = INFINITY_LIFE_TIME,
.preferred_lft = INFINITY_LIFE_TIME,
- .scope = IFA_LINK
+ .scope = IFA_LINK,
+ .ifa_proto = IFAPROT_KERNEL_LL
};
struct inet6_ifaddr *ifp;
@@ -3725,6 +3730,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
bool keep_addr = false;
+ bool was_ready;
int state, i;
ASSERT_RTNL();
@@ -3790,7 +3796,10 @@ restart:
addrconf_del_rs_timer(idev);
- /* Step 2: clear flags for stateless addrconf */
+ /* Step 2: clear flags for stateless addrconf, repeated down
+ * detection
+ */
+ was_ready = idev->if_flags & IF_READY;
if (!unregister)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3864,7 +3873,7 @@ restart:
if (unregister) {
ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- } else {
+ } else if (was_ready) {
ipv6_mc_down(idev);
}
@@ -4627,6 +4636,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_FLAGS] = { .len = sizeof(u32) },
[IFA_RT_PRIORITY] = { .len = sizeof(u32) },
[IFA_TARGET_NETNSID] = { .type = NLA_S32 },
+ [IFA_PROTO] = { .type = NLA_U8 },
};
static int
@@ -4752,6 +4762,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
ifp->tstamp = jiffies;
ifp->valid_lft = cfg->valid_lft;
ifp->prefered_lft = cfg->preferred_lft;
+ ifp->ifa_proto = cfg->ifa_proto;
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
ifp->rt_priority = cfg->rt_priority;
@@ -4845,6 +4856,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFA_RT_PRIORITY])
cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+ if (tb[IFA_PROTO])
+ cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+
cfg.valid_lft = INFINITY_LIFE_TIME;
cfg.preferred_lft = INFINITY_LIFE_TIME;
@@ -4948,6 +4962,7 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(16) /* IFA_ADDRESS */
+ nla_total_size(sizeof(struct ifa_cacheinfo))
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(1) /* IFA_PROTO */
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
@@ -4985,6 +5000,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
+ spin_lock_bh(&ifa->lock);
if (!((ifa->flags&IFA_F_PERMANENT) &&
(ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
@@ -5006,6 +5022,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
+ spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
@@ -5025,6 +5042,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
goto error;
+ if (ifa->ifa_proto &&
+ nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto))
+ goto error;
+
nlmsg_end(skb, nlh);
return 0;
@@ -7187,7 +7208,7 @@ static void __net_exit addrconf_exit_net(struct net *net)
kfree(net->ipv6.devconf_all);
net->ipv6.devconf_all = NULL;
- cancel_delayed_work(&net->ipv6.addr_chk_work);
+ cancel_delayed_work_sync(&net->ipv6.addr_chk_work);
/*
* Check hash table, then free it.
*/
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8fe7900f1949..7d7b7523d126 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -441,11 +441,14 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
+ const struct proto *prot;
int err = 0;
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
/* If the socket has its own bind function then use it. */
- if (sk->sk_prot->bind)
- return sk->sk_prot->bind(sk, uaddr, addr_len);
+ if (prot->bind)
+ return prot->bind(sk, uaddr, addr_len);
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -555,6 +558,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
void __user *argp = (void __user *)arg;
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
+ const struct proto *prot;
switch (cmd) {
case SIOCADDRT:
@@ -572,9 +576,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr(net, argp);
default:
- if (!sk->sk_prot->ioctl)
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ if (!prot->ioctl)
return -ENOIOCTLCMD;
- return sk->sk_prot->ioctl(sk, cmd, arg);
+ return prot->ioctl(sk, cmd, arg);
}
/*NOTREACHED*/
return 0;
@@ -636,11 +642,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
- return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
sk, msg, size);
}
@@ -650,13 +659,16 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
int addr_len = 0;
int err;
if (likely(!(flags & MSG_ERRQUEUE)))
sock_rps_record_flow(sk);
- err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bb2c407b46b..b0ffbcd5432d 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -482,6 +482,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ unsigned int allocsz;
if (x->encap) {
int err = esp6_output_encap(x, skb, esp);
@@ -490,6 +491,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
return err;
}
+ allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+ if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -707,7 +712,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ba5e81cd569c..3a293838a91d 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,8 +145,7 @@ static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
+ return skb_eth_gso_segment(skb, features, htons(ETH_P_IPV6));
}
static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
@@ -199,6 +198,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x,
ipv6_skip_exthdr(skb, 0, &proto, &frag);
}
+ if (proto == IPPROTO_IPIP)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
+
__skb_pull(skb, skb_transport_offset(skb));
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index e159eb4328a8..1098131ed90c 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -635,7 +635,8 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_schema *sc,
u8 sclen, bool is_input)
{
- struct __kernel_sock_timeval ts;
+ struct timespec64 ts;
+ ktime_t tstamp;
u64 raw64;
u32 raw32;
u16 raw16;
@@ -680,10 +681,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev) {
*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
} else {
- if (!skb->tstamp)
- __net_timestamp(skb);
+ tstamp = skb_tstamp_cond(skb, true);
+ ts = ktime_to_timespec64(tstamp);
- skb_get_new_timestamp(skb, &ts);
*(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
}
data += sizeof(__be32);
@@ -694,13 +694,12 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev) {
*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
} else {
- if (!skb->tstamp)
- __net_timestamp(skb);
+ if (!trace->type.bit2) {
+ tstamp = skb_tstamp_cond(skb, true);
+ ts = ktime_to_timespec64(tstamp);
+ }
- if (!trace->type.bit2)
- skb_get_new_timestamp(skb, &ts);
-
- *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ *(__be32 *)data = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC));
}
data += sizeof(__be32);
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index aa673a6a7e43..ceb85c67ce39 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
err = -EINVAL;
goto done;
}
- if (fl_shared_exclusive(fl) || fl->opt)
+ if (fl_shared_exclusive(fl) || fl->opt) {
+ WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
+ }
return fl;
done:
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index d4b1e2c5aa76..5b5ea35635f9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -459,6 +459,7 @@ discard:
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ skb_clear_delivery_time(skb);
rcu_read_lock();
ip6_protocol_deliver_rcu(net, skb, 0, false);
rcu_read_unlock();
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d37a79a8554e..c4fc03c1ac99 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -114,6 +114,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
}
if (IS_ERR_OR_NULL(segs))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0c6c971ce0a5..e69fac576970 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -130,7 +130,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
rcu_read_unlock_bh();
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
}
@@ -202,7 +202,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
case NET_XMIT_CN:
return __ip6_finish_output(net, sk, skb) ? : ret;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
@@ -217,7 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
}
@@ -440,7 +440,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
}
#endif
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return dst_output(net, sk, skb);
}
@@ -813,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
+ bool mono_delivery_time = skb->mono_delivery_time;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
@@ -903,7 +904,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -962,7 +963,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- frag->tstamp = tstamp;
+ skb_set_delivery_time(frag, tstamp, mono_delivery_time);
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1406,8 +1407,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (np->frag_size)
mtu = np->frag_size;
}
- if (mtu < IPV6_MIN_MTU)
- return -EINVAL;
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
@@ -1464,14 +1463,12 @@ static int __ip6_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
- sizeof(struct frag_hdr);
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
@@ -1479,6 +1476,13 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
+ if (mtu < fragheaderlen ||
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr))
+ goto emsgsize;
+
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
+
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
* the first fragment
*/
@@ -1487,6 +1491,7 @@ static int __ip6_append_data(struct sock *sk,
if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
sizeof(struct ipv6hdr));
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0ebaaec3faf9..a9775c830194 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1040,7 +1040,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
int ret;
#ifdef CONFIG_IPV6_PIMSM_V2
- if (assert == MRT6MSG_WHOLEPKT)
+ if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
+sizeof(*msg));
else
@@ -1056,7 +1056,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
#ifdef CONFIG_IPV6_PIMSM_V2
- if (assert == MRT6MSG_WHOLEPKT) {
+ if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
/* Ugly, but we have no choice with this interface.
Duplicate old header, fix length etc.
And all this only to mangle msg->im6_msgtype and
@@ -1068,8 +1068,11 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
skb_reset_transport_header(skb);
msg = (struct mrt6msg *)skb_transport_header(skb);
msg->im6_mbz = 0;
- msg->im6_msgtype = MRT6MSG_WHOLEPKT;
- msg->im6_mif = mrt->mroute_reg_vif_num;
+ msg->im6_msgtype = assert;
+ if (assert == MRT6MSG_WRMIFWHOLE)
+ msg->im6_mif = mifi;
+ else
+ msg->im6_mif = mrt->mroute_reg_vif_num;
msg->im6_pad = 0;
msg->im6_src = ipv6_hdr(pkt)->saddr;
msg->im6_dst = ipv6_hdr(pkt)->daddr;
@@ -1650,6 +1653,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
mifi_t mifi;
struct net *net = sock_net(sk);
struct mr_table *mrt;
+ bool do_wrmifwhole;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1763,12 +1767,15 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
return -EINVAL;
if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
+
+ do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
v = !!v;
rtnl_lock();
ret = 0;
if (v != mrt->mroute_do_pim) {
mrt->mroute_do_pim = v;
mrt->mroute_do_assert = v;
+ mrt->mroute_do_wrvifwhole = do_wrmifwhole;
}
rtnl_unlock();
return ret;
@@ -2144,6 +2151,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
MFC_ASSERT_THRESH)) {
c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
+ if (mrt->mroute_do_wrvifwhole)
+ ip6mr_cache_report(mrt, skb, true_vifi,
+ MRT6MSG_WRMIFWHOLE);
}
goto dont_forward;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a733803a710c..222f6bf220ba 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -475,7 +475,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sock_prot_inuse_add(net, sk->sk_prot, -1);
sock_prot_inuse_add(net, &tcp_prot, 1);
- sk->sk_prot = &tcp_prot;
+ /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */
+ WRITE_ONCE(sk->sk_prot, &tcp_prot);
icsk->icsk_af_ops = &ipv4_specific;
sk->sk_socket->ops = &inet_stream_ops;
sk->sk_family = PF_INET;
@@ -489,7 +490,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sock_prot_inuse_add(net, sk->sk_prot, -1);
sock_prot_inuse_add(net, prot, 1);
- sk->sk_prot = prot;
+ /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */
+ WRITE_ONCE(sk->sk_prot, prot);
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bed8155508c8..909f937befd7 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
}
/* called with rcu_read_lock() */
-int igmp6_event_query(struct sk_buff *skb)
+void igmp6_event_query(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
- if (!idev)
- return -EINVAL;
-
- if (idev->dead) {
- kfree_skb(skb);
- return -ENODEV;
- }
+ if (!idev || idev->dead)
+ goto out;
spin_lock_bh(&idev->mc_query_lock);
if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
__skb_queue_tail(&idev->mc_query_queue, skb);
if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
in6_dev_hold(idev);
+ skb = NULL;
}
spin_unlock_bh(&idev->mc_query_lock);
-
- return 0;
+out:
+ kfree_skb(skb);
}
static void __mld_query_work(struct sk_buff *skb)
@@ -1542,27 +1538,23 @@ static void mld_query_work(struct work_struct *work)
}
/* called with rcu_read_lock() */
-int igmp6_event_report(struct sk_buff *skb)
+void igmp6_event_report(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
- if (!idev)
- return -EINVAL;
-
- if (idev->dead) {
- kfree_skb(skb);
- return -ENODEV;
- }
+ if (!idev || idev->dead)
+ goto out;
spin_lock_bh(&idev->mc_report_lock);
if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
__skb_queue_tail(&idev->mc_report_queue, skb);
if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
in6_dev_hold(idev);
+ skb = NULL;
}
spin_unlock_bh(&idev->mc_report_lock);
-
- return 0;
+out:
+ kfree_skb(skb);
}
static void __mld_report_work(struct sk_buff *skb)
@@ -1759,7 +1751,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
- if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f03b597e4121..fcb288b0ae13 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -466,9 +466,8 @@ static void ip6_nd_hdr(struct sk_buff *skb,
hdr->daddr = *daddr;
}
-static void ndisc_send_skb(struct sk_buff *skb,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(skb->dev);
@@ -515,6 +514,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
+EXPORT_SYMBOL(ndisc_send_skb);
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
@@ -598,22 +598,16 @@ static void ndisc_send_unsol_na(struct net_device *dev)
in6_dev_put(idev);
}
-void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr,
- u64 nonce)
+struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
+ const struct in6_addr *saddr, u64 nonce)
{
- struct sk_buff *skb;
- struct in6_addr addr_buf;
int inc_opt = dev->addr_len;
- int optlen = 0;
+ struct sk_buff *skb;
struct nd_msg *msg;
+ int optlen = 0;
- if (!saddr) {
- if (ipv6_get_lladdr(dev, &addr_buf,
- (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
- return;
- saddr = &addr_buf;
- }
+ if (!saddr)
+ return NULL;
if (ipv6_addr_any(saddr))
inc_opt = false;
@@ -625,7 +619,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
- return;
+ return NULL;
msg = skb_put(skb, sizeof(*msg));
*msg = (struct nd_msg) {
@@ -647,7 +641,28 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
memcpy(opt + 2, &nonce, 6);
}
- ndisc_send_skb(skb, daddr, saddr);
+ return skb;
+}
+EXPORT_SYMBOL(ndisc_ns_create);
+
+void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
+ const struct in6_addr *daddr, const struct in6_addr *saddr,
+ u64 nonce)
+{
+ struct in6_addr addr_buf;
+ struct sk_buff *skb;
+
+ if (!saddr) {
+ if (ipv6_get_lladdr(dev, &addr_buf,
+ (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)))
+ return;
+ saddr = &addr_buf;
+ }
+
+ skb = ndisc_ns_create(dev, solicit, saddr, nonce);
+
+ if (skb)
+ ndisc_send_skb(skb, daddr, saddr);
}
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
@@ -1337,8 +1352,12 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
neigh->flags |= NTF_ROUTER;
- } else if (rt) {
+ } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) {
+ struct nl_info nlinfo = {
+ .nl_net = net,
+ };
rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
}
if (rt)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 6ab710b5a1a8..1da332450d98 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -121,6 +121,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ bool mono_delivery_time = skb->mono_delivery_time;
ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
@@ -186,7 +187,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -219,7 +220,7 @@ slow_path:
goto blackhole;
}
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5c47be29b9ee..7dd3629dd19e 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -264,6 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
+ fq->q.mono_delivery_time = skb->mono_delivery_time;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d5544cf67ffe..ff033d16549e 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -101,11 +101,21 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.sockc.tsflags = sk->sk_tsflags;
ipc6.sockc.mark = sk->sk_mark;
- err = sock_cmsg_send(sk, msg, &ipc6.sockc);
- if (err)
- return err;
+ if (msg->msg_controllen) {
+ struct ipv6_txoptions opt = {};
+
+ opt.tot_len = sizeof(opt);
+ ipc6.opt = &opt;
- /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
+ if (err < 0)
+ return err;
+
+ /* Changes to txoptions and flow info are not implemented, yet.
+ * Drop the options, fl6 is wiped below.
+ */
+ ipc6.opt = NULL;
+ }
memset(&fl6, 0, sizeof(fl6));
@@ -140,7 +150,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.wcheck = 0;
pfh.family = AF_INET6;
- ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 28e44782c94d..ff866f2a879e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -194,6 +194,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
+ fq->q.mono_delivery_time = skb->mono_delivery_time;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 46c7a1b36075..6188712f24b0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5760,11 +5760,11 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
if (!dst) {
- if (rt->offload)
+ if (READ_ONCE(rt->offload))
rtm->rtm_flags |= RTM_F_OFFLOAD;
- if (rt->trap)
+ if (READ_ONCE(rt->trap))
rtm->rtm_flags |= RTM_F_TRAP;
- if (rt->offload_failed)
+ if (READ_ONCE(rt->offload_failed))
rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
}
@@ -6222,19 +6222,20 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
struct sk_buff *skb;
int err;
- if (f6i->offload == offload && f6i->trap == trap &&
- f6i->offload_failed == offload_failed)
+ if (READ_ONCE(f6i->offload) == offload &&
+ READ_ONCE(f6i->trap) == trap &&
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload = offload;
- f6i->trap = trap;
+ WRITE_ONCE(f6i->offload, offload);
+ WRITE_ONCE(f6i->trap, trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
- f6i->offload_failed == offload_failed)
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload_failed = offload_failed;
+ WRITE_ONCE(f6i->offload_failed, offload_failed);
if (!rcu_access_pointer(f6i->fib6_node))
/* The route was removed from the tree, do not send
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0c648bf07f39..13678d3908fa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -773,57 +773,6 @@ clear_hash_noput:
#endif
-static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb,
- int dif, int sdif)
-{
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash, l3index;
- u8 newhash[16];
-
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* check the signature */
- genhash = tcp_v6_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
- genhash ? "failed" : "mismatch",
- &ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest), l3index);
- return true;
- }
-#endif
- return false;
-}
-
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -921,12 +870,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
#endif
- buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
- GFP_ATOMIC);
+ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (!buff)
return;
- skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
+ skb_reserve(buff, MAX_TCP_HEADER);
t1 = skb_push(buff, tot_len);
skb_reset_transport_header(buff);
@@ -992,7 +940,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
} else {
mark = sk->sk_mark;
}
- buff->tstamp = tcp_transmit_time(sk);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
}
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
fl6.fl6_dport = t1->dest;
@@ -1472,6 +1420,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
+ enum skb_drop_reason reason;
struct tcp_sock *tp;
/* Imagine: socket is IPv6. IPv4 packet arrives,
@@ -1506,6 +1455,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all)
opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
@@ -1559,9 +1509,10 @@ reset:
discard:
if (opt_skb)
__kfree_skb(opt_skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
csum_err:
+ reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -1627,6 +1578,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason drop_reason;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1636,6 +1588,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int ret;
struct net *net = dev_net(skb->dev);
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -1649,8 +1602,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
- if (unlikely(th->doff < sizeof(struct tcphdr)/4))
+ if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
goto bad_packet;
+ }
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
@@ -1677,7 +1632,10 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
+ if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1706,6 +1664,8 @@ process:
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ } else {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -1741,14 +1701,20 @@ process:
}
}
- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_and_relse;
+ }
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
+ drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
+ if (drop_reason)
goto discard_and_relse;
- if (tcp_filter(sk, skb))
+ if (tcp_filter(sk, skb)) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
+ }
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
@@ -1769,7 +1735,7 @@ process:
if (!sock_owned_by_user(sk)) {
ret = tcp_v6_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -1779,6 +1745,7 @@ put_and_return:
return ret ? -1 : 0;
no_tcp_socket:
+ drop_reason = SKB_DROP_REASON_NO_SOCKET;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
@@ -1786,6 +1753,7 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
+ drop_reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -1795,7 +1763,7 @@ bad_packet:
}
discard_it:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
discard_and_relse:
@@ -1806,6 +1774,7 @@ discard_and_relse:
do_time_wait:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6872596b408..7f0fa9bd9ffe 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -912,6 +912,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
struct udphdr *uh;
@@ -988,6 +989,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return udp6_unicast_rcv_skb(sk, skb, uh);
}
+ reason = SKB_DROP_REASON_NO_SOCKET;
+
if (!uh->check)
goto report_csum_error;
@@ -1000,10 +1003,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
short_packet:
+ if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ reason = SKB_DROP_REASON_PKT_TOO_SMALL;
net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
saddr, ntohs(uh->source),
@@ -1014,10 +1019,12 @@ short_packet:
report_csum_error:
udp6_csum_zero_error(skb);
csum_error:
+ if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ reason = SKB_DROP_REASON_UDP_CSUM;
__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d0d280077721..ad07904642ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -45,6 +45,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf
return xfrm_output(sk, skb);
}
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+ struct frag_hdr *fh;
+ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+ if (prevhdr != NEXTHDR_FRAGMENT)
+ return 0;
+ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+ return 1;
+ return 0;
+}
+
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +86,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
+ } else if (toobig && xfrm6_noneed_fragment(skb)) {
+ skb->ignore_df = 1;
+ goto skip_frag;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 8f4d49a7d3e8..eb0295d90039 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -319,7 +319,7 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
*/
static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
{
- unsigned long reg1 = (unsigned long)param;
+ unsigned long reg1 = virt_to_phys(param);
int cc;
asm volatile (
diff --git a/net/key/af_key.c b/net/key/af_key.c
index de24a7d474df..9bf52a09b5ff 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2623,7 +2623,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
}
return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
- kma ? &k : NULL, net, NULL);
+ kma ? &k : NULL, net, NULL, 0);
out:
return err;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 74a878f213d3..1deb3d874a4b 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2021 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -626,6 +626,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
}
+ if (test_sta_flag(sta, WLAN_STA_MFP) &&
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
+ ht_dbg(sdata,
+ "MFP STA not authorized - deny BA session request %pM tid %d\n",
+ sta->sta.addr, tid);
+ return -EINVAL;
+ }
+
/*
* 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a
* member of an IBSS, and has no other existing Block Ack agreement
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b07ccfc44115..01b69109402e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -378,7 +378,7 @@ struct ieee80211_mgd_auth_data {
u8 key[WLAN_KEY_LEN_WEP104];
u8 key_len, key_idx;
- bool done;
+ bool done, waiting;
bool peer_confirmed;
bool timeout_started;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 29bfce6b3561..476b21365058 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -37,6 +37,7 @@
#define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2)
#define IEEE80211_AUTH_MAX_TRIES 3
#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
+#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2)
#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
@@ -734,7 +735,7 @@ static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata,
ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + eht_cap_size);
}
-static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -754,6 +755,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
const struct ieee80211_sband_iftype_data *iftd;
struct ieee80211_prep_tx_info info = {};
+ int ret;
/* we know it's writable, cast away the const */
if (assoc_data->ie_len)
@@ -767,7 +769,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
- return;
+ return -EINVAL;
}
chan = chanctx_conf->def.chan;
rcu_read_unlock();
@@ -818,7 +820,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
(iftd ? iftd->vendor_elems.len : 0),
GFP_KERNEL);
if (!skb)
- return;
+ return -ENOMEM;
skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -1104,15 +1106,22 @@ skip_rates:
skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
}
- if (assoc_data->fils_kek_len &&
- fils_encrypt_assoc_req(skb, assoc_data) < 0) {
- dev_kfree_skb(skb);
- return;
+ if (assoc_data->fils_kek_len) {
+ ret = fils_encrypt_assoc_req(skb, assoc_data);
+ if (ret < 0) {
+ dev_kfree_skb(skb);
+ return ret;
+ }
}
pos = skb_tail_pointer(skb);
kfree(ifmgd->assoc_req_ies);
ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
+ if (!ifmgd->assoc_req_ies) {
+ dev_kfree_skb(skb);
+ return -ENOMEM;
+ }
+
ifmgd->assoc_req_ies_len = pos - ie_start;
drv_mgd_prepare_tx(local, sdata, &info);
@@ -1122,6 +1131,8 @@ skip_rates:
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_tx_skb(sdata, skb);
+
+ return 0;
}
void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -3074,8 +3085,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED ||
(auth_transaction == 1 &&
(status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
- status_code == WLAN_STATUS_SAE_PK))))
+ status_code == WLAN_STATUS_SAE_PK)))) {
+ /* waiting for userspace now */
+ ifmgd->auth_data->waiting = true;
+ ifmgd->auth_data->timeout =
+ jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY;
+ ifmgd->auth_data->timeout_started = true;
+ run_again(sdata, ifmgd->auth_data->timeout);
goto notify_driver;
+ }
sdata_info(sdata, "%pM denied authentication (status %d)\n",
mgmt->sa, status_code);
@@ -4586,6 +4604,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
struct ieee80211_local *local = sdata->local;
+ int ret;
sdata_assert_lock(sdata);
@@ -4606,7 +4625,9 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata, "associate with %pM (try %d/%d)\n",
assoc_data->bss->bssid, assoc_data->tries,
IEEE80211_ASSOC_MAX_TRIES);
- ieee80211_send_assoc(sdata);
+ ret = ieee80211_send_assoc(sdata);
+ if (ret)
+ return ret;
if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
@@ -4679,10 +4700,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
if (ifmgd->auth_data && ifmgd->auth_data->timeout_started &&
time_after(jiffies, ifmgd->auth_data->timeout)) {
- if (ifmgd->auth_data->done) {
+ if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) {
/*
- * ok ... we waited for assoc but userspace didn't,
- * so let's just kill the auth data
+ * ok ... we waited for assoc or continuation but
+ * userspace didn't do it, so kill the auth data
*/
ieee80211_destroy_auth_data(sdata, false);
} else if (ieee80211_auth(sdata)) {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d54a4d98c648..beb6b92eb780 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2607,7 +2607,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
* address, so that the authenticator (e.g. hostapd) will see
* the frame, but bridge won't forward it anywhere else. Note
* that due to earlier filtering, the only other address can
- * be the PAE group address.
+ * be the PAE group address, unless the hardware allowed them
+ * through in 802.3 offloaded mode.
*/
if (unlikely(skb->protocol == sdata->control_port_protocol &&
!ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
@@ -2922,13 +2923,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee802_1d_to_ac[skb->priority];
q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
}
- skb_set_queue_mapping(skb, q);
+ skb_set_queue_mapping(skb, ac);
if (!--mesh_hdr->ttl) {
if (!is_multicast_ether_addr(hdr->addr1))
@@ -4514,12 +4515,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, fast_rx->dev);
- memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->list)
- list_add_tail(&skb->list, rx->list);
- else
- netif_receive_skb(skb);
-
+ ieee80211_deliver_skb_to_local_stack(skb, rx);
}
static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 02ddc0f1bd3e..f49be882e98e 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -25,12 +25,25 @@ struct mctp_dump_cb {
size_t a_idx;
};
-/* unlocked: caller must hold rcu_read_lock */
+/* unlocked: caller must hold rcu_read_lock.
+ * Returned mctp_dev has its refcount incremented, or NULL if unset.
+ */
struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
{
- return rcu_dereference(dev->mctp_ptr);
+ struct mctp_dev *mdev = rcu_dereference(dev->mctp_ptr);
+
+ /* RCU guarantees that any mdev is still live.
+ * Zero refcount implies a pending free, return NULL.
+ */
+ if (mdev)
+ if (!refcount_inc_not_zero(&mdev->refs))
+ return NULL;
+ return mdev;
}
+/* Returned mctp_dev does not have refcount incremented. The returned pointer
+ * remains live while rtnl_lock is held, as that prevents mctp_unregister()
+ */
struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
{
return rtnl_dereference(dev->mctp_ptr);
@@ -107,7 +120,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct ifaddrmsg *hdr;
struct mctp_dev *mdev;
int ifindex;
- int idx, rc;
+ int idx = 0, rc;
hdr = nlmsg_data(cb->nlh);
// filter by ifindex if requested
@@ -124,6 +137,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (mdev) {
rc = mctp_dump_dev_addrinfo(mdev,
skb, cb);
+ mctp_dev_put(mdev);
// Error indicates full buffer, this
// callback will get retried.
if (rc < 0)
@@ -209,7 +223,7 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!mdev)
return -ENODEV;
- if (!mctp_address_ok(addr->s_addr))
+ if (!mctp_address_unicast(addr->s_addr))
return -EINVAL;
/* Prevent duplicates. Under RTNL so don't need to lock for reading */
@@ -298,7 +312,7 @@ void mctp_dev_hold(struct mctp_dev *mdev)
void mctp_dev_put(struct mctp_dev *mdev)
{
- if (refcount_dec_and_test(&mdev->refs)) {
+ if (mdev && refcount_dec_and_test(&mdev->refs)) {
dev_put(mdev->dev);
kfree_rcu(mdev, rcu);
}
@@ -370,6 +384,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev,
if (!mdev)
return 0;
ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ mctp_dev_put(mdev);
return ret;
}
@@ -413,10 +428,10 @@ static void mctp_unregister(struct net_device *dev)
struct mctp_dev *mdev;
mdev = mctp_dev_get_rtnl(dev);
- if (mctp_known(dev) != (bool)mdev) {
+ if (mdev && !mctp_known(dev)) {
// Sanity check, should match what was set in mctp_register
- netdev_warn(dev, "%s: mdev pointer %d but type (%d) match is %d",
- __func__, (bool)mdev, mctp_known(dev), dev->type);
+ netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d",
+ __func__, dev->type);
return;
}
if (!mdev)
@@ -440,7 +455,7 @@ static int mctp_register(struct net_device *dev)
if (mdev) {
if (!mctp_known(dev))
- netdev_warn(dev, "%s: mctp_dev set for unknown type %d",
+ netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d",
__func__, dev->type);
return 0;
}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
index 6ad3e33bd4d4..ffa0f9e0983f 100644
--- a/net/mctp/neigh.c
+++ b/net/mctp/neigh.c
@@ -143,7 +143,7 @@ static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
}
eid = nla_get_u8(tb[NDA_DST]);
- if (!mctp_address_ok(eid)) {
+ if (!mctp_address_unicast(eid)) {
NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
return -EINVAL;
}
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 17e3482aa770..d5e7db83fe9d 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -419,13 +419,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc)
+ if (rc) {
kfree(key);
+ } else {
+ trace_mctp_key_acquire(key);
- trace_mctp_key_acquire(key);
-
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
+ /* we don't need to release key->lock on exit */
+ mctp_key_unref(key);
+ }
key = NULL;
} else {
@@ -455,7 +456,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* the reassembly/response key
*/
if (!rc && flags & MCTP_HDR_FLAG_EOM) {
- msk = container_of(key->sk, struct mctp_sock, sk);
sock_queue_rcv_skb(key->sk, key->reasm_head);
key->reasm_head = NULL;
__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
@@ -835,9 +835,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb = mctp_cb(skb);
- struct mctp_route tmp_rt;
+ struct mctp_route tmp_rt = {0};
struct mctp_sk_key *key;
- struct net_device *dev;
struct mctp_hdr *hdr;
unsigned long flags;
unsigned int mtu;
@@ -850,12 +849,12 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
if (rt) {
ext_rt = false;
- dev = NULL;
-
if (WARN_ON(!rt->dev))
goto out_release;
} else if (cb->ifindex) {
+ struct net_device *dev;
+
ext_rt = true;
rt = &tmp_rt;
@@ -865,7 +864,6 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rcu_read_unlock();
return rc;
}
-
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -946,10 +944,9 @@ out_release:
if (!ext_rt)
mctp_route_release(rt);
- dev_put(dev);
+ mctp_dev_put(tmp_rt.dev);
return rc;
-
}
/* route management */
@@ -961,7 +958,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *ert;
- if (!mctp_address_ok(daddr_start))
+ if (!mctp_address_unicast(daddr_start))
return -EINVAL;
if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
@@ -1091,6 +1088,17 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
goto err_drop;
+ /* source must be valid unicast or null; drop reserved ranges and
+ * broadcast
+ */
+ if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
+ goto err_drop;
+
+ /* dest address: as above, but allow broadcast */
+ if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
+ mctp_address_broadcast(mh->dest)))
+ goto err_drop;
+
/* MCTP drivers must populate halen/haddr */
if (dev->type == ARPHRD_MCTP) {
cb = mctp_cb(skb);
@@ -1112,11 +1120,13 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
rt->output(rt, skb);
mctp_route_release(rt);
+ mctp_dev_put(mdev);
return NET_RX_SUCCESS;
err_drop:
kfree_skb(skb);
+ mctp_dev_put(mdev);
return NET_RX_DROP;
}
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index 7b7918702592..e03ba66bbe18 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -54,7 +54,6 @@ struct mctp_test_dev *mctp_test_create_dev(void)
rcu_read_lock();
dev->mdev = __mctp_dev_get(ndev);
- mctp_dev_hold(dev->mdev);
rcu_read_unlock();
return dev;
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3240b72271a7..e55d3dfbee0c 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -35,17 +35,23 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
+ SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
+ SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
+ SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX),
+ SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX),
+ SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX),
+ SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index ecd3d8b117e0..00576179a619 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -28,17 +28,23 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
+ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
+ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
+ MPTCP_MIB_MPFASTCLOSETX, /* Transmit a MP_FASTCLOSE */
+ MPTCP_MIB_MPFASTCLOSERX, /* Received a MP_FASTCLOSE */
+ MPTCP_MIB_MPRSTTX, /* Transmit a MP_RST */
+ MPTCP_MIB_MPRSTRX, /* Received a MP_RST */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 3e82ac24d548..325383646f5c 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -323,6 +323,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
+ pr_debug("MP_FASTCLOSE: recv_key=%llu", mp_opt->rcvr_key);
break;
case MPTCPOPT_RST:
@@ -355,8 +356,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
}
-void mptcp_get_options(const struct sock *sk,
- const struct sk_buff *skb,
+void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
const struct tcphdr *th = tcp_hdr(skb);
@@ -653,7 +653,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
bool drop_other_suboptions = false;
unsigned int opt_size = *size;
bool echo;
- bool port;
int len;
/* add addr will strip the existing options, be sure to avoid breaking
@@ -662,12 +661,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
!mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
- &echo, &port, &drop_other_suboptions))
+ &echo, &drop_other_suboptions))
return false;
if (drop_other_suboptions)
remaining += opt_size;
- len = mptcp_add_addr_len(opts->addr.family, echo, port);
+ len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
if (remaining < len)
return false;
@@ -834,11 +833,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
}
/* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX);
}
return true;
}
@@ -1086,8 +1087,7 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
&mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
- msk, (unsigned long long)hmac,
- (unsigned long long)mp_opt->ahmac);
+ msk, hmac, mp_opt->ahmac);
return hmac == mp_opt->ahmac;
}
@@ -1114,7 +1114,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
/* The subflow can be in close state only if check_fully_established()
* just sent a reset. If so, tell the caller to ignore the current packet.
@@ -1127,6 +1127,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
msk->local_key == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);
}
if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
@@ -1161,6 +1162,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
subflow->reset_seen = 1;
subflow->reset_reason = mp_opt.reset_reason;
subflow->reset_transient = mp_opt.reset_transient;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTRX);
}
if (!(mp_opt.suboptions & OPTION_MPTCP_DSS))
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 696b2c4613a7..01809eef29b4 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -213,13 +213,15 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->remote = *addr;
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_data *pm = &msk->pm;
@@ -253,8 +255,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
mptcp_event_addr_removed(msk, rm_list->ids[i]);
spin_lock_bh(&pm->lock);
- mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
- pm->rm_list_rx = *rm_list;
+ if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
+ pm->rm_list_rx = *rm_list;
+ else
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
spin_unlock_bh(&pm->lock);
}
@@ -275,14 +279,15 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
/* path manager helpers */
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
unsigned int opt_size, unsigned int remaining,
struct mptcp_addr_info *addr, bool *echo,
- bool *port, bool *drop_other_suboptions)
+ bool *drop_other_suboptions)
{
int ret = false;
u8 add_addr;
u8 family;
+ bool port;
spin_lock_bh(&msk->pm.lock);
@@ -300,10 +305,10 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
}
*echo = mptcp_pm_should_add_signal_echo(msk);
- *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
+ port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
family = *echo ? msk->pm.remote.family : msk->pm.local.family;
- if (remaining < mptcp_add_addr_len(family, *echo, *port))
+ if (remaining < mptcp_add_addr_len(family, *echo, port))
goto out_unlock;
if (*echo) {
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index e4fd54fff1d2..800515fe5e1d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -83,16 +83,6 @@ static bool addresses_equal(const struct mptcp_addr_info *a,
return a->port == b->port;
}
-static bool address_zero(const struct mptcp_addr_info *addr)
-{
- struct mptcp_addr_info zero;
-
- memset(&zero, 0, sizeof(zero));
- zero.family = addr->family;
-
- return addresses_equal(addr, &zero, true);
-}
-
static void local_address(const struct sock_common *skc,
struct mptcp_addr_info *addr)
{
@@ -120,7 +110,7 @@ static void remote_address(const struct sock_common *skc,
}
static bool lookup_subflow_by_saddr(const struct list_head *list,
- struct mptcp_addr_info *saddr)
+ const struct mptcp_addr_info *saddr)
{
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info cur;
@@ -138,7 +128,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
}
static bool lookup_subflow_by_daddr(const struct list_head *list,
- struct mptcp_addr_info *daddr)
+ const struct mptcp_addr_info *daddr)
{
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info cur;
@@ -157,10 +147,10 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
- struct mptcp_sock *msk)
+ const struct mptcp_sock *msk)
{
+ const struct sock *sk = (const struct sock *)msk;
struct mptcp_pm_addr_entry *entry, *ret = NULL;
- struct sock *sk = (struct sock *)msk;
msk_owned_by_me(msk);
@@ -190,7 +180,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
}
static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
+select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
@@ -214,16 +204,16 @@ select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
return ret;
}
-unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ const struct pm_nl_pernet *pernet;
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
-unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet;
@@ -232,7 +222,7 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk)
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
-unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet;
@@ -241,7 +231,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk)
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
-unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet;
@@ -264,8 +254,8 @@ bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
}
struct mptcp_pm_add_entry *
-mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
@@ -346,7 +336,7 @@ out:
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr, bool check_id)
+ const struct mptcp_addr_info *addr, bool check_id)
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
@@ -364,7 +354,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
}
static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ const struct mptcp_pm_addr_entry *entry)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -410,8 +400,8 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
- struct mptcp_addr_info *addr)
+static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
+ const struct mptcp_addr_info *addr)
{
int i;
@@ -493,9 +483,9 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
}
static int
-lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
{
- struct mptcp_pm_addr_entry *entry;
+ const struct mptcp_pm_addr_entry *entry;
int ret = -1;
rcu_read_lock();
@@ -546,6 +536,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet, msk);
+ /* due to racing events on both ends we can reach here while
+ * previous add address is still running: if we invoke now
+ * mptcp_pm_announce_addr(), that will fail and the
+ * corresponding id will be marked as used.
+ * Instead let the PM machinery reschedule us when the
+ * current address announce will be completed.
+ */
+ if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
+ return;
+
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
@@ -650,6 +650,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
unsigned int subflows_max;
+ bool reset_port = false;
int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
@@ -659,15 +660,19 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
- if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
+ remote = msk->pm.remote;
+ if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
goto add_addr_echo;
+ /* pick id 0 port, if none is provided the remote address */
+ if (!remote.port) {
+ reset_port = true;
+ remote.port = sk->sk_dport;
+ }
+
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- remote = msk->pm.remote;
- if (!remote.port)
- remote.port = sk->sk_dport;
nr = fill_local_addresses_vec(msk, addrs);
msk->pm.add_addr_accepted++;
@@ -680,8 +685,12 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
__mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
+ /* be sure to echo exactly the received address */
+ if (reset_port)
+ remote.port = 0;
+
add_addr_echo:
- mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
+ mptcp_pm_announce_addr(msk, &remote, true);
mptcp_pm_nl_addr_send_ack(msk);
}
@@ -858,10 +867,18 @@ static bool address_use_port(struct mptcp_pm_addr_entry *entry)
MPTCP_PM_ADDR_FLAG_SIGNAL;
}
+/* caller must ensure the RCU grace period is already elapsed */
+static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
+{
+ if (entry->lsk)
+ sock_release(entry->lsk);
+ kfree(entry);
+}
+
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
struct mptcp_pm_addr_entry *entry)
{
- struct mptcp_pm_addr_entry *cur;
+ struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
int ret = -EINVAL;
@@ -882,8 +899,22 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
list_for_each_entry(cur, &pernet->local_addr_list, list) {
if (addresses_equal(&cur->addr, &entry->addr,
address_use_port(entry) &&
- address_use_port(cur)))
- goto out;
+ address_use_port(cur))) {
+ /* allow replacing the exiting endpoint only if such
+ * endpoint is an implicit one and the user-space
+ * did not provide an endpoint id
+ */
+ if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT))
+ goto out;
+ if (entry->addr.id)
+ goto out;
+
+ pernet->addrs--;
+ entry->addr.id = cur->addr.id;
+ list_del_rcu(&cur->list);
+ del_entry = cur;
+ break;
+ }
}
if (!entry->addr.id) {
@@ -919,6 +950,12 @@ find_next:
out:
spin_unlock_bh(&pernet->lock);
+
+ /* just replaced an existing entry, free it */
+ if (del_entry) {
+ synchronize_rcu();
+ __mptcp_pm_release_addr_entry(del_entry);
+ }
return ret;
}
@@ -992,9 +1029,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (addresses_equal(&msk_local, &skc_local, false))
return 0;
- if (address_zero(&skc_local))
- return 0;
-
pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
rcu_read_lock();
@@ -1017,7 +1051,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
entry->addr.id = 0;
entry->addr.port = 0;
entry->ifindex = 0;
- entry->flags = 0;
+ entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
@@ -1230,6 +1264,17 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh");
+ return -EINVAL;
+ }
+
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+ GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint");
+ return -EINVAL;
+ }
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
@@ -1281,7 +1326,7 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
}
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
@@ -1296,7 +1341,7 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
}
static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
+ const struct mptcp_addr_info *addr,
bool force)
{
struct mptcp_rm_list list = { .nr = 0 };
@@ -1314,11 +1359,12 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
}
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
- struct mptcp_addr_info *addr)
+ const struct mptcp_pm_addr_entry *entry)
{
- struct mptcp_sock *msk;
- long s_slot = 0, s_num = 0;
+ const struct mptcp_addr_info *addr = &entry->addr;
struct mptcp_rm_list list = { .nr = 0 };
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
pr_debug("remove_id=%d", addr->id);
@@ -1335,7 +1381,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
lock_sock(sk);
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
- mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
+ mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
+ !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
if (remove_subflow)
mptcp_pm_remove_subflow(msk, &list);
release_sock(sk);
@@ -1348,14 +1395,6 @@ next:
return 0;
}
-/* caller must ensure the RCU grace period is already elapsed */
-static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
-{
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree(entry);
-}
-
static int mptcp_nl_remove_id_zero_address(struct net *net,
struct mptcp_addr_info *addr)
{
@@ -1432,7 +1471,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
__clear_bit(entry->addr.id, pernet->id_bitmap);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
+ mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
synchronize_rcu();
__mptcp_pm_release_addr_entry(entry);
@@ -1447,14 +1486,12 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
list_for_each_entry(entry, rm_list, list) {
if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX &&
- slist.nr < MPTCP_RM_IDS_MAX) {
- alist.ids[alist.nr++] = entry->addr.id;
+ slist.nr < MPTCP_RM_IDS_MAX)
slist.ids[slist.nr++] = entry->addr.id;
- } else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX) {
+
+ if (remove_anno_list_by_saddr(msk, &entry->addr) &&
+ alist.nr < MPTCP_RM_IDS_MAX)
alist.ids[alist.nr++] = entry->addr.id;
- }
}
if (alist.nr) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f60f01b14fac..101aeebeb9eb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -117,6 +117,9 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
+
+ /* This is the first subflow, always with id 0 */
+ subflow->local_id_valid = 1;
mptcp_sock_graft(msk->first, sk->sk_socket);
return 0;
@@ -466,9 +469,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
static void mptcp_set_datafin_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 retransmits;
+
+ retransmits = min_t(u32, icsk->icsk_retransmits,
+ ilog2(TCP_RTO_MAX / TCP_RTO_MIN));
- mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
- TCP_RTO_MIN << icsk->icsk_retransmits);
+ mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits;
}
static void __mptcp_set_timeout(struct sock *sk, long tout)
@@ -1353,6 +1359,7 @@ alloc_skb:
out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(skb, copy);
+ trace_mptcp_sendmsg_frag(mpext);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
return copy;
}
@@ -3294,6 +3301,17 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
return 0;
delta = msk->write_seq - v;
+ if (__mptcp_check_fallback(msk) && msk->first) {
+ struct tcp_sock *tp = tcp_sk(msk->first);
+
+ /* the first subflow is disconnected after close - see
+ * __mptcp_close_ssk(). tcp_disconnect() moves the write_seq
+ * so ignore that status, too.
+ */
+ if (!((1 << msk->first->sk_state) &
+ (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)))
+ delta += READ_ONCE(tp->write_seq) - tp->snd_una;
+ }
if (delta > INT_MAX)
delta = INT_MAX;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 85317ce38e3f..3c1a3036550f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -442,7 +442,8 @@ struct mptcp_subflow_context {
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
- stale : 1; /* unable to snd/rcv data, do not use for xmit */
+ stale : 1, /* unable to snd/rcv data, do not use for xmit */
+ local_id_valid : 1; /* local_id is correctly initialized */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -468,9 +469,7 @@ struct mptcp_subflow_context {
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops;
- void (*tcp_data_ready)(struct sock *sk);
void (*tcp_state_change)(struct sock *sk);
- void (*tcp_write_space)(struct sock *sk);
void (*tcp_error_report)(struct sock *sk);
struct rcu_head rcu;
@@ -614,9 +613,9 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
- sk->sk_data_ready = ctx->tcp_data_ready;
+ sk->sk_data_ready = sock_def_readable;
sk->sk_state_change = ctx->tcp_state_change;
- sk->sk_write_space = ctx->tcp_write_space;
+ sk->sk_write_space = sk_stream_write_space;
sk->sk_error_report = ctx->tcp_error_report;
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
@@ -643,8 +642,7 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
-void mptcp_get_options(const struct sock *sk,
- const struct sk_buff *skb,
+void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
@@ -743,7 +741,7 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr);
+ const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
@@ -754,10 +752,10 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr, bool check_id);
+ const struct mptcp_addr_info *addr, bool check_id);
struct mptcp_pm_add_entry *
-mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr);
+mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr);
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
u8 *flags, int *ifindex);
@@ -816,10 +814,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
unsigned int opt_size, unsigned int remaining,
struct mptcp_addr_info *addr, bool *echo,
- bool *port, bool *drop_other_suboptions);
+ bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
@@ -830,10 +828,10 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
-unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
-unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
-unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
-unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index dacf3cee0027..f949d22f52bd 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -343,6 +343,8 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_RCVLOWAT:
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
+ case SO_SNDTIMEO_OLD:
+ case SO_SNDTIMEO_NEW:
case SO_BUSY_POLL:
case SO_PREFER_BUSY_POLL:
case SO_BUSY_POLL_BUDGET:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bea47a1180dc..aba260f547da 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -153,7 +153,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(sk_listener, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
@@ -250,7 +250,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(sk_listener, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
@@ -344,9 +344,7 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
thmac = get_unaligned_be64(hmac);
pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
- subflow, subflow->token,
- (unsigned long long)thmac,
- (unsigned long long)subflow->thmac);
+ subflow, subflow->token, thmac, subflow->thmac);
return thmac == subflow->thmac;
}
@@ -410,7 +408,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
MPTCP_INC_STATS(sock_net(sk),
@@ -483,9 +481,53 @@ do_reset:
mptcp_subflow_reset(sk);
}
+static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
+{
+ subflow->local_id = local_id;
+ subflow->local_id_valid = 1;
+}
+
+static int subflow_chk_local_id(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ int err;
+
+ if (likely(subflow->local_id_valid))
+ return 0;
+
+ err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
+ if (err < 0)
+ return err;
+
+ subflow_set_local_id(subflow, err);
+ return 0;
+}
+
+static int subflow_rebuild_header(struct sock *sk)
+{
+ int err = subflow_chk_local_id(sk);
+
+ if (unlikely(err < 0))
+ return err;
+
+ return inet_sk_rebuild_header(sk);
+}
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int subflow_v6_rebuild_header(struct sock *sk)
+{
+ int err = subflow_chk_local_id(sk);
+
+ if (unlikely(err < 0))
+ return err;
+
+ return inet6_sk_rebuild_header(sk);
+}
+#endif
+
struct request_sock_ops mptcp_subflow_request_sock_ops;
-EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
-static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
+static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
@@ -506,9 +548,9 @@ drop:
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
-static struct inet_connection_sock_af_ops subflow_v6_specific;
-static struct inet_connection_sock_af_ops subflow_v6m_specific;
+static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
+static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
+static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
static struct proto tcpv6_prot_override;
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -663,7 +705,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
fallback = true;
goto create_child;
@@ -673,7 +715,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
@@ -790,7 +832,7 @@ dispose_child:
return child;
}
-static struct inet_connection_sock_af_ops subflow_specific;
+static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
static struct proto tcp_prot_override;
enum mapping_status {
@@ -1107,7 +1149,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
if (subflow->data_avail)
return true;
@@ -1172,7 +1214,7 @@ fallback:
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return true;
}
@@ -1185,7 +1227,7 @@ fallback:
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return false;
}
@@ -1207,7 +1249,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1311,7 +1353,7 @@ static void subflow_write_space(struct sock *ssk)
mptcp_write_space(sk);
}
-static struct inet_connection_sock_af_ops *
+static const struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1326,7 +1368,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_connection_sock_af_ops *target;
+ const struct inet_connection_sock_af_ops *target;
target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
@@ -1401,13 +1443,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (!local_id) {
- err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
- if (err < 0)
- goto failed;
-
- local_id = err;
- }
+ if (local_id)
+ subflow_set_local_id(subflow, local_id);
mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
&flags, &ifindex);
@@ -1432,7 +1469,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->local_id = local_id;
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
@@ -1657,10 +1693,12 @@ static int subflow_ulp_init(struct sock *sk)
tp->is_mptcp = 1;
ctx->icsk_af_ops = icsk->icsk_af_ops;
icsk->icsk_af_ops = subflow_default_af_ops(sk);
- ctx->tcp_data_ready = sk->sk_data_ready;
ctx->tcp_state_change = sk->sk_state_change;
- ctx->tcp_write_space = sk->sk_write_space;
ctx->tcp_error_report = sk->sk_error_report;
+
+ WARN_ON_ONCE(sk->sk_data_ready != sock_def_readable);
+ WARN_ON_ONCE(sk->sk_write_space != sk_stream_write_space);
+
sk->sk_data_ready = subflow_data_ready;
sk->sk_write_space = subflow_write_space;
sk->sk_state_change = subflow_state_change;
@@ -1715,9 +1753,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->conn_finished = 1;
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
- new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
- new_ctx->tcp_write_space = old_ctx->tcp_write_space;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
new_ctx->tcp_sock = newsk;
@@ -1731,15 +1767,22 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->token = subflow_req->token;
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->idsn = subflow_req->idsn;
+
+ /* this is the first subflow, id is always 0 */
+ new_ctx->local_id_valid = 1;
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->local_id = subflow_req->local_id;
new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
+
+ /* the subflow req id is valid, fetched via subflow_check_req()
+ * and subflow_token_join_request()
+ */
+ subflow_set_local_id(new_ctx, subflow_req->local_id);
}
}
@@ -1792,6 +1835,7 @@ void __init mptcp_subflow_init(void)
subflow_specific.conn_request = subflow_v4_conn_request;
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
+ subflow_specific.rebuild_header = subflow_rebuild_header;
tcp_prot_override = tcp_prot;
tcp_prot_override.release_cb = tcp_release_cb_override;
@@ -1804,6 +1848,7 @@ void __init mptcp_subflow_init(void)
subflow_v6_specific.conn_request = subflow_v6_conn_request;
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
+ subflow_v6_specific.rebuild_header = subflow_v6_rebuild_header;
subflow_v6m_specific = subflow_v6_specific;
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
@@ -1811,6 +1856,7 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
subflow_v6m_specific.net_frag_header_len = 0;
+ subflow_v6m_specific.rebuild_header = subflow_rebuild_header;
tcpv6_prot_override = tcpv6_prot;
tcpv6_prot_override.release_cb = tcp_release_cb_override;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index d1c9dfbb11fa..9a4feb922cf6 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -428,14 +428,15 @@ static int __nf_register_net_hook(struct net *net, int pf,
p = nf_entry_dereference(*pp);
new_hooks = nf_hook_entries_grow(p, reg);
- if (!IS_ERR(new_hooks))
+ if (!IS_ERR(new_hooks)) {
+ hooks_validate(new_hooks);
rcu_assign_pointer(*pp, new_hooks);
+ }
mutex_unlock(&nf_hook_mutex);
if (IS_ERR(new_hooks))
return PTR_ERR(new_hooks);
- hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index d2e5a8f644b8..029171379884 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -610,7 +610,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
nf_reset_ct(skb);
skb_forward_csum(skb);
if (skb->dev)
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
}
return ret;
}
@@ -652,7 +652,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
if (skb->dev)
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -674,7 +674,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
if (skb->dev)
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index a579e59ee5c5..7873bd1389c3 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -19,7 +19,7 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, skb->mac_len);
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
dev_queue_xmit(skb);
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 889cf88d3dba..f1d387129f02 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -376,7 +376,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
ip_decrease_ttl(iph);
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
@@ -611,7 +611,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
ip6h->hop_limit--;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b561e0a44a45..fc4265acd9c4 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -110,7 +110,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
nf_flow_rule_lwt_match(match, tun_info);
}
- key->meta.ingress_ifindex = tuple->iifidx;
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
+ key->meta.ingress_ifindex = tuple->tc.iifidx;
+ else
+ key->meta.ingress_ifindex = tuple->iifidx;
+
mask->meta.ingress_ifindex = 0xffffffff;
if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 6d12afabfe8a..63d1516816b1 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
+static void nf_queue_sock_put(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ sock_gen_put(sk);
+#else
+ sock_put(sk);
+#endif
+}
+
static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
@@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(state->in);
dev_put(state->out);
if (state->sk)
- sock_put(state->sk);
+ nf_queue_sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_put(entry->physin);
@@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
}
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
dev_hold(state->in);
dev_hold(state->out);
- if (state->sk)
- sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
break;
}
+ if (skb_sk_is_prefetched(skb)) {
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk)) {
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ return -ENOTCONN;
+
+ /* drop refcount on skb_orphan */
+ skb->destructor = sock_edemux;
+ }
+ }
+
entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry)
return -ENOMEM;
@@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
__nf_queue_entry_init_physdevs(entry);
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
switch (entry->state.pf) {
case AF_INET:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5fa16990da95..c86748b3873b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4502,7 +4502,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
nft_set_elem_destroy(set, catchall->elem, true);
- kfree_rcu(catchall);
+ kfree_rcu(catchall, rcu);
}
}
@@ -5669,7 +5669,7 @@ static void nft_setelem_catchall_remove(const struct net *net,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
if (catchall->elem == elem->priv) {
list_del_rcu(&catchall->list);
- kfree_rcu(catchall);
+ kfree_rcu(catchall, rcu);
break;
}
}
@@ -6551,12 +6551,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
{
struct nft_object *newobj;
struct nft_trans *trans;
- int err;
+ int err = -ENOMEM;
+
+ if (!try_module_get(type->owner))
+ return -ENOENT;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
- return -ENOMEM;
+ goto err_trans;
newobj = nft_obj_init(ctx, type, attr);
if (IS_ERR(newobj)) {
@@ -6573,6 +6576,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
err_free_trans:
kfree(trans);
+err_trans:
+ module_put(type->owner);
return err;
}
@@ -8185,7 +8190,7 @@ static void nft_obj_commit_update(struct nft_trans *trans)
if (obj->ops->update)
obj->ops->update(obj, newobj);
- kfree(newobj);
+ nft_obj_destroy(&trans->ctx, newobj);
}
static void nft_commit_release(struct nft_trans *trans)
@@ -8976,7 +8981,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- kfree(nft_trans_obj_newobj(trans));
+ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
@@ -9636,10 +9641,13 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain);
static void __nft_release_hook(struct net *net, struct nft_table *table)
{
+ struct nft_flowtable *flowtable;
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list)
nf_tables_unregister_hook(net, table, chain);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list);
}
static void __nft_release_hooks(struct net *net)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 9656c1646222..2d36952b1392 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_first(rule);
while (nft_expr_more(rule, expr)) {
- if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
+ if (expr->ops->offload_action &&
+ expr->ops->offload_action(expr))
num_actions++;
expr = nft_expr_next(expr);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index ae9c0756bba5..d97eb280cb2e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -460,6 +460,7 @@ __build_packet_message(struct nfnl_log_net *log,
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
+ ktime_t tstamp;
nlh = nfnl_msg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
@@ -588,9 +589,10 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
+ tstamp = skb_tstamp_cond(skb, false);
+ if (hooknum <= NF_INET_FORWARD && tstamp) {
struct nfulnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
+ struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8c15978d9258..a364f8e5e698 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -392,6 +392,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
+ ktime_t tstamp;
size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -407,7 +408,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ nla_total_size(sizeof(u_int32_t)); /* cap_len */
- if (entskb->tstamp)
+ tstamp = skb_tstamp_cond(entskb, false);
+ if (tstamp)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
size += nfqnl_get_bridge_size(entry);
@@ -582,9 +584,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (nfqnl_put_bridge(entry, skb) < 0)
goto nla_put_failure;
- if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
+ if (entry->state.hook <= NF_INET_FORWARD && tstamp) {
struct nfqnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
+ struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
@@ -715,9 +717,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index bbf3fcba3df4..5b5c607fbf83 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
}
+static bool nft_dup_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
static struct nft_expr_type nft_dup_netdev_type;
static const struct nft_expr_ops nft_dup_netdev_ops = {
.type = &nft_dup_netdev_type,
@@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
.init = nft_dup_netdev_init,
.dump = nft_dup_netdev_dump,
.offload = nft_dup_netdev_offload,
+ .offload_action = nft_dup_netdev_offload_action,
};
static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index fa9301ca6033..08e7a289738e 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -79,6 +79,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
}
+static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
struct nft_fwd_neigh {
u8 sreg_dev;
u8 sreg_addr;
@@ -140,7 +145,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
return;
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
neigh_xmit(neigh_table, dev, addr, skb);
out:
regs->verdict.code = verdict;
@@ -222,6 +227,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.dump = nft_fwd_netdev_dump,
.validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
+ .offload_action = nft_fwd_netdev_offload_action,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 90c64d27ae53..d0f67d325bdf 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -213,6 +213,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_immediate_offload_action(const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return true;
+
+ return false;
+}
+
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -224,7 +234,7 @@ static const struct nft_expr_ops nft_imm_ops = {
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
.offload = nft_immediate_offload,
- .offload_flags = NFT_OFFLOAD_F_ACTION,
+ .offload_action = nft_immediate_offload_action,
};
struct nft_expr_type nft_imm_type __read_mostly = {
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c4f308460dd1..a726b623963d 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -340,11 +340,20 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
+static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_pkts_ops = {
.type = &nft_limit_obj_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
.init = nft_limit_obj_pkts_init,
+ .destroy = nft_limit_obj_pkts_destroy,
.eval = nft_limit_obj_pkts_eval,
.dump = nft_limit_obj_pkts_dump,
};
@@ -378,11 +387,20 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
+static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, priv);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_bytes_ops = {
.type = &nft_limit_obj_type,
.size = sizeof(struct nft_limit_priv),
.init = nft_limit_obj_bytes_init,
+ .destroy = nft_limit_obj_bytes_destroy,
.eval = nft_limit_obj_bytes_eval,
.dump = nft_limit_obj_bytes_dump,
};
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index a0109fa1e92d..1133e06f3c40 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
if (err)
goto nf_ct_failure;
err = nf_synproxy_ipv6_init(snet, ctx->net);
- if (err)
+ if (err) {
+ nf_synproxy_ipv4_fini(snet, ctx->net);
goto nf_ct_failure;
+ }
break;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5e6459e11605..7013f55f05d1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par)
{
if (par->family == NFPROTO_IPV4)
nf_defrag_ipv4_disable(par->net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
- nf_defrag_ipv4_disable(par->net);
+ nf_defrag_ipv6_disable(par->net);
+#endif
}
static struct xt_match socket_mt_reg[] __read_mostly = {
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index d49d4bf2e37c..c1d9be636933 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -6,7 +6,6 @@
enum llcp_state {
LLCP_CONNECTED = 1, /* wait_for_packet() wants that */
LLCP_CONNECTING,
- LLCP_DISCONNECTING,
LLCP_CLOSED,
LLCP_BOUND,
LLCP_LISTEN,
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 5ad5157aa9c5..3364caabef8b 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -383,7 +383,7 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
pr_debug("WKS %d\n", ssap);
/* This is a WKS, let's check if it's free */
- if (local->local_wks & BIT(ssap)) {
+ if (test_bit(ssap, &local->local_wks)) {
mutex_unlock(&local->sdp_lock);
return LLCP_SAP_MAX;
@@ -737,13 +737,6 @@ static void nfc_llcp_tx_work(struct work_struct *work)
print_hex_dump_debug("LLCP Tx: ", DUMP_PREFIX_OFFSET,
16, 1, skb->data, skb->len, true);
- if (ptype == LLCP_PDU_DISC && sk != NULL &&
- sk->sk_state == LLCP_DISCONNECTING) {
- nfc_llcp_sock_unlink(&local->sockets, sk);
- sock_orphan(sk);
- sock_put(sk);
- }
-
if (ptype == LLCP_PDU_I)
copy_skb = skb_copy(skb, GFP_ATOMIC);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 0b93a17b9f11..4ca35791c93b 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -108,21 +108,13 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
llcp_sock->service_name_len,
GFP_KERNEL);
if (!llcp_sock->service_name) {
- nfc_llcp_local_put(llcp_sock->local);
- llcp_sock->local = NULL;
- llcp_sock->dev = NULL;
ret = -ENOMEM;
- goto put_dev;
+ goto sock_llcp_put_local;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
- nfc_llcp_local_put(llcp_sock->local);
- llcp_sock->local = NULL;
- kfree(llcp_sock->service_name);
- llcp_sock->service_name = NULL;
- llcp_sock->dev = NULL;
ret = -EADDRINUSE;
- goto put_dev;
+ goto free_service_name;
}
llcp_sock->reserved_ssap = llcp_sock->ssap;
@@ -132,6 +124,19 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap);
sk->sk_state = LLCP_BOUND;
+ nfc_put_device(dev);
+ release_sock(sk);
+
+ return 0;
+
+free_service_name:
+ kfree(llcp_sock->service_name);
+ llcp_sock->service_name = NULL;
+
+sock_llcp_put_local:
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
put_dev:
nfc_put_device(dev);
@@ -626,23 +631,16 @@ static int llcp_sock_release(struct socket *sock)
}
}
- if (llcp_sock->reserved_ssap < LLCP_SAP_MAX)
- nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap);
-
- release_sock(sk);
-
- /* Keep this sock alive and therefore do not remove it from the sockets
- * list until the DISC PDU has been actually sent. Otherwise we would
- * reply with DM PDUs before sending the DISC one.
- */
- if (sk->sk_state == LLCP_DISCONNECTING)
- return err;
-
if (sock->type == SOCK_RAW)
nfc_llcp_sock_unlink(&local->raw_sockets, sk);
else
nfc_llcp_sock_unlink(&local->sockets, sk);
+ if (llcp_sock->reserved_ssap < LLCP_SAP_MAX)
+ nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap);
+
+ release_sock(sk);
+
out:
sock_orphan(sk);
sock_put(sk);
@@ -712,10 +710,8 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->local = nfc_llcp_local_get(local);
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
- nfc_llcp_local_put(llcp_sock->local);
- llcp_sock->local = NULL;
ret = -ENOMEM;
- goto put_dev;
+ goto sock_llcp_put_local;
}
llcp_sock->reserved_ssap = llcp_sock->ssap;
@@ -760,8 +756,11 @@ sock_unlink:
sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
+
+sock_llcp_put_local:
nfc_llcp_local_put(llcp_sock->local);
llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
put_dev:
nfc_put_device(dev);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..780d9e2246f3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
+ u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+ ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
+ (__force __wsum)(ipv6_tclass << 12));
+
+ ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
+{
+ u32 ofl;
+
+ ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
+ fl = OVS_MASKED(ofl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ nh->flow_lbl[0] = (u8)(fl >> 16);
+ nh->flow_lbl[1] = (u8)(fl >> 8);
+ nh->flow_lbl[2] = (u8)fl;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+ new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
+ (__force __wsum)(new_ttl << 8));
+ nh->hop_limit = new_ttl;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +577,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
flow_key->ip.tos = ipv6_get_dsfield(nh);
}
if (mask->ipv6_label) {
- set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
flow_key->ipv6.label =
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
- mask->ipv6_hlimit);
+ set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index f6cd24fd530c..372bf54a0ca9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -241,6 +241,144 @@ static bool icmphdr_ok(struct sk_buff *skb)
sizeof(struct icmphdr));
}
+/**
+ * get_ipv6_ext_hdrs() - Parses packet and sets IPv6 extension header flags.
+ *
+ * @skb: buffer where extension header data starts in packet
+ * @nh: ipv6 header
+ * @ext_hdrs: flags are stored here
+ *
+ * OFPIEH12_UNREP is set if more than one of a given IPv6 extension header
+ * is unexpectedly encountered. (Two destination options headers may be
+ * expected and would not cause this bit to be set.)
+ *
+ * OFPIEH12_UNSEQ is set if IPv6 extension headers were not in the order
+ * preferred (but not required) by RFC 2460:
+ *
+ * When more than one extension header is used in the same packet, it is
+ * recommended that those headers appear in the following order:
+ * IPv6 header
+ * Hop-by-Hop Options header
+ * Destination Options header
+ * Routing header
+ * Fragment header
+ * Authentication header
+ * Encapsulating Security Payload header
+ * Destination Options header
+ * upper-layer header
+ */
+static void get_ipv6_ext_hdrs(struct sk_buff *skb, struct ipv6hdr *nh,
+ u16 *ext_hdrs)
+{
+ u8 next_type = nh->nexthdr;
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ int dest_options_header_count = 0;
+
+ *ext_hdrs = 0;
+
+ while (ipv6_ext_hdr(next_type)) {
+ struct ipv6_opt_hdr _hdr, *hp;
+
+ switch (next_type) {
+ case IPPROTO_NONE:
+ *ext_hdrs |= OFPIEH12_NONEXT;
+ /* stop parsing */
+ return;
+
+ case IPPROTO_ESP:
+ if (*ext_hdrs & OFPIEH12_ESP)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs & ~(OFPIEH12_HOP | OFPIEH12_DEST |
+ OFPIEH12_ROUTER | IPPROTO_FRAGMENT |
+ OFPIEH12_AUTH | OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_ESP;
+ break;
+
+ case IPPROTO_AH:
+ if (*ext_hdrs & OFPIEH12_AUTH)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs &
+ ~(OFPIEH12_HOP | OFPIEH12_DEST | OFPIEH12_ROUTER |
+ IPPROTO_FRAGMENT | OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_AUTH;
+ break;
+
+ case IPPROTO_DSTOPTS:
+ if (dest_options_header_count == 0) {
+ if (*ext_hdrs &
+ ~(OFPIEH12_HOP | OFPIEH12_UNREP))
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ *ext_hdrs |= OFPIEH12_DEST;
+ } else if (dest_options_header_count == 1) {
+ if (*ext_hdrs &
+ ~(OFPIEH12_HOP | OFPIEH12_DEST |
+ OFPIEH12_ROUTER | OFPIEH12_FRAG |
+ OFPIEH12_AUTH | OFPIEH12_ESP |
+ OFPIEH12_UNREP)) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ } else {
+ *ext_hdrs |= OFPIEH12_UNREP;
+ }
+ dest_options_header_count++;
+ break;
+
+ case IPPROTO_FRAGMENT:
+ if (*ext_hdrs & OFPIEH12_FRAG)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs & ~(OFPIEH12_HOP |
+ OFPIEH12_DEST |
+ OFPIEH12_ROUTER |
+ OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_FRAG;
+ break;
+
+ case IPPROTO_ROUTING:
+ if (*ext_hdrs & OFPIEH12_ROUTER)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs & ~(OFPIEH12_HOP |
+ OFPIEH12_DEST |
+ OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_ROUTER;
+ break;
+
+ case IPPROTO_HOPOPTS:
+ if (*ext_hdrs & OFPIEH12_HOP)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ /* OFPIEH12_HOP is set to 1 if a hop-by-hop IPv6
+ * extension header is present as the first
+ * extension header in the packet.
+ */
+ if (*ext_hdrs == 0)
+ *ext_hdrs |= OFPIEH12_HOP;
+ else
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ break;
+
+ default:
+ return;
+ }
+
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (!hp)
+ break;
+ next_type = hp->nexthdr;
+ start += ipv6_optlen(hp);
+ }
+}
+
static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
unsigned short frag_off;
@@ -256,6 +394,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
nh = ipv6_hdr(skb);
+ get_ipv6_ext_hdrs(skb, nh, &key->ipv6.exthdrs);
+
key->ip.proto = NEXTHDR_NONE;
key->ip.tos = ipv6_get_dsfield(nh);
key->ip.ttl = nh->hop_limit;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 758a8c77f736..073ab73ffeaa 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -32,6 +32,19 @@ enum sw_flow_mac_proto {
#define SW_FLOW_KEY_INVALID 0x80
#define MPLS_LABEL_DEPTH 3
+/* Bit definitions for IPv6 Extension Header pseudo-field. */
+enum ofp12_ipv6exthdr_flags {
+ OFPIEH12_NONEXT = 1 << 0, /* "No next header" encountered. */
+ OFPIEH12_ESP = 1 << 1, /* Encrypted Sec Payload header present. */
+ OFPIEH12_AUTH = 1 << 2, /* Authentication header present. */
+ OFPIEH12_DEST = 1 << 3, /* 1 or 2 dest headers present. */
+ OFPIEH12_FRAG = 1 << 4, /* Fragment header present. */
+ OFPIEH12_ROUTER = 1 << 5, /* Router header present. */
+ OFPIEH12_HOP = 1 << 6, /* Hop-by-hop header present. */
+ OFPIEH12_UNREP = 1 << 7, /* Unexpected repeats encountered. */
+ OFPIEH12_UNSEQ = 1 << 8 /* Unexpected sequencing encountered. */
+};
+
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
@@ -121,6 +134,7 @@ struct sw_flow_key {
struct in6_addr dst; /* IPv6 destination address. */
} addr;
__be32 label; /* IPv6 flow label. */
+ u16 exthdrs; /* IPv6 extension header flags */
union {
struct {
struct in6_addr src;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index fd1f809e9bc1..5176f6ccac8e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -346,7 +346,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
+ BUILD_BUG_ON(OVS_KEY_ATTR_MAX != 32);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -369,7 +369,8 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
- + nla_total_size(28); /* OVS_KEY_ATTR_ND */
+ + nla_total_size(28) /* OVS_KEY_ATTR_ND */
+ + nla_total_size(2); /* OVS_KEY_ATTR_IPV6_EXTHDRS */
}
static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
@@ -437,6 +438,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
[OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
.next = ovs_nsh_key_attr_lens, },
+ [OVS_KEY_ATTR_IPV6_EXTHDRS] = {
+ .len = sizeof(struct ovs_key_ipv6_exthdrs) },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -479,7 +482,14 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
return -EINVAL;
}
- if (attrs & (1 << type)) {
+ if (type == OVS_KEY_ATTR_PACKET_TYPE ||
+ type == OVS_KEY_ATTR_ND_EXTENSIONS ||
+ type == OVS_KEY_ATTR_TUNNEL_INFO) {
+ OVS_NLERR(log, "Key type %d is not supported", type);
+ return -EINVAL;
+ }
+
+ if (attrs & (1ULL << type)) {
OVS_NLERR(log, "Duplicate key (type %d).", type);
return -EINVAL;
}
@@ -492,7 +502,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
}
if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
- attrs |= 1 << type;
+ attrs |= 1ULL << type;
a[type] = nla;
}
}
@@ -1597,6 +1607,17 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
}
+ if (attrs & (1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS)) {
+ const struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
+
+ ipv6_exthdrs_key = nla_data(a[OVS_KEY_ATTR_IPV6_EXTHDRS]);
+
+ SW_FLOW_KEY_PUT(match, ipv6.exthdrs,
+ ipv6_exthdrs_key->hdrs, is_mask);
+
+ attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
const struct ovs_key_arp *arp_key;
@@ -2099,6 +2120,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv4_key->ipv4_frag = output->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
+ struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
if (!nla)
@@ -2113,6 +2135,13 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv6_key->ipv6_tclass = output->ip.tos;
ipv6_key->ipv6_hlimit = output->ip.ttl;
ipv6_key->ipv6_frag = output->ip.frag;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_EXTHDRS,
+ sizeof(*ipv6_exthdrs_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv6_exthdrs_key = nla_data(nla);
+ ipv6_exthdrs_key->hdrs = output->ipv6.exthdrs;
} else if (swkey->eth.type == htons(ETH_P_NSH)) {
if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
goto nla_put_failure;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index cf2ce5812489..82a74f998966 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -507,7 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
}
skb->dev = vport->dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
vport->ops->send(skb);
return;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ab87f22cc7ec..1b93ce1a5600 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -460,7 +460,7 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
return TP_STATUS_TS_RAW_HARDWARE;
if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
- ktime_to_timespec64_cond(skb->tstamp, ts))
+ ktime_to_timespec64_cond(skb_tstamp(skb), ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
@@ -2199,6 +2199,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
sock_skb_set_dropcount(sk, skb);
+ skb_clear_delivery_time(skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
@@ -2377,6 +2378,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
+ skb_clear_delivery_time(copy_skb);
__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
}
spin_unlock(&sk->sk_receive_queue.lock);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 65218b7ce9f9..2b582da1e88c 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -146,7 +146,7 @@ EXPORT_SYMBOL(phonet_header_ops);
* Prepends an ISI header and sends a datagram.
*/
static int pn_send(struct sk_buff *skb, struct net_device *dev,
- u16 dst, u16 src, u8 res, u8 irq)
+ u16 dst, u16 src, u8 res)
{
struct phonethdr *ph;
int err;
@@ -182,7 +182,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
if (skb->pkt_type == PACKET_LOOPBACK) {
skb_reset_mac_header(skb);
skb_orphan(skb);
- err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0;
+ err = netif_rx(skb) ? -ENOBUFS : 0;
} else {
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
NULL, NULL, skb->len);
@@ -214,7 +214,7 @@ static int pn_raw_send(const void *data, int len, struct net_device *dev,
skb_reserve(skb, MAX_PHONET_HEADER);
__skb_put(skb, len);
skb_copy_to_linear_data(skb, data, len);
- return pn_send(skb, dev, dst, src, res, 1);
+ return pn_send(skb, dev, dst, src, res);
}
/*
@@ -269,7 +269,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
if (!pn_addr(src))
src = pn_object(saddr, pn_obj(src));
- err = pn_send(skb, dev, dst, src, res, 0);
+ err = pn_send(skb, dev, dst, src, res);
dev_put(dev);
return err;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 32563cef85bf..4f51094da9da 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -274,7 +274,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
err = tc_setup_action(&fl_action->action, actions);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "Failed to setup tc actions for offload\n");
+ "Failed to setup tc actions for offload");
goto fl_err;
}
@@ -1037,6 +1037,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ int repeat_ttl;
if (jmp_prgcnt > 0) {
jmp_prgcnt -= 1;
@@ -1045,11 +1046,17 @@ restart_act_graph:
if (tc_act_skip_sw(a->tcfa_flags))
continue;
+
+ repeat_ttl = 32;
repeat:
ret = a->ops->act(skb, a, res);
- if (ret == TC_ACT_REPEAT)
- goto repeat; /* we need a ttl - JHS */
-
+ if (unlikely(ret == TC_ACT_REPEAT)) {
+ if (--repeat_ttl != 0)
+ goto repeat;
+ /* suspicious opcode, stop pipeline */
+ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n");
+ return TC_ACT_OK;
+ }
if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
@@ -1439,6 +1446,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
continue;
if (skip_sw != tc_act_skip_sw(act->tcfa_flags) ||
skip_hw != tc_act_skip_hw(act->tcfa_flags)) {
+ NL_SET_ERR_MSG(extack,
+ "Mismatch between action and filter offload flags");
err = -EINVAL;
goto err;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index a77d8908e737..fea2d78b9ddc 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -53,6 +53,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(filter, skb);
}
+ if (unlikely(!skb->tstamp && skb->mono_delivery_time))
+ skb->mono_delivery_time = 0;
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 7108e71ce4db..89e46f66e3d9 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -355,6 +355,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
}
}
+static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
+ struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
+{
+ entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
+ entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
+}
+
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp)
@@ -379,10 +386,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
- entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
- entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
}
err = flow_offload_add(&ct_ft->nf_ft, entry);
@@ -527,11 +532,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct nf_conn *ct;
u8 dir;
- /* Previously seen or loopback */
- ct = nf_ct_get(skb, &ctinfo);
- if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
- return false;
-
switch (family) {
case NFPROTO_IPV4:
if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0923aa2b8f8a..f4d917705263 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -239,6 +239,20 @@ release_idr:
return err;
}
+static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
+{
+ u32 len;
+
+ if (skb_is_gso(skb))
+ return skb_gso_validate_mac_len(skb, limit);
+
+ len = qdisc_pkt_len(skb);
+ if (skb_at_tc_ingress(skb))
+ len += skb->mac_len;
+
+ return len <= limit;
+}
+
static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -261,7 +275,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
goto inc_overlimits;
}
- if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
+ if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
if (!p->rate_present && !p->pps_present) {
ret = p->tcfp_result;
goto end;
@@ -405,20 +419,66 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
+static int tcf_police_act_to_flow_act(int tc_act, u32 *extval)
+{
+ int act_id = -EOPNOTSUPP;
+
+ if (!TC_ACT_EXT_OPCODE(tc_act)) {
+ if (tc_act == TC_ACT_OK)
+ act_id = FLOW_ACTION_ACCEPT;
+ else if (tc_act == TC_ACT_SHOT)
+ act_id = FLOW_ACTION_DROP;
+ else if (tc_act == TC_ACT_PIPE)
+ act_id = FLOW_ACTION_PIPE;
+ } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) {
+ act_id = FLOW_ACTION_GOTO;
+ *extval = tc_act & TC_ACT_EXT_VAL_MASK;
+ } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) {
+ act_id = FLOW_ACTION_JUMP;
+ *extval = tc_act & TC_ACT_EXT_VAL_MASK;
+ }
+
+ return act_id;
+}
+
static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
u32 *index_inc, bool bind)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *p;
+ int act_id;
+
+ p = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
entry->id = FLOW_ACTION_POLICE;
entry->police.burst = tcf_police_burst(act);
entry->police.rate_bytes_ps =
tcf_police_rate_bytes_ps(act);
+ entry->police.peakrate_bytes_ps = tcf_police_peakrate_bytes_ps(act);
+ entry->police.avrate = tcf_police_tcfp_ewma_rate(act);
+ entry->police.overhead = tcf_police_rate_overhead(act);
entry->police.burst_pkt = tcf_police_burst_pkt(act);
entry->police.rate_pkt_ps =
tcf_police_rate_pkt_ps(act);
entry->police.mtu = tcf_police_tcfp_mtu(act);
+
+ act_id = tcf_police_act_to_flow_act(police->tcf_action,
+ &entry->police.exceed.extval);
+ if (act_id < 0)
+ return act_id;
+
+ entry->police.exceed.act_id = act_id;
+
+ act_id = tcf_police_act_to_flow_act(p->tcfp_result,
+ &entry->police.notexceed.extval);
+ if (act_id < 0)
+ return act_id;
+
+ entry->police.notexceed.act_id = act_id;
+
*index_inc = 1;
} else {
struct flow_offload_action *fl_action = entry_data;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ff1e6b474fef..2957f8f5cea7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1061,7 +1061,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
/* Find qdisc */
if (!*parent) {
- *q = dev->qdisc;
+ *q = rcu_dereference(dev->qdisc);
*parent = (*q)->handle;
} else {
*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
@@ -2606,7 +2606,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
parent = tcm->tcm_parent;
if (!parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
@@ -2981,7 +2981,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
if (!tcm->tcm_parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index df19a847829e..c85b85a192bf 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -102,6 +102,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(prog->filter, skb);
}
+ if (unlikely(!skb->tstamp && skb->mono_delivery_time))
+ skb->mono_delivery_time = 0;
if (prog->exts_integrated) {
res->class = 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 179825a3b2fd..e3c0e8ea2dbb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -1082,10 +1082,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
skip:
if (!ingress) {
notify_and_destroy(net, skb, n, classid,
- dev->qdisc, new);
+ rtnl_dereference(dev->qdisc), new);
if (new && !new->ops->attach)
qdisc_refcount_inc(new);
- dev->qdisc = new ? : &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
if (new && new->ops->attach)
new->ops->attach(new);
@@ -1451,7 +1451,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
if (!q) {
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
@@ -1540,7 +1540,7 @@ replay:
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
/* It may be default qdisc, ignore it */
@@ -1762,7 +1762,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
+ if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
+ skb, cb, &q_idx, s_q_idx,
true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
@@ -2033,7 +2034,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
} else if (qid1) {
qid = qid1;
} else if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
/* Now qid is genuine qdisc handle consistent
* both with parent and child.
@@ -2044,7 +2045,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
portid = TC_H_MAKE(qid, portid);
} else {
if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
}
/* OK. Locate qdisc */
@@ -2205,7 +2206,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
+ if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
+ skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index f893d9a81b01..5bab9f8b8f45 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1164,30 +1164,33 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) {
- dev->qdisc = qdisc;
+ rcu_assign_pointer(dev->qdisc, qdisc);
qdisc->ops->attach(qdisc);
}
}
+ qdisc = rtnl_dereference(dev->qdisc);
/* Detect default qdisc setup/init failed and fallback to "noqueue" */
- if (dev->qdisc == &noop_qdisc) {
+ if (qdisc == &noop_qdisc) {
netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
default_qdisc_ops->id, noqueue_qdisc_ops.id);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE;
}
#ifdef CONFIG_NET_SCHED
- if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc, false);
+ if (qdisc != &noop_qdisc)
+ qdisc_hash_add(qdisc, false);
#endif
}
@@ -1217,7 +1220,7 @@ void dev_activate(struct net_device *dev)
* and noqueue_qdisc for virtual interfaces
*/
- if (dev->qdisc == &noop_qdisc)
+ if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev))
@@ -1383,7 +1386,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
void dev_qdisc_change_real_num_tx(struct net_device *dev,
unsigned int new_real_tx)
{
- struct Qdisc *qdisc = dev->qdisc;
+ struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
if (qdisc->ops->change_real_num_tx)
qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
@@ -1447,7 +1450,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev)
{
- dev->qdisc = &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
@@ -1475,8 +1478,8 @@ void dev_shutdown(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_put(dev->qdisc);
- dev->qdisc = &noop_qdisc;
+ qdisc_put(rtnl_dereference(dev->qdisc));
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
WARN_ON(timer_pending(&dev->watchdog_timer));
}
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 034e2c74497d..d9c6d8f30f09 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
r->idiag_retrans = asoc->rtx_data_chunks;
r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
- } else {
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
- r->idiag_expires = 0;
}
}
@@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
r = nlmsg_data(nlh);
BUG_ON(!sk_fullsock(sk));
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->idiag_expires = 0;
if (asoc) {
inet_diag_msg_sctpasoc_fill(r, sk, asoc);
} else {
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
}
if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 196fb6f01b14..875efcd126a2 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o
+smc-$(CONFIG_SYSCTL) += smc_sysctl.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 246c874de629..f0d118e9f155 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -51,6 +51,7 @@
#include "smc_close.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"
+#include "smc_sysctl.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -192,12 +193,27 @@ void smc_unhash_sk(struct sock *sk)
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);
+/* This will be called before user really release sock_lock. So do the
+ * work which we didn't do because of user hold the sock_lock in the
+ * BH context
+ */
+static void smc_release_cb(struct sock *sk)
+{
+ struct smc_sock *smc = smc_sk(sk);
+
+ if (smc->conn.tx_in_release_sock) {
+ smc_tx_pending(&smc->conn);
+ smc->conn.tx_in_release_sock = false;
+ }
+}
+
struct proto smc_proto = {
.name = "SMC",
.owner = THIS_MODULE,
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v4_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -210,6 +226,7 @@ struct proto smc_proto6 = {
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -268,7 +285,7 @@ static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
- int rc = 0;
+ int old_state, rc = 0;
if (!sk)
goto out;
@@ -276,8 +293,10 @@ static int smc_release(struct socket *sock)
sock_hold(sk); /* sock_put below */
smc = smc_sk(sk);
+ old_state = sk->sk_state;
+
/* cleanup for a dangling non-blocking connect */
- if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+ if (smc->connect_nonblock && old_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
if (cancel_work_sync(&smc->connect_work))
@@ -291,6 +310,10 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
+ if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
+ !smc->use_fallback)
+ smc_close_active_abort(smc);
+
rc = __smc_release(smc);
/* detach socket */
@@ -752,14 +775,17 @@ static void smc_fback_error_report(struct sock *clcsk)
static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
struct sock *clcsk;
+ int rc = 0;
mutex_lock(&smc->clcsock_release_lock);
if (!smc->clcsock) {
- mutex_unlock(&smc->clcsock_release_lock);
- return -EBADF;
+ rc = -EBADF;
+ goto out;
}
clcsk = smc->clcsock->sk;
+ if (smc->use_fallback)
+ goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -787,8 +813,9 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
}
+out:
mutex_unlock(&smc->clcsock_release_lock);
- return 0;
+ return rc;
}
/* fall back during connect */
@@ -1372,8 +1399,14 @@ static int __smc_connect(struct smc_sock *smc)
/* perform CLC handshake */
rc = smc_connect_clc(smc, aclc2, ini);
- if (rc)
+ if (rc) {
+ /* -EAGAIN on timeout, see tcp_recvmsg() */
+ if (rc == -EAGAIN) {
+ rc = -ETIMEDOUT;
+ smc->sk.sk_err = ETIMEDOUT;
+ }
goto vlan_cleanup;
+ }
/* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc);
@@ -2705,10 +2738,14 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
switch (optname) {
case SMC_LIMIT_HS:
- if (optlen < sizeof(int))
- return -EINVAL;
- if (copy_from_sockptr(&val, optval, sizeof(int)))
- return -EFAULT;
+ if (optlen < sizeof(int)) {
+ rc = -EINVAL;
+ break;
+ }
+ if (copy_from_sockptr(&val, optval, sizeof(int))) {
+ rc = -EFAULT;
+ break;
+ }
smc->limit_smc_hs = !!val;
rc = 0;
@@ -2781,8 +2818,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_CLOSED) {
if (val) {
SMC_STAT_INC(smc, ndly_cnt);
- mod_delayed_work(smc->conn.lgr->tx_wq,
- &smc->conn.tx_work, 0);
+ smc_tx_pending(&smc->conn);
+ cancel_delayed_work(&smc->conn.tx_work);
}
}
break;
@@ -3142,11 +3179,17 @@ unsigned int smc_net_id;
static __net_init int smc_net_init(struct net *net)
{
+ int rc;
+
+ rc = smc_sysctl_net_init(net);
+ if (rc)
+ return rc;
return smc_pnet_net_init(net);
}
static void __net_exit smc_net_exit(struct net *net)
{
+ smc_sysctl_net_exit(net);
smc_pnet_net_exit(net);
}
@@ -3256,12 +3299,14 @@ static int __init smc_init(void)
rc = tcp_register_ulp(&smc_ulp_ops);
if (rc) {
pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
- goto out_sock;
+ goto out_ib;
}
static_branch_enable(&tcp_have_smc);
return 0;
+out_ib:
+ smc_ib_unregister_client();
out_sock:
sock_unregister(PF_SMC);
out_proto6:
diff --git a/net/smc/smc.h b/net/smc/smc.h
index a096d8af21a0..ea0620529ebe 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -29,6 +29,7 @@
#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM
* devices
*/
+#define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */
extern struct proto smc_proto;
extern struct proto smc_proto6;
@@ -192,6 +193,7 @@ struct smc_connection {
* - dec on polled tx cqe
*/
wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
+ atomic_t tx_pushing; /* nr_threads trying tx push */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
@@ -211,6 +213,10 @@ struct smc_connection {
* data still pending
*/
char urg_rx_byte; /* urgent byte */
+ bool tx_in_release_sock;
+ /* flush pending tx data in
+ * sock release_cb()
+ */
atomic_t bytes_to_rcv; /* arrived data,
* not yet received
*/
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 9d5a97168969..5c731f27996e 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -48,9 +48,19 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
}
- if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
- unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
- wake_up(&conn->cdc_pend_tx_wq);
+ if (atomic_dec_and_test(&conn->cdc_pend_tx_wr)) {
+ /* If user owns the sock_lock, mark the connection need sending.
+ * User context will later try to send when it release sock_lock
+ * in smc_release_cb()
+ */
+ if (sock_owned_by_user(&smc->sk))
+ conn->tx_in_release_sock = true;
+ else
+ smc_tx_pending(conn);
+
+ if (unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
+ wake_up(&conn->cdc_pend_tx_wq);
+ }
WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);
smc_tx_sndbuf_nonfull(smc);
@@ -350,8 +360,12 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
if ((diff_cons && smc_tx_prepared_sends(conn)) ||
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
- conn->local_rx_ctrl.prod_flags.urg_data_pending)
- smc_tx_sndbuf_nonempty(conn);
+ conn->local_rx_ctrl.prod_flags.urg_data_pending) {
+ if (!sock_owned_by_user(&smc->sk))
+ smc_tx_pending(conn);
+ else
+ conn->tx_in_release_sock = true;
+ }
if (diff_cons && conn->urg_tx_pend &&
atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 29525d03b253..f40f6ed0fbdb 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1161,8 +1161,8 @@ void smc_conn_free(struct smc_connection *conn)
cancel_work_sync(&conn->abort_work);
}
if (!list_empty(&lgr->list)) {
- smc_lgr_unregister_conn(conn);
smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ smc_lgr_unregister_conn(conn);
}
if (!lgr->conns_num)
@@ -1864,7 +1864,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
+ (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
conn->lgr = lgr;
@@ -1988,7 +1989,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
*/
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
- return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+ return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
/* map an rmb buf to a link */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index ff61b7b95875..7984f8883472 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -113,7 +113,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
pnettable = &sn->pnettable;
/* remove table entry */
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
@@ -131,7 +131,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
rc = 0;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
/* if this is not the initial namespace, stop here */
if (net != &init_net)
@@ -192,7 +192,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
!strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
@@ -206,7 +206,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -224,7 +224,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
@@ -237,7 +237,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -370,7 +370,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
rc = -EEXIST;
new_netdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_ETH &&
!strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
@@ -385,9 +385,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
GFP_ATOMIC);
}
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
goto out_put;
}
@@ -448,7 +448,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
new_pe->ib_port = ib_port;
new_ibdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -458,9 +458,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
}
if (new_ibdev) {
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
}
return (new_ibdev) ? 0 : -EEXIST;
@@ -605,7 +605,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
pnettable = &sn->pnettable;
/* dump pnettable entries */
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
@@ -620,7 +620,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return idx;
}
@@ -864,7 +864,7 @@ int smc_pnet_net_init(struct net *net)
struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
INIT_LIST_HEAD(&pnettable->pnetlist);
- rwlock_init(&pnettable->lock);
+ mutex_init(&pnettable->lock);
INIT_LIST_HEAD(&pnetids_ndev->list);
rwlock_init(&pnetids_ndev->lock);
@@ -947,7 +947,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
@@ -956,7 +956,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1159,7 +1159,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
@@ -1169,7 +1169,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1188,7 +1188,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -1197,7 +1197,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 14039272f7e4..80a88eea4949 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -29,7 +29,7 @@ struct smc_link_group;
* @pnetlist: List of PNETIDs
*/
struct smc_pnettable {
- rwlock_t lock;
+ struct mutex lock;
struct list_head pnetlist;
};
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
new file mode 100644
index 000000000000..bae19419e755
--- /dev/null
+++ b/net/smc/smc_sysctl.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * smc_sysctl.c: sysctl interface to SMC subsystem.
+ *
+ * Copyright (c) 2022, Alibaba Inc.
+ *
+ * Author: Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+
+#include "smc.h"
+#include "smc_sysctl.h"
+
+static struct ctl_table smc_table[] = {
+ {
+ .procname = "autocorking_size",
+ .data = &init_net.smc.sysctl_autocorking_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ { }
+};
+
+int __net_init smc_sysctl_net_init(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = smc_table;
+ if (!net_eq(net, &init_net)) {
+ int i;
+
+ table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+
+ for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++)
+ table[i].data += (void *)net - (void *)&init_net;
+ }
+
+ net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table);
+ if (!net->smc.smc_hdr)
+ goto err_reg;
+
+ net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+void __net_exit smc_sysctl_net_exit(struct net *net)
+{
+ unregister_net_sysctl_table(net->smc.smc_hdr);
+}
diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h
new file mode 100644
index 000000000000..0becc11bd2f4
--- /dev/null
+++ b/net/smc/smc_sysctl.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * smc_sysctl.c: sysctl interface to SMC subsystem.
+ *
+ * Copyright (c) 2022, Alibaba Inc.
+ *
+ * Author: Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#ifndef _SMC_SYSCTL_H
+#define _SMC_SYSCTL_H
+
+#ifdef CONFIG_SYSCTL
+
+int __net_init smc_sysctl_net_init(struct net *net);
+void __net_exit smc_sysctl_net_exit(struct net *net);
+
+#else
+
+static inline int smc_sysctl_net_init(struct net *net)
+{
+ net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+ return 0;
+}
+
+static inline void smc_sysctl_net_exit(struct net *net) { }
+
+#endif /* CONFIG_SYSCTL */
+
+#endif /* _SMC_SYSCTL_H */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index a96ce162825e..98ca9229fe87 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -131,6 +131,51 @@ static bool smc_tx_is_corked(struct smc_sock *smc)
return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
}
+/* If we have pending CDC messages, do not send:
+ * Because CQE of this CDC message will happen shortly, it gives
+ * a chance to coalesce future sendmsg() payload in to one RDMA Write,
+ * without need for a timer, and with no latency trade off.
+ * Algorithm here:
+ * 1. First message should never cork
+ * 2. If we have pending Tx CDC messages, wait for the first CDC
+ * message's completion
+ * 3. Don't cork to much data in a single RDMA Write to prevent burst
+ * traffic, total corked message should not exceed sendbuf/2
+ */
+static bool smc_should_autocork(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ int corking_size;
+
+ corking_size = min_t(unsigned int, conn->sndbuf_desc->len >> 1,
+ sock_net(&smc->sk)->smc.sysctl_autocorking_size);
+
+ if (atomic_read(&conn->cdc_pend_tx_wr) == 0 ||
+ smc_tx_prepared_sends(conn) > corking_size)
+ return false;
+ return true;
+}
+
+static bool smc_tx_should_cork(struct smc_sock *smc, struct msghdr *msg)
+{
+ struct smc_connection *conn = &smc->conn;
+
+ if (smc_should_autocork(smc))
+ return true;
+
+ /* for a corked socket defer the RDMA writes if
+ * sndbuf_space is still available. The applications
+ * should known how/when to uncork it.
+ */
+ if ((msg->msg_flags & MSG_MORE ||
+ smc_tx_is_corked(smc) ||
+ msg->msg_flags & MSG_SENDPAGE_NOTLAST) &&
+ atomic_read(&conn->sndbuf_space))
+ return true;
+
+ return false;
+}
+
/* sndbuf producer: main API called by socket layer.
* called under sock lock.
*/
@@ -235,15 +280,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
*/
if ((msg->msg_flags & MSG_OOB) && !send_remaining)
conn->urg_tx_pend = true;
- if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc) ||
- msg->msg_flags & MSG_SENDPAGE_NOTLAST) &&
- (atomic_read(&conn->sndbuf_space)))
- /* for a corked socket defer the RDMA writes if
- * sndbuf_space is still available. The applications
- * should known how/when to uncork it.
- */
- continue;
- smc_tx_sndbuf_nonempty(conn);
+ /* If we need to cork, do nothing and wait for the next
+ * sendmsg() call or push on tx completion
+ */
+ if (!smc_tx_should_cork(smc, msg))
+ smc_tx_sndbuf_nonempty(conn);
trace_smc_tx_sendmsg(smc, copylen);
} /* while (msg_data_left(msg)) */
@@ -590,13 +631,26 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
- int rc;
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ int rc = 0;
+
+ /* No data in the send queue */
+ if (unlikely(smc_tx_prepared_sends(conn) <= 0))
+ goto out;
+
+ /* Peer don't have RMBE space */
+ if (unlikely(atomic_read(&conn->peer_rmbe_space) <= 0)) {
+ SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk);
+ goto out;
+ }
if (conn->killed ||
- conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
- return -EPIPE; /* connection being aborted */
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ rc = -EPIPE; /* connection being aborted */
+ goto out;
+ }
if (conn->lgr->is_smcd)
rc = smcd_tx_sndbuf_nonempty(conn);
else
@@ -604,13 +658,45 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
if (!rc) {
/* trigger socket release if connection is closing */
- struct smc_sock *smc = container_of(conn, struct smc_sock,
- conn);
smc_close_wake_tx_prepared(smc);
}
+
+out:
+ return rc;
+}
+
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ int rc;
+
+ /* This make sure only one can send simultaneously to prevent wasting
+ * of CPU and CDC slot.
+ * Record whether someone has tried to push while we are pushing.
+ */
+ if (atomic_inc_return(&conn->tx_pushing) > 1)
+ return 0;
+
+again:
+ atomic_set(&conn->tx_pushing, 1);
+ smp_wmb(); /* Make sure tx_pushing is 1 before real send */
+ rc = __smc_tx_sndbuf_nonempty(conn);
+
+ /* We need to check whether someone else have added some data into
+ * the send queue and tried to push but failed after the atomic_set()
+ * when we are pushing.
+ * If so, we need to push again to prevent those data hang in the send
+ * queue.
+ */
+ if (unlikely(!atomic_dec_and_test(&conn->tx_pushing)))
+ goto again;
+
return rc;
}
+/* Wakeup sndbuf consumers from process context
+ * since there is more data to transmit. The caller
+ * must hold sock lock.
+ */
void smc_tx_pending(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -626,7 +712,8 @@ void smc_tx_pending(struct smc_connection *conn)
}
/* Wakeup sndbuf consumers from process context
- * since there is more data to transmit
+ * since there is more data to transmit in locked
+ * sock.
*/
void smc_tx_work(struct work_struct *work)
{
diff --git a/net/socket.c b/net/socket.c
index 50cf75730fd7..982eecad464c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3448,7 +3448,7 @@ EXPORT_SYMBOL(kernel_connect);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is bound.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
@@ -3463,7 +3463,7 @@ EXPORT_SYMBOL(kernel_getsockname);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is connected.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 12e6b4146bfb..474f76383033 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -409,6 +409,27 @@ static int switchdev_lower_dev_walk(struct net_device *lower_dev,
}
static struct net_device *
+switchdev_lower_dev_find_rcu(struct net_device *dev,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev))
+{
+ struct switchdev_nested_priv switchdev_priv = {
+ .check_cb = check_cb,
+ .foreign_dev_check_cb = foreign_dev_check_cb,
+ .dev = dev,
+ .lower_dev = NULL,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &switchdev_priv,
+ };
+
+ netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+
+ return switchdev_priv.lower_dev;
+}
+
+static struct net_device *
switchdev_lower_dev_find(struct net_device *dev,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
@@ -424,7 +445,7 @@ switchdev_lower_dev_find(struct net_device *dev,
.data = &switchdev_priv,
};
- netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+ netdev_walk_all_lower_dev(dev, switchdev_lower_dev_walk, &priv);
return switchdev_priv.lower_dev;
}
@@ -437,63 +458,40 @@ static int __switchdev_handle_fdb_event_to_device(struct net_device *dev,
const struct net_device *foreign_dev),
int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
- unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info))
+ const struct switchdev_notifier_fdb_info *fdb_info))
{
const struct switchdev_notifier_info *info = &fdb_info->info;
- struct net_device *br, *lower_dev;
+ struct net_device *br, *lower_dev, *switchdev;
struct list_head *iter;
int err = -EOPNOTSUPP;
if (check_cb(dev))
return mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
- if (netif_is_lag_master(dev)) {
- if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
- goto maybe_bridged_with_us;
-
- /* This is a LAG interface that we offload */
- if (!lag_mod_cb)
- return -EOPNOTSUPP;
-
- return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
- }
-
/* Recurse through lower interfaces in case the FDB entry is pointing
- * towards a bridge device.
+ * towards a bridge or a LAG device.
*/
- if (netif_is_bridge_master(dev)) {
- if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
- return 0;
-
- /* This is a bridge interface that we offload */
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- /* Do not propagate FDB entries across bridges */
- if (netif_is_bridge_master(lower_dev))
- continue;
-
- /* Bridge ports might be either us, or LAG interfaces
- * that we offload.
- */
- if (!check_cb(lower_dev) &&
- !switchdev_lower_dev_find(lower_dev, check_cb,
- foreign_dev_check_cb))
- continue;
-
- err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
- event, fdb_info, check_cb,
- foreign_dev_check_cb,
- mod_cb, lag_mod_cb);
- if (err && err != -EOPNOTSUPP)
- return err;
- }
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
- return 0;
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find_rcu(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
+ event, fdb_info, check_cb,
+ foreign_dev_check_cb,
+ mod_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
}
-maybe_bridged_with_us:
/* Event is neither on a bridge nor a LAG. Check whether it is on an
* interface that is in a bridge with us.
*/
@@ -501,12 +499,16 @@ maybe_bridged_with_us:
if (!br || !netif_is_bridge_master(br))
return 0;
- if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ switchdev = switchdev_lower_dev_find_rcu(br, check_cb, foreign_dev_check_cb);
+ if (!switchdev)
return 0;
+ if (!foreign_dev_check_cb(switchdev, dev))
+ return err;
+
return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info,
check_cb, foreign_dev_check_cb,
- mod_cb, lag_mod_cb);
+ mod_cb);
}
int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
@@ -516,16 +518,13 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e
const struct net_device *foreign_dev),
int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
- unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info))
+ const struct switchdev_notifier_fdb_info *fdb_info))
{
int err;
err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info,
check_cb, foreign_dev_check_cb,
- mod_cb, lag_mod_cb);
+ mod_cb);
if (err == -EOPNOTSUPP)
err = 0;
@@ -536,13 +535,15 @@ EXPORT_SYMBOL_GPL(switchdev_handle_fdb_event_to_device);
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
struct switchdev_notifier_info *info = &port_obj_info->info;
+ struct net_device *br, *lower_dev, *switchdev;
struct netlink_ext_ack *extack;
- struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
@@ -566,15 +567,46 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
if (netif_is_bridge_master(lower_dev))
continue;
+ /* When searching for switchdev interfaces that are neighbors
+ * of foreign ones, and @dev is a bridge, do not recurse on the
+ * foreign interface again, it was already visited.
+ */
+ if (foreign_dev_check_cb && !check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb, foreign_dev_check_cb))
+ continue;
+
err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
- check_cb, add_cb);
+ check_cb, foreign_dev_check_cb,
+ add_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
- return err;
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ if (!foreign_dev_check_cb)
+ return err;
+
+ br = netdev_master_upper_dev_get(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return err;
+
+ switchdev = switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb);
+ if (!switchdev)
+ return err;
+
+ if (!foreign_dev_check_cb(switchdev, dev))
+ return err;
+
+ return __switchdev_handle_port_obj_add(br, port_obj_info, check_cb,
+ foreign_dev_check_cb, add_cb);
}
+/* Pass through a port object addition, if @dev passes @check_cb, or replicate
+ * it towards all lower interfaces of @dev that pass @check_cb, if @dev is a
+ * bridge or a LAG.
+ */
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
@@ -585,21 +617,46 @@ int switchdev_handle_port_obj_add(struct net_device *dev,
int err;
err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
- add_cb);
+ NULL, add_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
+/* Same as switchdev_handle_port_obj_add(), except if object is notified on a
+ * @dev that passes @foreign_dev_check_cb, it is replicated towards all devices
+ * that pass @check_cb and are in the same bridge as @dev.
+ */
+int switchdev_handle_port_obj_add_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack))
+{
+ int err;
+
+ err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
+ foreign_dev_check_cb, add_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add_foreign);
+
static int __switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
struct switchdev_notifier_info *info = &port_obj_info->info;
- struct net_device *lower_dev;
+ struct net_device *br, *lower_dev, *switchdev;
struct list_head *iter;
int err = -EOPNOTSUPP;
@@ -621,15 +678,46 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev,
if (netif_is_bridge_master(lower_dev))
continue;
+ /* When searching for switchdev interfaces that are neighbors
+ * of foreign ones, and @dev is a bridge, do not recurse on the
+ * foreign interface again, it was already visited.
+ */
+ if (foreign_dev_check_cb && !check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb, foreign_dev_check_cb))
+ continue;
+
err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
- check_cb, del_cb);
+ check_cb, foreign_dev_check_cb,
+ del_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
- return err;
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ if (!foreign_dev_check_cb)
+ return err;
+
+ br = netdev_master_upper_dev_get(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return err;
+
+ switchdev = switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb);
+ if (!switchdev)
+ return err;
+
+ if (!foreign_dev_check_cb(switchdev, dev))
+ return err;
+
+ return __switchdev_handle_port_obj_del(br, port_obj_info, check_cb,
+ foreign_dev_check_cb, del_cb);
}
+/* Pass through a port object deletion, if @dev passes @check_cb, or replicate
+ * it towards all lower interfaces of @dev that pass @check_cb, if @dev is a
+ * bridge or a LAG.
+ */
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
@@ -639,13 +727,35 @@ int switchdev_handle_port_obj_del(struct net_device *dev,
int err;
err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
- del_cb);
+ NULL, del_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
+/* Same as switchdev_handle_port_obj_del(), except if object is notified on a
+ * @dev that passes @foreign_dev_check_cb, it is replicated towards all devices
+ * that pass @check_cb and are in the same bridge as @dev.
+ */
+int switchdev_handle_port_obj_del_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj))
+{
+ int err;
+
+ err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
+ foreign_dev_check_cb, del_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del_foreign);
+
static int __switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 473a790f5894..6d39ca05f249 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -352,16 +352,18 @@ static int tipc_enable_bearer(struct net *net, const char *name,
goto rejected;
}
- test_and_set_bit_lock(0, &b->up);
- rcu_assign_pointer(tn->bearer_list[bearer_id], b);
- if (skb)
- tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
-
+ /* Create monitoring data before accepting activate messages */
if (tipc_mon_create(net, bearer_id)) {
bearer_disable(net, b);
+ kfree_skb(skb);
return -ENOMEM;
}
+ test_and_set_bit_lock(0, &b->up);
+ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
return res;
@@ -768,7 +770,7 @@ void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
skb->pkt_type = PACKET_HOST;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = eth_type_trans(skb, dev);
- netif_rx_ni(skb);
+ netif_rx(skb);
}
}
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 9325479295b8..f09316a9035f 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -2276,7 +2276,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
struct tipc_aead_key *skey = NULL;
u16 key_gen = msg_key_gen(hdr);
- u16 size = msg_data_sz(hdr);
+ u32 size = msg_data_sz(hdr);
u8 *data = msg_data(hdr);
unsigned int keylen;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1e14d7f8f28f..e260c0d557f5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2286,6 +2286,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
case STATE_MSG:
+ /* Validate Gap ACK blocks, drop if invalid */
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+ if (glen > dlen)
+ break;
+
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
@@ -2311,10 +2316,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
}
- /* Receive Gap ACK blocks from peer if any */
- glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
- if(glen > dlen)
- break;
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 01396dd1c899..1d8ba233d047 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9947b7dfe1d2..6ef95ce565bd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -403,7 +403,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
u32 flags = n->action_flags;
struct list_head *publ_list;
struct tipc_uaddr ua;
- u32 bearer_id;
+ u32 bearer_id, node;
if (likely(!flags)) {
write_unlock_bh(&n->lock);
@@ -413,7 +413,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
TIPC_LINK_STATE, n->addr, n->addr);
sk.ref = n->link_id;
- sk.node = n->addr;
+ sk.node = tipc_own_addr(net);
+ node = n->addr;
bearer_id = n->link_id & 0xffff;
publ_list = &n->publ_list;
@@ -423,17 +424,17 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
+ tipc_publ_notify(net, publ_list, node, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, sk.node, n->capabilities);
+ tipc_named_node_up(net, node, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
- tipc_mon_peer_up(net, sk.node, bearer_id);
+ tipc_mon_peer_up(net, node, bearer_id);
tipc_nametbl_publish(net, &ua, &sk, sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
- tipc_mon_peer_down(net, sk.node, bearer_id);
+ tipc_mon_peer_down(net, node, bearer_id);
tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3e63c83e641c..7545321c3440 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3749,7 +3749,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistence check to fail in the netlink callback
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3235261f138d..38baeb189d4e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1401,6 +1401,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
+ vsock_remove_connected(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 1e9be50469ce..527ae669f6f7 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -33,7 +33,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
) > $@
-$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDI) \
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \
$(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)
@$(kecho) " GEN $@"
$(Q)(set -e; \
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3a54c8e6b6c6..f08d4b3bb148 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -332,29 +332,20 @@ static void cfg80211_event_work(struct work_struct *work)
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev, *tmp;
- bool found = false;
ASSERT_RTNL();
- list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
if (wdev->nl_owner_dead) {
if (wdev->netdev)
dev_close(wdev->netdev);
- found = true;
- }
- }
-
- if (!found)
- return;
- wiphy_lock(&rdev->wiphy);
- list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
- if (wdev->nl_owner_dead) {
+ wiphy_lock(&rdev->wiphy);
cfg80211_leave(rdev, wdev);
rdev_del_virtual_intf(rdev, wdev);
+ wiphy_unlock(&rdev->wiphy);
}
}
- wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_destroy_iface_wk(struct work_struct *work)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7543c73a3f1d..ee1c2b6b6971 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13504,6 +13504,9 @@ static int handle_nan_filter(struct nlattr *attr_filter,
i = 0;
nla_for_each_nested(attr, attr_filter, rem) {
filter[i].filter = nla_memdup(attr, GFP_KERNEL);
+ if (!filter[i].filter)
+ goto err;
+
filter[i].len = nla_len(attr);
i++;
}
@@ -13516,6 +13519,15 @@ static int handle_nan_filter(struct nlattr *attr_filter,
}
return 0;
+
+err:
+ i = 0;
+ nla_for_each_nested(attr, attr_filter, rem) {
+ kfree(filter[i].filter);
+ i++;
+ }
+ kfree(filter);
+ return -ENOMEM;
}
static int nl80211_nan_add_func(struct sk_buff *skb,
@@ -17909,7 +17921,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev->chandef = *chandef;
wdev->preset_chandef = *chandef;
- if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
!WARN_ON(!wdev->current_bss))
cfg80211_update_assoc_bss_entry(wdev, chandef->chan);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 2eda097aee7f..a60d7d638e72 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -2284,7 +2284,7 @@ void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr)
skb->dev = dev;
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- netif_rx_ni(skb);
+ netif_rx(skb);
}
EXPORT_SYMBOL(cfg80211_send_layer2_update);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3fa066419d37..39bce5d764de 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (x->encap || x->tfcpad)
return -EINVAL;
+ if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND))
+ return -EINVAL;
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
@@ -262,7 +265,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC);
xso->real_dev = dev;
xso->num_exthdrs = 1;
- xso->flags = xuo->flags;
+ /* Don't forward bit that is not implemented */
+ xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 57448fc519fc..5113fa0fbcee 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -190,7 +190,7 @@ static void xfrmi_dev_uninit(struct net_device *dev)
static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
{
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
@@ -304,7 +304,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ if (skb->len > 1280)
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ else
+ goto xmit;
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
@@ -673,12 +676,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
struct net *net = xi->net;
struct xfrm_if_parms p = {};
+ xfrmi_netlink_parms(data, &p);
if (!p.if_id) {
NL_SET_ERR_MSG(extack, "if_id must be non zero");
return -EINVAL;
}
- xfrmi_netlink_parms(data, &p);
xi = xfrmi_locate(net, &p);
if (!xi) {
xi = netdev_priv(dev);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 04d1ce9b510f..882526159d3a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4256,7 +4256,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
}
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
- u8 dir, u8 type, struct net *net)
+ u8 dir, u8 type, struct net *net, u32 if_id)
{
struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
@@ -4265,7 +4265,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) {
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
priority = ret->priority;
@@ -4277,7 +4278,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
if ((pol->priority >= priority) && ret)
break;
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
break;
@@ -4393,7 +4395,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_migrate,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap)
+ struct xfrm_encap_tmpl *encap, u32 if_id)
{
int i, err, nx_cur = 0, nx_new = 0;
struct xfrm_policy *pol = NULL;
@@ -4412,14 +4414,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 1 - find policy */
- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
+ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
err = -ENOENT;
goto out;
}
/* Stage 2 - find and update state(s) */
for (i = 0, mp = m; i < num_migrate; i++, mp++) {
- if ((x = xfrm_migrate_state_find(mp, net))) {
+ if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
x_cur[nx_cur] = x;
nx_cur++;
xc = xfrm_state_migrate(x, mp, encap);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ca6bee18346d..b749935152ba 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1579,9 +1579,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
- if (xfrm_init_state(x) < 0)
- goto error;
-
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
@@ -1606,7 +1603,8 @@ out:
return NULL;
}
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id)
{
unsigned int h;
struct xfrm_state *x = NULL;
@@ -1622,6 +1620,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
continue;
if (m->reqid && x->props.reqid != m->reqid)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1637,6 +1637,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1663,6 +1665,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
if (!xc)
return NULL;
+ xc->props.family = m->new_family;
+
+ if (xfrm_init_state(xc) < 0)
+ goto error;
+
memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
@@ -2572,7 +2579,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
@@ -2603,17 +2610,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-EXPORT_SYMBOL_GPL(__xfrm_state_mtu);
-
-u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
-{
- mtu = __xfrm_state_mtu(x, mtu);
-
- if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
- return IPV6_MIN_MTU;
-
- return mtu;
-}
+EXPORT_SYMBOL_GPL(xfrm_state_mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8cd6c8129004..72b2f173aac8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -630,13 +630,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_smark_init(attrs, &x->props.smark);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!x->if_id) {
- err = -EINVAL;
- goto error;
- }
- }
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
if (err)
@@ -1432,13 +1427,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
mark = xfrm_mark_get(attrs, &m);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!if_id) {
- err = -EINVAL;
- goto out_noput;
- }
- }
if (p->info.seq) {
x = xfrm_find_acq_byseq(net, mark, p->info.seq);
@@ -1751,13 +1741,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
xfrm_mark_get(attrs, &xp->mark);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!xp->if_id) {
- err = -EINVAL;
- goto error;
- }
- }
return xp;
error:
@@ -2608,6 +2593,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
int n = 0;
struct net *net = sock_net(skb->sk);
struct xfrm_encap_tmpl *encap = NULL;
+ u32 if_id = 0;
if (attrs[XFRMA_MIGRATE] == NULL)
return -EINVAL;
@@ -2632,7 +2618,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENOMEM;
}
- err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);
+ if (attrs[XFRMA_IF_ID])
+ if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
+ err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id);
kfree(encap);