Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan_dev.c | 24
-rw-r--r--  net/8021q/vlanproc.c | 46
-rw-r--r--  net/Makefile | 2
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 3
-rw-r--r--  net/batman-adv/main.c | 14
-rw-r--r--  net/batman-adv/main.h | 2
-rw-r--r--  net/batman-adv/netlink.c | 1
-rw-r--r--  net/bpf/bpf_dummy_struct_ops.c | 22
-rw-r--r--  net/bridge/br.c | 15
-rw-r--r--  net/bridge/br_device.c | 9
-rw-r--r--  net/bridge/br_fdb.c | 5
-rw-r--r--  net/bridge/br_netlink.c | 3
-rw-r--r--  net/bridge/br_vlan.c | 4
-rw-r--r--  net/bridge/netfilter/Kconfig | 7
-rw-r--r--  net/bridge/netfilter/Makefile | 2
-rw-r--r--  net/can/af_can.c | 2
-rw-r--r--  net/can/bcm.c | 69
-rw-r--r--  net/can/isotp.c | 5
-rw-r--r--  net/can/raw.c | 93
-rw-r--r--  net/core/dev.c | 304
-rw-r--r--  net/core/dev.h | 4
-rw-r--r--  net/core/dst.c | 6
-rw-r--r--  net/core/filter.c | 155
-rw-r--r--  net/core/link_watch.c | 13
-rw-r--r--  net/core/net-procfs.c | 48
-rw-r--r--  net/core/net-sysfs.c | 39
-rw-r--r--  net/core/net_namespace.c | 33
-rw-r--r--  net/core/page_pool.c | 23
-rw-r--r--  net/core/rtnetlink.c | 95
-rw-r--r--  net/core/scm.c | 5
-rw-r--r--  net/core/skbuff.c | 97
-rw-r--r--  net/core/sock.c | 14
-rw-r--r--  net/core/sock_diag.c | 120
-rw-r--r--  net/core/xdp.c | 6
-rw-r--r--  net/dccp/ackvec.c | 8
-rw-r--r--  net/dccp/diag.c | 1
-rw-r--r--  net/dsa/tag_sja1105.c | 4
-rw-r--r--  net/dsa/user.c | 16
-rw-r--r--  net/ethtool/common.c | 5
-rw-r--r--  net/ethtool/common.h | 1
-rw-r--r--  net/ethtool/eee.c | 75
-rw-r--r--  net/ethtool/ioctl.c | 69
-rw-r--r--  net/ethtool/netlink.c | 14
-rw-r--r--  net/hsr/hsr_device.c | 28
-rw-r--r--  net/ieee802154/6lowpan/core.c | 1
-rw-r--r--  net/ieee802154/socket.c | 1
-rw-r--r--  net/ipv4/af_inet.c | 2
-rw-r--r--  net/ipv4/bpf_tcp_ca.c | 22
-rw-r--r--  net/ipv4/datagram.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 2
-rw-r--r--  net/ipv4/inet_connection_sock.c | 2
-rw-r--r--  net/ipv4/inet_diag.c | 101
-rw-r--r--  net/ipv4/inetpeer.c | 5
-rw-r--r--  net/ipv4/ip_gre.c | 24
-rw-r--r--  net/ipv4/ip_output.c | 2
-rw-r--r--  net/ipv4/ip_tunnel.c | 38
-rw-r--r--  net/ipv4/ip_vti.c | 8
-rw-r--r--  net/ipv4/ipip.c | 8
-rw-r--r--  net/ipv4/netfilter/Kconfig | 43
-rw-r--r--  net/ipv4/netfilter/Makefile | 2
-rw-r--r--  net/ipv4/nexthop.c | 38
-rw-r--r--  net/ipv4/raw_diag.c | 1
-rw-r--r--  net/ipv4/syncookies.c | 40
-rw-r--r--  net/ipv4/tcp_diag.c | 1
-rw-r--r--  net/ipv4/tcp_input.c | 23
-rw-r--r--  net/ipv4/udp_diag.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 113
-rw-r--r--  net/ipv6/ip6_fib.c | 79
-rw-r--r--  net/ipv6/ip6_gre.c | 14
-rw-r--r--  net/ipv6/ip6_tunnel.c | 13
-rw-r--r--  net/ipv6/ip6_vti.c | 13
-rw-r--r--  net/ipv6/mcast.c | 1
-rw-r--r--  net/ipv6/ndisc.c | 13
-rw-r--r--  net/ipv6/netfilter/Kconfig | 20
-rw-r--r--  net/ipv6/netfilter/Makefile | 2
-rw-r--r--  net/ipv6/route.c | 27
-rw-r--r--  net/ipv6/sit.c | 14
-rw-r--r--  net/ipv6/syncookies.c | 13
-rw-r--r--  net/l2tp/l2tp_ip.c | 2
-rw-r--r--  net/mptcp/mptcp_diag.c | 1
-rw-r--r--  net/mptcp/options.c | 20
-rw-r--r--  net/mptcp/pm.c | 2
-rw-r--r--  net/mptcp/pm_netlink.c | 10
-rw-r--r--  net/mptcp/protocol.c | 52
-rw-r--r--  net/mptcp/protocol.h | 8
-rw-r--r--  net/mptcp/sockopt.c | 2
-rw-r--r--  net/mptcp/subflow.c | 10
-rw-r--r--  net/netfilter/Kconfig | 12
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 4
-rw-r--r--  net/netfilter/nf_bpf_link.c | 2
-rw-r--r--  net/netfilter/nf_conncount.c | 8
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 2
-rw-r--r--  net/netfilter/nf_tables_api.c | 35
-rw-r--r--  net/netlabel/netlabel_kapi.c | 8
-rw-r--r--  net/netlink/diag.c | 1
-rw-r--r--  net/nfc/hci/llc.c | 20
-rw-r--r--  net/packet/diag.c | 1
-rw-r--r--  net/rds/connection.c | 4
-rw-r--r--  net/sched/Kconfig | 10
-rw-r--r--  net/sched/act_api.c | 2
-rw-r--r--  net/sched/act_bpf.c | 1
-rw-r--r--  net/sched/act_connmark.c | 1
-rw-r--r--  net/sched/act_csum.c | 1
-rw-r--r--  net/sched/act_ct.c | 1
-rw-r--r--  net/sched/act_ctinfo.c | 1
-rw-r--r--  net/sched/act_gact.c | 1
-rw-r--r--  net/sched/act_gate.c | 1
-rw-r--r--  net/sched/act_ife.c | 1
-rw-r--r--  net/sched/act_mirred.c | 1
-rw-r--r--  net/sched/act_mpls.c | 1
-rw-r--r--  net/sched/act_nat.c | 1
-rw-r--r--  net/sched/act_pedit.c | 1
-rw-r--r--  net/sched/act_police.c | 1
-rw-r--r--  net/sched/act_sample.c | 1
-rw-r--r--  net/sched/act_simple.c | 1
-rw-r--r--  net/sched/act_skbedit.c | 1
-rw-r--r--  net/sched/act_skbmod.c | 1
-rw-r--r--  net/sched/act_tunnel_key.c | 1
-rw-r--r--  net/sched/act_vlan.c | 1
-rw-r--r--  net/sched/cls_api.c | 2
-rw-r--r--  net/sched/cls_basic.c | 1
-rw-r--r--  net/sched/cls_bpf.c | 1
-rw-r--r--  net/sched/cls_cgroup.c | 1
-rw-r--r--  net/sched/cls_flow.c | 1
-rw-r--r--  net/sched/cls_flower.c | 1
-rw-r--r--  net/sched/cls_fw.c | 1
-rw-r--r--  net/sched/cls_matchall.c | 1
-rw-r--r--  net/sched/cls_route.c | 1
-rw-r--r--  net/sched/cls_u32.c | 1
-rw-r--r--  net/sched/sch_api.c | 4
-rw-r--r--  net/sched/sch_cake.c | 1
-rw-r--r--  net/sched/sch_cbs.c | 1
-rw-r--r--  net/sched/sch_choke.c | 1
-rw-r--r--  net/sched/sch_codel.c | 33
-rw-r--r--  net/sched/sch_drr.c | 1
-rw-r--r--  net/sched/sch_etf.c | 1
-rw-r--r--  net/sched/sch_ets.c | 1
-rw-r--r--  net/sched/sch_fq.c | 1
-rw-r--r--  net/sched/sch_fq_codel.c | 1
-rw-r--r--  net/sched/sch_gred.c | 1
-rw-r--r--  net/sched/sch_hfsc.c | 1
-rw-r--r--  net/sched/sch_hhf.c | 1
-rw-r--r--  net/sched/sch_htb.c | 1
-rw-r--r--  net/sched/sch_ingress.c | 3
-rw-r--r--  net/sched/sch_mqprio.c | 1
-rw-r--r--  net/sched/sch_multiq.c | 1
-rw-r--r--  net/sched/sch_netem.c | 1
-rw-r--r--  net/sched/sch_pie.c | 1
-rw-r--r--  net/sched/sch_plug.c | 1
-rw-r--r--  net/sched/sch_prio.c | 1
-rw-r--r--  net/sched/sch_qfq.c | 1
-rw-r--r--  net/sched/sch_red.c | 1
-rw-r--r--  net/sched/sch_sfb.c | 1
-rw-r--r--  net/sched/sch_sfq.c | 1
-rw-r--r--  net/sched/sch_skbprio.c | 1
-rw-r--r--  net/sched/sch_taprio.c | 73
-rw-r--r--  net/sched/sch_tbf.c | 1
-rw-r--r--  net/sctp/diag.c | 1
-rw-r--r--  net/sctp/protocol.c | 10
-rw-r--r--  net/smc/af_smc.c | 22
-rw-r--r--  net/smc/smc.h | 4
-rw-r--r--  net/smc/smc_clc.c | 6
-rw-r--r--  net/smc/smc_clc.h | 2
-rw-r--r--  net/smc/smc_core.c | 4
-rw-r--r--  net/smc/smc_diag.c | 1
-rw-r--r--  net/smc/smc_ism.h | 10
-rw-r--r--  net/tipc/Kconfig | 7
-rw-r--r--  net/tipc/Makefile | 4
-rw-r--r--  net/tipc/bearer.c | 15
-rw-r--r--  net/tipc/diag.c | 1
-rw-r--r--  net/tipc/node.c | 2
-rw-r--r--  net/tipc/socket.c | 1
-rw-r--r--  net/unix/Kconfig | 5
-rw-r--r--  net/unix/Makefile | 2
-rw-r--r--  net/unix/af_unix.c | 73
-rw-r--r--  net/unix/diag.c | 1
-rw-r--r--  net/unix/garbage.c | 200
-rw-r--r--  net/unix/scm.c | 159
-rw-r--r--  net/unix/scm.h | 10
-rw-r--r--  net/vmw_vsock/diag.c | 1
-rw-r--r--  net/xdp/xsk_diag.c | 1
-rw-r--r--  net/xfrm/xfrm_interface_core.c | 14
-rw-r--r--  net/xfrm/xfrm_proc.c | 1
-rw-r--r--  net/xfrm/xfrm_state.c | 17
-rw-r--r--  net/xfrm/xfrm_user.c | 2
185 files changed, 1893 insertions(+), 1267 deletions(-)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 407b2335f091..790b54a7cbe3 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -504,28 +504,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-static struct lock_class_key vlan_netdev_addr_lock_key;
-
-static void vlan_dev_set_lockdep_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
-}
-
-static void vlan_dev_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock,
- &vlan_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
-}
-
static __be16 vlan_parse_protocol(const struct sk_buff *skb)
{
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -627,7 +605,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- vlan_dev_set_lockdep_class(dev);
+ netdev_lockdep_set_classes(dev);
vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->vlan_pcpu_stats)
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 7825c129742a..87b959da00cd 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -163,48 +163,34 @@ void vlan_proc_rem_dev(struct net_device *vlandev)
* The following few functions build the content of /proc/net/vlan/config
*/
-/* start read of /proc/net/vlan/config */
-static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(rcu)
+static void *vlan_seq_from_index(struct seq_file *seq, loff_t *pos)
{
+ unsigned long ifindex = *pos;
struct net_device *dev;
- struct net *net = seq_file_net(seq);
- loff_t i = 1;
-
- rcu_read_lock();
- if (*pos == 0)
- return SEQ_START_TOKEN;
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
if (!is_vlan_dev(dev))
continue;
-
- if (i++ == *pos)
- return dev;
+ *pos = dev->ifindex;
+ return dev;
}
+ return NULL;
+}
+
+static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- return NULL;
+ return vlan_seq_from_index(seq, pos);
}
static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev;
- struct net *net = seq_file_net(seq);
-
++*pos;
-
- dev = v;
- if (v == SEQ_START_TOKEN)
- dev = net_device_entry(&net->dev_base_head);
-
- for_each_netdev_continue_rcu(net, dev) {
- if (!is_vlan_dev(dev))
- continue;
-
- return dev;
- }
-
- return NULL;
+ return vlan_seq_from_index(seq, pos);
}
static void vlan_seq_stop(struct seq_file *seq, void *v)
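The rewrite above replaces positional counting (the old "i++ == *pos") with for_each_netdev_dump(), which uses the ifindex itself as a resumable cursor, so a device that disappears between two reads can no longer shift later entries past the reader. A minimal standalone sketch of the same cursor idiom, with hypothetical names (this is not kernel API):

    #include <stdio.h>

    struct dev { int ifindex; const char *name; };

    /* Return the first entry with ifindex >= *pos and advance the
     * cursor past it; removals of earlier entries cannot skip or
     * repeat anything, unlike a positional index.
     */
    static const struct dev *next_from(const struct dev *tbl, int n, int *pos)
    {
        for (int i = 0; i < n; i++) {
            if (tbl[i].ifindex >= *pos) {
                *pos = tbl[i].ifindex + 1;   /* resume point */
                return &tbl[i];
            }
        }
        return NULL;
    }

    int main(void)
    {
        const struct dev tbl[] = { {2, "vlan2"}, {5, "vlan5"}, {9, "vlan9"} };
        const struct dev *d;
        int pos = 0;

        while ((d = next_from(tbl, 3, &pos)))
            printf("%s (ifindex %d)\n", d->name, d->ifindex);
        return 0;
    }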
diff --git a/net/Makefile b/net/Makefile
index b06b5539e7a6..65bb8c72a35e 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX_SCM) += unix/
+obj-$(CONFIG_UNIX) += unix/
obj-y += ipv6/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 28a939d56090..4c7e85534324 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -684,7 +684,7 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
cand = batadv_dat_select_candidates(bat_priv, ip, vid);
if (!cand)
- goto out;
+ return ret;
batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip);
@@ -728,7 +728,6 @@ free_orig:
batadv_orig_node_put(cand[i].orig_node);
}
-out:
kfree(cand);
return ret;
}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5fc754b0b3f7..75119f1ffccc 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -691,29 +691,31 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
"%s%s", BATADV_UEV_TYPE_VAR,
batadv_uev_type_str[type]);
if (!uevent_env[0])
- goto out;
+ goto report_error;
uevent_env[1] = kasprintf(GFP_ATOMIC,
"%s%s", BATADV_UEV_ACTION_VAR,
batadv_uev_action_str[action]);
if (!uevent_env[1])
- goto out;
+ goto free_first_env;
/* If the event is DEL, ignore the data field */
if (action != BATADV_UEV_DEL) {
uevent_env[2] = kasprintf(GFP_ATOMIC,
"%s%s", BATADV_UEV_DATA_VAR, data);
if (!uevent_env[2])
- goto out;
+ goto free_second_env;
}
ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
-out:
- kfree(uevent_env[0]);
- kfree(uevent_env[1]);
kfree(uevent_env[2]);
+free_second_env:
+ kfree(uevent_env[1]);
+free_first_env:
+ kfree(uevent_env[0]);
if (ret)
+report_error:
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
batadv_uev_type_str[type],
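The reworked batadv_throw_uevent() above replaces a single out: label that freed all three (possibly NULL) strings with a descending ladder of labels: each failure path frees exactly what was allocated so far, and the success path falls through the whole ladder. A minimal standalone C sketch of that goto-unwind idiom, with hypothetical names (not batman-adv code):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical consumer standing in for kobject_uevent_env();
     * it only reads the strings, so the caller frees them afterwards.
     */
    static int do_emit(char *const env[])
    {
        return env[0] && env[1] && env[2] ? 0 : -EINVAL;
    }

    static int emit_event(void)
    {
        char *env[3] = { NULL, NULL, NULL };
        int ret = -ENOMEM;

        env[0] = strdup("TYPE=gw");
        if (!env[0])
            goto report_error;
        env[1] = strdup("ACTION=add");
        if (!env[1])
            goto free_first;
        env[2] = strdup("DATA=1");
        if (!env[2])
            goto free_second;

        ret = do_emit(env);

        free(env[2]);          /* success also falls through the ladder */
    free_second:
        free(env[1]);
    free_first:
        free(env[0]);
        if (ret)
    report_error:
            fprintf(stderr, "emit failed: %d\n", ret);
        return ret;
    }

    int main(void)
    {
        return emit_event() ? 1 : 0;
    }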
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 870dcd7f1786..8ca854a75a32 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2024.0"
+#define BATADV_SOURCE_VERSION "2024.1"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 1f7ed9d4f6fd..0954757f0b8b 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -15,7 +15,6 @@
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/errno.h>
-#include <linux/export.h>
#include <linux/genetlink.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 8906f7bdf4a9..02de71719aed 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -7,7 +7,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
-extern struct bpf_struct_ops bpf_bpf_dummy_ops;
+static struct bpf_struct_ops bpf_bpf_dummy_ops;
/* A common type for test_N with return value in bpf_dummy_ops */
typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
@@ -22,6 +22,8 @@ struct bpf_dummy_ops_test_args {
struct bpf_dummy_ops_state state;
};
+static struct btf *bpf_dummy_ops_btf;
+
static struct bpf_dummy_ops_test_args *
dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr)
{
@@ -90,9 +92,15 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
void *image = NULL;
unsigned int op_idx;
int prog_ret;
+ s32 type_id;
int err;
- if (prog->aux->attach_btf_id != st_ops->type_id)
+ type_id = btf_find_by_name_kind(bpf_dummy_ops_btf,
+ bpf_bpf_dummy_ops.name,
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ if (prog->aux->attach_btf_id != type_id)
return -EOPNOTSUPP;
func_proto = prog->aux->attach_func_proto;
@@ -148,6 +156,7 @@ out:
static int bpf_dummy_init(struct btf *btf)
{
+ bpf_dummy_ops_btf = btf;
return 0;
}
@@ -247,7 +256,7 @@ static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
.test_sleepable = bpf_dummy_test_sleepable,
};
-struct bpf_struct_ops bpf_bpf_dummy_ops = {
+static struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
.check_member = bpf_dummy_ops_check_member,
@@ -256,4 +265,11 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
.cfi_stubs = &__bpf_bpf_dummy_ops,
+ .owner = THIS_MODULE,
};
+
+static int __init bpf_dummy_struct_ops_init(void)
+{
+ return register_bpf_struct_ops(&bpf_bpf_dummy_ops, bpf_dummy_ops);
+}
+late_initcall(bpf_dummy_struct_ops_init);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index ac19b797dbec..2cab878e0a39 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit_batch(struct list_head *net_list)
+static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net_device *dev;
struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
for_each_netdev(net, dev)
if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
-
- unregister_netdevice_many(&list);
-
- rtnl_unlock();
+ br_dev_delete(dev, dev_to_kill);
}
static struct pernet_operations br_net_ops = {
- .exit_batch = br_net_exit_batch,
+ .exit_batch_rtnl = br_net_exit_batch_rtnl,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 65cee0ad3c1b..717e9750614c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -108,13 +108,6 @@ out:
return NETDEV_TX_OK;
}
-static struct lock_class_key bridge_netdev_addr_lock_key;
-
-static void br_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
-}
-
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -153,7 +146,7 @@ static int br_dev_init(struct net_device *dev)
br_fdb_hash_fini(br);
}
- br_set_lockdep_class(dev);
+ netdev_lockdep_set_classes(dev);
return err;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c622de5eccd0..c77591e63841 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -35,10 +35,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly;
int __init br_fdb_init(void)
{
- br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
- sizeof(struct net_bridge_fdb_entry),
- 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ br_fdb_cache = KMEM_CACHE(net_bridge_fdb_entry, SLAB_HWCACHE_ALIGN);
if (!br_fdb_cache)
return -ENOMEM;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 5ad4abfcb7ba..2cf4fc756263 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -455,7 +455,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
u32 filter_mask, const struct net_device *dev,
bool getlink)
{
- u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+ u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) :
+ IF_OPER_DOWN;
struct nlattr *af = NULL;
struct net_bridge *br;
struct ifinfomsg *hdr;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 15f44d026e75..9c2fffb827ab 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -841,7 +841,7 @@ void br_vlan_flush(struct net_bridge *br)
vg = br_vlan_group(br);
__vlan_flush(br, NULL, vg);
RCU_INIT_POINTER(br->vlgrp, NULL);
- synchronize_rcu();
+ synchronize_net();
__vlan_group_free(vg);
}
@@ -1372,7 +1372,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
vg = nbp_vlan_group(port);
__vlan_flush(port->br, port, vg);
RCU_INIT_POINTER(port->vlgrp, NULL);
- synchronize_rcu();
+ synchronize_net();
__vlan_group_free(vg);
}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 7f304a19ac1b..104c0125e32e 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -39,6 +39,10 @@ config NF_CONNTRACK_BRIDGE
To compile it as a module, choose M here. If unsure, say N.
+# old sockopt interface and eval loop
+config BRIDGE_NF_EBTABLES_LEGACY
+ tristate
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
@@ -55,6 +59,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@@ -65,6 +70,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@@ -74,6 +80,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
+ select BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 1c9ce49ab651..b9a1303da977 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
+obj-$(CONFIG_BRIDGE_NF_EBTABLES_LEGACY) += ebtables.o
# tables
obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7343fd487dbe..707576eeeb58 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -865,6 +865,8 @@ static __init int can_init(void)
/* check for correct padding to be able to use the structs similarly */
BUILD_BUG_ON(offsetof(struct can_frame, len) !=
offsetof(struct canfd_frame, len) ||
+ offsetof(struct can_frame, len) !=
+ offsetof(struct canxl_frame, flags) ||
offsetof(struct can_frame, data) !=
offsetof(struct canfd_frame, data));
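The extended BUILD_BUG_ON() above pins down that can_frame.len, canfd_frame.len and canxl_frame.flags all sit at the same offset, so code may inspect that byte before it knows which frame type it holds. The same layout contract can be checked from user space with C11 static_assert — a sketch, assuming the Linux uapi headers are installed:

    #include <assert.h>     /* static_assert (C11) */
    #include <stddef.h>     /* offsetof */
    #include <linux/can.h>

    static_assert(offsetof(struct can_frame, len) ==
                  offsetof(struct canfd_frame, len),
                  "CC/FD frame layout diverged");
    static_assert(offsetof(struct can_frame, len) ==
                  offsetof(struct canxl_frame, flags),
                  "CC/XL frame layout diverged");

    int main(void) { return 0; }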
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9168114fc87f..27d5fcf0eac9 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -72,9 +72,11 @@
#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60)
/* use of last_frames[index].flags */
+#define RX_LOCAL 0x10 /* frame was created on the local host */
+#define RX_OWN 0x20 /* frame was sent via the socket it was received on */
#define RX_RECV 0x40 /* received data for this element */
#define RX_THR 0x80 /* element not been sent due to throttle feature */
-#define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */
+#define BCM_CAN_FLAGS_MASK 0x0F /* to clean private flags after usage */
/* get best masking value for can_rx_register() for a given single can_id */
#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \
@@ -138,6 +140,16 @@ static LIST_HEAD(bcm_notifier_list);
static DEFINE_SPINLOCK(bcm_notifier_lock);
static struct bcm_sock *bcm_busy_notifier;
+/* Return pointer to store the extra msg flags for bcm_recvmsg().
+ * We use the space of one unsigned int beyond the 'struct sockaddr_can'
+ * in skb->cb.
+ */
+static inline unsigned int *bcm_flags(struct sk_buff *skb)
+{
+ /* return pointer after struct sockaddr_can */
+ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
+}
+
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
return (struct bcm_sock *)sk;
@@ -325,6 +337,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
struct sock *sk = op->sk;
unsigned int datalen = head->nframes * op->cfsiz;
int err;
+ unsigned int *pflags;
skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
if (!skb)
@@ -332,6 +345,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
skb_put_data(skb, head, sizeof(*head));
+ /* ensure space for sockaddr_can and msg flags */
+ sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
+ sizeof(unsigned int));
+
+ /* initialize msg flags */
+ pflags = bcm_flags(skb);
+ *pflags = 0;
+
if (head->nframes) {
/* CAN frames starting here */
firstframe = (struct canfd_frame *)skb_tail_pointer(skb);
@@ -344,8 +365,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* relevant for updates that are generated by the
* BCM, where nframes is 1
*/
- if (head->nframes == 1)
+ if (head->nframes == 1) {
+ if (firstframe->flags & RX_LOCAL)
+ *pflags |= MSG_DONTROUTE;
+ if (firstframe->flags & RX_OWN)
+ *pflags |= MSG_CONFIRM;
+
firstframe->flags &= BCM_CAN_FLAGS_MASK;
+ }
}
if (has_timestamp) {
@@ -360,7 +387,6 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
* containing the interface index.
*/
- sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
@@ -444,7 +470,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
op->frames_filtered = op->frames_abs = 0;
/* this element is not throttled anymore */
- data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
+ data->flags &= ~RX_THR;
memset(&head, 0, sizeof(head));
head.opcode = RX_CHANGED;
@@ -465,13 +491,17 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
*/
static void bcm_rx_update_and_send(struct bcm_op *op,
struct canfd_frame *lastdata,
- const struct canfd_frame *rxdata)
+ const struct canfd_frame *rxdata,
+ unsigned char traffic_flags)
{
memcpy(lastdata, rxdata, op->cfsiz);
/* mark as used and throttled by default */
lastdata->flags |= (RX_RECV|RX_THR);
+ /* add own/local/remote traffic flags */
+ lastdata->flags |= traffic_flags;
+
/* throttling mode inactive ? */
if (!op->kt_ival2) {
/* send RX_CHANGED to the user immediately */
@@ -508,7 +538,8 @@ rx_changed_settime:
* received data stored in op->last_frames[]
*/
static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
- const struct canfd_frame *rxdata)
+ const struct canfd_frame *rxdata,
+ unsigned char traffic_flags)
{
struct canfd_frame *cf = op->frames + op->cfsiz * index;
struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
@@ -521,7 +552,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
if (!(lcf->flags & RX_RECV)) {
/* received data for the first time => send update to user */
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
@@ -529,7 +560,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
for (i = 0; i < rxdata->len; i += 8) {
if ((get_u64(cf, i) & get_u64(rxdata, i)) !=
(get_u64(cf, i) & get_u64(lcf, i))) {
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
}
@@ -537,7 +568,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index,
if (op->flags & RX_CHECK_DLC) {
/* do a real check in CAN frame length */
if (rxdata->len != lcf->len) {
- bcm_rx_update_and_send(op, lcf, rxdata);
+ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags);
return;
}
}
@@ -644,6 +675,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
struct bcm_op *op = (struct bcm_op *)data;
const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data;
unsigned int i;
+ unsigned char traffic_flags;
if (op->can_id != rxframe->can_id)
return;
@@ -673,15 +705,24 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
return;
}
+ /* compute flags to distinguish between own/local/remote CAN traffic */
+ traffic_flags = 0;
+ if (skb->sk) {
+ traffic_flags |= RX_LOCAL;
+ if (skb->sk == op->sk)
+ traffic_flags |= RX_OWN;
+ }
+
if (op->flags & RX_FILTER_ID) {
/* the easiest case */
- bcm_rx_update_and_send(op, op->last_frames, rxframe);
+ bcm_rx_update_and_send(op, op->last_frames, rxframe,
+ traffic_flags);
goto rx_starttimer;
}
if (op->nframes == 1) {
/* simple compare with index 0 */
- bcm_rx_cmp_to_index(op, 0, rxframe);
+ bcm_rx_cmp_to_index(op, 0, rxframe, traffic_flags);
goto rx_starttimer;
}
@@ -698,7 +739,8 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) ==
(get_u64(op->frames, 0) &
get_u64(op->frames + op->cfsiz * i, 0))) {
- bcm_rx_cmp_to_index(op, i, rxframe);
+ bcm_rx_cmp_to_index(op, i, rxframe,
+ traffic_flags);
break;
}
}
@@ -1675,6 +1717,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* assign the flags that have been recorded in bcm_send_to_user() */
+ msg->msg_flags |= *(bcm_flags(skb));
+
skb_free_datagram(sk, skb);
return size;
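With the bcm.c changes above, every RX_CHANGED message carries two new hints in msg_flags: MSG_DONTROUTE when the frame was generated on the local host, and MSG_CONFIRM when it was sent on the very socket it is received on. A hedged user-space sketch reading them — assumes a vcan0 interface exists, error handling trimmed for brevity:

    #include <stdio.h>
    #include <unistd.h>
    #include <net/if.h>
    #include <sys/socket.h>
    #include <linux/can.h>
    #include <linux/can/bcm.h>

    struct bcm_msg {                      /* msg head followed by one frame */
        struct bcm_msg_head head;
        struct can_frame frame;
    };

    int main(void)
    {
        struct sockaddr_can addr = {
            .can_family = AF_CAN,
            .can_ifindex = (int)if_nametoindex("vcan0"),
        };
        struct bcm_msg msg = {
            .head = { .opcode = RX_SETUP, .flags = RX_FILTER_ID,
                      .can_id = 0x123, .nframes = 0 },
        };
        int s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
        struct iovec iov = { .iov_base = &msg, .iov_len = sizeof(msg) };
        struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1 };

        connect(s, (struct sockaddr *)&addr, sizeof(addr));
        write(s, &msg, sizeof(msg.head)); /* install the RX_FILTER_ID op */

        if (recvmsg(s, &mh, 0) > 0) {     /* RX_CHANGED for can_id 0x123 */
            if (mh.msg_flags & MSG_DONTROUTE)
                printf("frame originated on this host\n");
            if (mh.msg_flags & MSG_CONFIRM)
                printf("frame was sent on this very socket\n");
        }
        close(s);
        return 0;
    }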
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d1c6f206f429..25bac0fafc83 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -381,8 +381,9 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
return 1;
}
- /* get communication parameters only from the first FC frame */
- if (so->tx.state == ISOTP_WAIT_FIRST_FC) {
+ /* get static/dynamic communication params from first/every FC frame */
+ if (so->tx.state == ISOTP_WAIT_FIRST_FC ||
+ so->opt.flags & CAN_ISOTP_DYN_FC_PARMS) {
so->txfc.bs = cf->data[ae + 1];
so->txfc.stmin = cf->data[ae + 2];
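Before this isotp.c change, block size (BS) and STmin were latched from the first flow-control frame for the whole connection; with CAN_ISOTP_DYN_FC_PARMS set they are re-read from every FC frame. A hedged sketch of opting in from user space — assumes a vcan0 interface and the uapi headers, error handling omitted:

    #include <string.h>
    #include <net/if.h>
    #include <sys/socket.h>
    #include <linux/can.h>
    #include <linux/can/isotp.h>

    int main(void)
    {
        int s = socket(PF_CAN, SOCK_DGRAM, CAN_ISOTP);
        struct can_isotp_options opts;
        struct sockaddr_can addr = {
            .can_family = AF_CAN,
            .can_ifindex = (int)if_nametoindex("vcan0"),
            .can_addr.tp = { .tx_id = 0x711, .rx_id = 0x719 },
        };

        memset(&opts, 0, sizeof(opts));
        opts.flags = CAN_ISOTP_DYN_FC_PARMS;  /* flag from the patch above */
        setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts));

        bind(s, (struct sockaddr *)&addr, sizeof(addr));
        return 0;
    }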
diff --git a/net/can/raw.c b/net/can/raw.c
index e6b822624ba2..cb8e6f788af8 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -91,6 +91,10 @@ struct raw_sock {
int recv_own_msgs;
int fd_frames;
int xl_frames;
+ struct can_raw_vcid_options raw_vcid_opts;
+ canid_t tx_vcid_shifted;
+ canid_t rx_vcid_shifted;
+ canid_t rx_vcid_mask_shifted;
int join_filters;
int count; /* number of active filters */
struct can_filter dfilter; /* default/single filter */
@@ -134,10 +138,29 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
- (!ro->xl_frames && can_is_canxl_skb(oskb)))
+ if (!ro->fd_frames && can_is_canfd_skb(oskb))
return;
+ if (can_is_canxl_skb(oskb)) {
+ struct canxl_frame *cxl = (struct canxl_frame *)oskb->data;
+
+ /* make sure to not pass oversized frames to the socket */
+ if (!ro->xl_frames)
+ return;
+
+ /* filter CAN XL VCID content */
+ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_RX_FILTER) {
+ /* apply VCID filter if user enabled the filter */
+ if ((cxl->prio & ro->rx_vcid_mask_shifted) !=
+ (ro->rx_vcid_shifted & ro->rx_vcid_mask_shifted))
+ return;
+ } else {
+ /* no filter => do not forward VCID tagged frames */
+ if (cxl->prio & CANXL_VCID_MASK)
+ return;
+ }
+ }
+
/* eliminate multiple filter matches for the same skb */
if (this_cpu_ptr(ro->uniq)->skb == oskb &&
this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
@@ -698,6 +721,19 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->fd_frames = ro->xl_frames;
break;
+ case CAN_RAW_XL_VCID_OPTS:
+ if (optlen != sizeof(ro->raw_vcid_opts))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&ro->raw_vcid_opts, optval, optlen))
+ return -EFAULT;
+
+ /* prepare 32 bit values for handling in hot path */
+ ro->tx_vcid_shifted = ro->raw_vcid_opts.tx_vcid << CANXL_VCID_OFFSET;
+ ro->rx_vcid_shifted = ro->raw_vcid_opts.rx_vcid << CANXL_VCID_OFFSET;
+ ro->rx_vcid_mask_shifted = ro->raw_vcid_opts.rx_vcid_mask << CANXL_VCID_OFFSET;
+ break;
+
case CAN_RAW_JOIN_FILTERS:
if (optlen != sizeof(ro->join_filters))
return -EINVAL;
@@ -786,6 +822,21 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
val = &ro->xl_frames;
break;
+ case CAN_RAW_XL_VCID_OPTS:
+ /* user space buffer to small for VCID opts? */
+ if (len < sizeof(ro->raw_vcid_opts)) {
+ /* return -ERANGE and needed space in optlen */
+ err = -ERANGE;
+ if (put_user(sizeof(ro->raw_vcid_opts), optlen))
+ err = -EFAULT;
+ } else {
+ if (len > sizeof(ro->raw_vcid_opts))
+ len = sizeof(ro->raw_vcid_opts);
+ if (copy_to_user(optval, &ro->raw_vcid_opts, len))
+ err = -EFAULT;
+ }
+ break;
+
case CAN_RAW_JOIN_FILTERS:
if (len > sizeof(int))
len = sizeof(int);
@@ -803,23 +854,41 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
-static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+static void raw_put_canxl_vcid(struct raw_sock *ro, struct sk_buff *skb)
+{
+ struct canxl_frame *cxl = (struct canxl_frame *)skb->data;
+
+ /* sanitize non CAN XL bits */
+ cxl->prio &= (CANXL_PRIO_MASK | CANXL_VCID_MASK);
+
+ /* clear VCID in CAN XL frame if pass through is disabled */
+ if (!(ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_PASS))
+ cxl->prio &= CANXL_PRIO_MASK;
+
+ /* set VCID in CAN XL frame if enabled */
+ if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_SET) {
+ cxl->prio &= CANXL_PRIO_MASK;
+ cxl->prio |= ro->tx_vcid_shifted;
+ }
+}
+
+static unsigned int raw_check_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
{
/* Classical CAN -> no checks for flags and device capabilities */
if (can_is_can_skb(skb))
- return false;
+ return CAN_MTU;
/* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
if (ro->fd_frames && can_is_canfd_skb(skb) &&
(mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
- return false;
+ return CANFD_MTU;
/* CAN XL -> needs to be enabled and a CAN XL device */
if (ro->xl_frames && can_is_canxl_skb(skb) &&
can_is_canxl_dev_mtu(mtu))
- return false;
+ return CANXL_MTU;
- return true;
+ return 0;
}
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -829,6 +898,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct sockcm_cookie sockc;
struct sk_buff *skb;
struct net_device *dev;
+ unsigned int txmtu;
int ifindex;
int err = -EINVAL;
@@ -869,9 +939,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (raw_bad_txframe(ro, skb, dev->mtu))
+
+ /* check for valid CAN (CC/FD/XL) frame content */
+ txmtu = raw_check_txframe(ro, skb, dev->mtu);
+ if (!txmtu)
goto free_skb;
+ /* only CANXL: clear/forward/set VCID value */
+ if (txmtu == CANXL_MTU)
+ raw_put_canxl_vcid(ro, skb);
+
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
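The new CAN_RAW_XL_VCID_OPTS option above controls the VCID bits in the prio field of CAN XL frames: CAN_RAW_XL_VCID_TX_SET tags outgoing frames with tx_vcid, CAN_RAW_XL_VCID_TX_PASS forwards a caller-supplied VCID unchanged, and CAN_RAW_XL_VCID_RX_FILTER accepts received frames matching rx_vcid under rx_vcid_mask — unconfigured sockets keep dropping VCID-tagged frames. A hedged user-space sketch with names taken from the patch (assumes vcan0, error handling omitted):

    #include <net/if.h>
    #include <sys/socket.h>
    #include <linux/can.h>
    #include <linux/can/raw.h>

    int main(void)
    {
        int s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
        int enable = 1;
        struct can_raw_vcid_options vcid = {
            .flags = CAN_RAW_XL_VCID_TX_SET | CAN_RAW_XL_VCID_RX_FILTER,
            .tx_vcid = 0x45,       /* tag all sent CAN XL frames */
            .rx_vcid = 0x45,       /* accept only this VCID ... */
            .rx_vcid_mask = 0xff,  /* ... compared under this mask */
        };
        struct sockaddr_can addr = {
            .can_family = AF_CAN,
            .can_ifindex = (int)if_nametoindex("vcan0"),
        };

        setsockopt(s, SOL_CAN_RAW, CAN_RAW_XL_FRAMES, &enable, sizeof(enable));
        setsockopt(s, SOL_CAN_RAW, CAN_RAW_XL_VCID_OPTS, &vcid, sizeof(vcid));
        bind(s, (struct sockaddr *)&addr, sizeof(addr));
        return 0;
    }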
diff --git a/net/core/dev.c b/net/core/dev.c
index 73a021973007..cc9c2eda65ac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
#include "dev.h"
#include "net-sysfs.h"
@@ -166,28 +168,6 @@ static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
-/*
- * The @dev_base_head list is protected by @dev_base_lock and the rtnl
- * semaphore.
- *
- * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
- *
- * Writers must hold the rtnl semaphore while they loop through the
- * dev_base_head list, and hold dev_base_lock for writing when they do the
- * actual updates. This allows pure readers to access the list even
- * while a writer is preparing to update it.
- *
- * To put it another way, dev_base_lock is held for writing only to
- * protect against pure readers; the rtnl semaphore provides the
- * protection against other writers.
- *
- * See, for example usages, register_netdevice() and
- * unregister_netdevice(), which must be called with the rtnl
- * semaphore held.
- */
-DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_lock);
-
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
@@ -341,13 +321,22 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return 0;
}
-static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+static void netdev_name_node_alt_free(struct rcu_head *head)
{
- list_del(&name_node->list);
+ struct netdev_name_node *name_node =
+ container_of(head, struct netdev_name_node, rcu);
+
kfree(name_node->name);
netdev_name_node_free(name_node);
}
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+ netdev_name_node_del(name_node);
+ list_del(&name_node->list);
+ call_rcu(&name_node->rcu, netdev_name_node_alt_free);
+}
+
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
@@ -362,10 +351,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
- netdev_name_node_del(name_node);
- synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
-
return 0;
}
@@ -373,8 +359,10 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
{
struct netdev_name_node *name_node, *tmp;
- list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
- __netdev_name_node_alt_destroy(name_node);
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) {
+ list_del(&name_node->list);
+ netdev_name_node_alt_free(&name_node->rcu);
+ }
}
/* Device list insertion */
@@ -385,12 +373,10 @@ static void list_netdevice(struct net_device *dev)
ASSERT_RTNL();
- write_lock(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
- write_unlock(&dev_base_lock);
netdev_for_each_altname(dev, name_node)
netdev_name_node_add(net, name_node);
@@ -404,7 +390,7 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev, bool lock)
+static void unlist_netdevice(struct net_device *dev)
{
struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
@@ -417,13 +403,9 @@ static void unlist_netdevice(struct net_device *dev, bool lock)
netdev_name_node_del(name_node);
/* Unlink dev from the device chain */
- if (lock)
- write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- if (lock)
- write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -442,6 +424,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
+/* Page_pool has a lockless array/stack to alloc/recycle pages.
+ * PP consumers must pay attention to run APIs in the appropriate context
+ * (e.g. NAPI context).
+ */
+static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+
#ifdef CONFIG_LOCKDEP
/*
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -738,9 +726,9 @@ EXPORT_SYMBOL_GPL(dev_fill_forward_path);
* @net: the applicable net namespace
* @name: name to find
*
- * Find an interface by name. Must be called under RTNL semaphore
- * or @dev_base_lock. If the name is found a pointer to the device
- * is returned. If the name is not found then %NULL is returned. The
+ * Find an interface by name. Must be called under RTNL semaphore.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned. The
* reference counters are not incremented so the caller must be
* careful with locks.
*/
@@ -821,8 +809,7 @@ EXPORT_SYMBOL(netdev_get_by_name);
* Search for an interface by index. Returns %NULL if the device
* is not found or a pointer to the device. The device has not
* had its reference counter increased so the caller must be careful
- * about locking. The caller must hold either the RTNL semaphore
- * or @dev_base_lock.
+ * about locking. The caller must hold the RTNL semaphore.
*/
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
@@ -1212,13 +1199,13 @@ int dev_change_name(struct net_device *dev, const char *newname)
dev->flags & IFF_UP ? " (while UP)" : "");
old_assign_type = dev->name_assign_type;
- dev->name_assign_type = NET_NAME_RENAMED;
+ WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED);
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
up_write(&devnet_rename_sem);
return ret;
}
@@ -1227,15 +1214,11 @@ rollback:
netdev_adjacent_rename_links(dev, oldname);
- write_lock(&dev_base_lock);
netdev_name_node_del(dev->name_node);
- write_unlock(&dev_base_lock);
- synchronize_rcu();
+ synchronize_net();
- write_lock(&dev_base_lock);
netdev_name_node_add(net, dev->name_node);
- write_unlock(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
@@ -1247,7 +1230,7 @@ rollback:
down_write(&devnet_rename_sem);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
@@ -4858,6 +4841,12 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
skb_headlen(skb) + mac_len, true);
+ if (skb_is_nonlinear(skb)) {
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+ xdp_buff_set_frags_flag(xdp);
+ } else {
+ xdp_buff_clear_frags_flag(xdp);
+ }
orig_data_end = xdp->data_end;
orig_data = xdp->data;
@@ -4887,6 +4876,14 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
skb->len += off; /* positive on grow, negative on shrink */
}
+ /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
+ * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
+ */
+ if (xdp_buff_has_frags(xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
@@ -4920,11 +4917,35 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
return act;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+static int
+netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+{
+ struct sk_buff *skb = *pskb;
+ int err, hroom, troom;
+
+ if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+ return 0;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ troom = skb->tail + skb->data_len - skb->end;
+ err = pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC);
+ if (err)
+ return err;
+
+ return skb_linearize(skb);
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- u32 act = XDP_DROP;
+ struct sk_buff *skb = *pskb;
+ u32 mac_len, act = XDP_DROP;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
@@ -4932,41 +4953,36 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_is_redirected(skb))
return XDP_PASS;
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
+ /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM
+ * bytes. This is the guarantee that also native XDP provides,
+ * thus we need to do it here as well.
*/
+ mac_len = skb->data - skb_mac_header(skb);
+ __skb_push(skb, mac_len);
+
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
+ if (netif_skb_check_for_xdp(pskb, xdp_prog))
goto do_drop;
}
- act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ __skb_pull(*pskb, mac_len);
+
+ act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
case XDP_PASS:
break;
default:
- bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
+ bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
+ trace_xdp_exception((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
do_drop:
- kfree_skb(skb);
+ kfree_skb(*pskb);
break;
}
@@ -5004,24 +5020,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
- act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
+ act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
case XDP_REDIRECT:
- err = xdp_do_generic_redirect(skb->dev, skb,
+ err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
&xdp, xdp_prog);
if (err)
goto out_redir;
break;
case XDP_TX:
- generic_xdp_tx(skb, xdp_prog);
+ generic_xdp_tx(*pskb, xdp_prog);
break;
}
return XDP_DROP;
@@ -5029,7 +5045,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
+ kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -5352,7 +5368,8 @@ another_round:
int ret2;
migrate_disable();
- ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ &skb);
migrate_enable();
if (ret2 != XDP_PASS) {
@@ -6177,8 +6194,13 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
- u16 budget)
+enum {
+ NAPI_F_PREFER_BUSY_POLL = 1,
+ NAPI_F_END_ON_RESCHED = 2,
+};
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
+ unsigned flags, u16 budget)
{
bool skip_schedule = false;
unsigned long timeout;
@@ -6198,7 +6220,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
local_bh_disable();
- if (prefer_busy_poll) {
+ if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
timeout = READ_ONCE(napi->dev->gro_flush_timeout);
if (napi->defer_hard_irqs_count && timeout) {
@@ -6222,23 +6244,23 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool
local_bh_enable();
}
-void napi_busy_loop(unsigned int napi_id,
- bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+static void __napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, unsigned flags, u16 budget)
{
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
restart:
napi_poll = NULL;
- rcu_read_lock();
-
napi = napi_by_id(napi_id);
if (!napi)
- goto out;
+ return;
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
@@ -6254,14 +6276,14 @@ restart:
*/
if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
NAPIF_STATE_IN_BUSY_POLL)) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
if (cmpxchg(&napi->state, val,
val | NAPIF_STATE_IN_BUSY_POLL |
NAPIF_STATE_SCHED) != val) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
@@ -6281,12 +6303,15 @@ count:
break;
if (unlikely(need_resched())) {
+ if (flags & NAPI_F_END_ON_RESCHED)
+ break;
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
rcu_read_unlock();
cond_resched();
+ rcu_read_lock();
if (loop_end(loop_end_arg, start_time))
return;
goto restart;
@@ -6294,10 +6319,31 @@ count:
cpu_relax();
}
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
-out:
+}
+
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = NAPI_F_END_ON_RESCHED;
+
+ if (prefer_busy_poll)
+ flags |= NAPI_F_PREFER_BUSY_POLL;
+
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+}
+
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+
+ rcu_read_lock();
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
rcu_read_unlock();
}
EXPORT_SYMBOL(napi_busy_loop);
@@ -8914,7 +8960,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
}
EXPORT_SYMBOL(dev_set_mac_address);
-static DECLARE_RWSEM(dev_addr_sem);
+DECLARE_RWSEM(dev_addr_sem);
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack)
@@ -9690,11 +9736,11 @@ static void dev_index_release(struct net *net, int ifindex)
/* Delayed registration/unregisteration */
LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+atomic_t dev_unreg_count = ATOMIC_INIT(0);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -10259,9 +10305,9 @@ int register_netdevice(struct net_device *dev)
goto err_ifindex_release;
ret = netdev_register_kobject(dev);
- write_lock(&dev_base_lock);
- dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
- write_unlock(&dev_base_lock);
+
+ WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
+
if (ret)
goto err_uninit_notify;
@@ -10337,7 +10383,7 @@ EXPORT_SYMBOL(register_netdevice);
* that need to tie several hardware interfaces to a single NAPI
* poll scheduler due to HW limitations.
*/
-int init_dummy_netdev(struct net_device *dev)
+void init_dummy_netdev(struct net_device *dev)
{
/* Clear everything. Note we don't initialize spinlocks
* are they aren't supposed to be taken by any of the
@@ -10365,8 +10411,6 @@ int init_dummy_netdev(struct net_device *dev)
* because users of this 'device' dont need to change
* its refcount.
*/
-
- return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -10521,6 +10565,7 @@ void netdev_run_todo(void)
{
struct net_device *dev, *tmp;
struct list_head list;
+ int cnt;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
@@ -10551,12 +10596,11 @@ void netdev_run_todo(void)
continue;
}
- write_lock(&dev_base_lock);
- dev->reg_state = NETREG_UNREGISTERED;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
linkwatch_sync_dev(dev);
}
+ cnt = 0;
while (!list_empty(&list)) {
dev = netdev_wait_allrefs_any(&list);
list_del(&dev->todo_list);
@@ -10574,12 +10618,13 @@ void netdev_run_todo(void)
if (dev->needs_free_netdev)
free_netdev(dev);
- if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
- wake_up(&netdev_unregistering_wq);
+ cnt++;
/* Free network device */
kobject_put(&dev->dev.kobj);
}
+ if (cnt && atomic_sub_and_test(cnt, &dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
}
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
@@ -10970,7 +11015,7 @@ void free_netdev(struct net_device *dev)
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
- dev->reg_state = NETREG_RELEASED;
+ WRITE_ONCE(dev->reg_state, NETREG_RELEASED);
/* will free via device release */
put_device(&dev->dev);
@@ -11026,6 +11071,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
{
struct net_device *dev, *tmp;
LIST_HEAD(close_head);
+ int cnt = 0;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -11057,10 +11103,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- write_lock(&dev_base_lock);
- unlist_netdevice(dev, false);
- dev->reg_state = NETREG_UNREGISTERING;
- write_unlock(&dev_base_lock);
+ unlist_netdevice(dev);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
}
flush_all_backlogs();
@@ -11122,7 +11166,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
netdev_put(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
+ cnt++;
}
+ atomic_add(cnt, &dev_unreg_count);
list_del(head);
}
@@ -11240,7 +11286,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev, true);
+ unlist_netdevice(dev);
synchronize_net();
@@ -11576,11 +11622,8 @@ static void __net_exit default_device_exit_net(struct net *net)
snprintf(fb_name, IFNAMSIZ, "dev%%d");
netdev_for_each_altname_safe(dev, name_node, tmp)
- if (netdev_name_in_use(&init_net, name_node->name)) {
- netdev_name_node_del(name_node);
- synchronize_rcu();
+ if (netdev_name_in_use(&init_net, name_node->name))
__netdev_name_node_alt_destroy(name_node);
- }
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
@@ -11687,6 +11730,27 @@ static void __init net_dev_struct_check(void)
*
*/
+/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
+#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE)
+
+static int net_page_pool_create(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ struct page_pool_params page_pool_params = {
+ .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ };
+ struct page_pool *pp_ptr;
+
+ pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+ if (IS_ERR(pp_ptr))
+ return -ENOMEM;
+
+ per_cpu(system_page_pool, cpuid) = pp_ptr;
+#endif
+ return 0;
+}
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
@@ -11739,6 +11803,9 @@ static int __init net_dev_init(void)
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+
+ if (net_page_pool_create(i))
+ goto out;
}
dev_boot_phase = 0;
@@ -11766,6 +11833,19 @@ static int __init net_dev_init(void)
WARN_ON(rc < 0);
rc = 0;
out:
+ if (rc < 0) {
+ for_each_possible_cpu(i) {
+ struct page_pool *pp_ptr;
+
+ pp_ptr = per_cpu(system_page_pool, i);
+ if (!pp_ptr)
+ continue;
+
+ page_pool_destroy(pp_ptr);
+ per_cpu(system_page_pool, i) = NULL;
+ }
+ }
+
return rc;
}
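Threaded through the dev.c changes above, do_xdp_generic() and netif_receive_generic_xdp() now take struct sk_buff ** because skb_cow_data_for_xdp() or pskb_expand_head() may hand back a different skb, and the caller must keep seeing the live pointer. A minimal standalone sketch of that double-pointer pattern, with hypothetical names (not kernel code):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct buf { size_t len; char data[64]; };

    /* The callee may replace the caller's buffer (here: a stand-in
     * for skb reallocation), so it receives buf ** and updates it.
     */
    static int grow_headroom(struct buf **pbuf)
    {
        struct buf *nb = calloc(1, sizeof(*nb));
        if (!nb)
            return -1;
        memcpy(nb->data, (*pbuf)->data, (*pbuf)->len);
        nb->len = (*pbuf)->len;
        free(*pbuf);
        *pbuf = nb;    /* caller's pointer now tracks the new buffer */
        return 0;
    }

    int main(void)
    {
        struct buf *b = calloc(1, sizeof(*b));

        strcpy(b->data, "frame");
        b->len = 5;
        if (grow_headroom(&b) == 0)   /* pass &b, not b */
            printf("still valid after replacement: %s\n", b->data);
        free(b);
        return 0;
    }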
diff --git a/net/core/dev.h b/net/core/dev.h
index 7480b4c84298..45892267848d 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -3,6 +3,7 @@
#define _NET_CORE_DEV_H
#include <linux/types.h>
+#include <linux/rwsem.h>
struct net;
struct net_device;
@@ -46,6 +47,8 @@ extern int weight_p;
extern int dev_weight_rx_bias;
extern int dev_weight_tx_bias;
+extern struct rw_semaphore dev_addr_sem;
+
/* rtnl helpers */
extern struct list_head net_todo_list;
void netdev_run_todo(void);
@@ -56,6 +59,7 @@ struct netdev_name_node {
struct list_head list;
struct net_device *dev;
const char *name;
+ struct rcu_head rcu;
};
int netdev_get_name(struct net *net, char *name, int ifindex);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6838d3212c37..95f533844f17 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -96,7 +96,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
}
EXPORT_SYMBOL(dst_alloc);
-struct dst_entry *dst_destroy(struct dst_entry * dst)
+static void dst_destroy(struct dst_entry *dst)
{
struct dst_entry *child = NULL;
@@ -126,15 +126,13 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
dst = child;
if (dst)
dst_release_immediate(dst);
- return NULL;
}
-EXPORT_SYMBOL(dst_destroy);
static void dst_destroy_rcu(struct rcu_head *head)
{
struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
- dst = dst_destroy(dst);
+ dst_destroy(dst);
}
/* Operations to mark dst as DEAD and clean up the net device referenced
diff --git a/net/core/filter.c b/net/core/filter.c
index ef3e78b6a39c..358870408a51 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -88,7 +88,7 @@
#include "dev.h"
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id);
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
@@ -778,7 +778,7 @@ jmp_rest:
BPF_EMIT_JMP;
break;
- /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
+ /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B: {
struct sock_filter tmp = {
.code = BPF_LD | BPF_ABS | BPF_B,
@@ -804,7 +804,7 @@ jmp_rest:
*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
}
- /* RET_K is remaped into 2 insns. RET_A case doesn't need an
+ /* RET_K is remapped into 2 insns. RET_A case doesn't need an
* extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
*/
case BPF_RET | BPF_A:
@@ -2968,7 +2968,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
*
* Then if B is non-zero AND there is no space allocate space and
* compact A, B regions into page. If there is space shift ring to
- * the rigth free'ing the next element in ring to place B, leaving
+ * the right free'ing the next element in ring to place B, leaving
* A untouched except to reduce length.
*/
if (start != offset) {
@@ -7894,7 +7894,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -7987,7 +7987,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8006,7 +8006,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8193,7 +8193,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8252,13 +8252,13 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
/* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
* kfuncs are defined in two different modules, and we want to be able
- * to use them interchangably with the same BTF type ID. Because modules
+ * to use them interchangeably with the same BTF type ID. Because modules
* can't de-duplicate BTF IDs between each other, we need the type to be
* referenced in the vmlinux BTF or the verifier will get confused about
* the different types. So we add this dummy type reference which will
@@ -8313,7 +8313,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8355,7 +8355,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8399,7 +8399,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8410,7 +8410,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8437,7 +8437,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8612,7 +8612,7 @@ static bool cg_skb_is_valid_access(int off, int size,
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
}
@@ -8624,7 +8624,7 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
case bpf_ctx_range(struct __sk_buff, tstamp):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
default:
@@ -11268,7 +11268,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -11450,7 +11450,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -11784,7 +11784,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = {
};
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id)
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
const struct bpf_func_proto *func;
@@ -11813,10 +11813,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
- if (!perfmon_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
return NULL;
return func;
@@ -11869,6 +11869,103 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
return 0;
}
+
+__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz)
+{
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ const struct request_sock_ops *ops;
+ struct inet_request_sock *ireq;
+ struct tcp_request_sock *treq;
+ struct request_sock *req;
+ struct net *net;
+ __u16 min_mss;
+ u32 tsoff = 0;
+
+ if (attrs__sz != sizeof(*attrs) ||
+ attrs->reserved[0] || attrs->reserved[1] || attrs->reserved[2])
+ return -EINVAL;
+
+ if (!skb_at_tc_ingress(skb))
+ return -EINVAL;
+
+ net = dev_net(skb->dev);
+ if (net != sock_net(sk))
+ return -ENETUNREACH;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ops = &tcp_request_sock_ops;
+ min_mss = 536;
+ break;
+#if IS_BUILTIN(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ ops = &tcp6_request_sock_ops;
+ min_mss = IPV6_MIN_MTU - 60;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN ||
+ sk_is_mptcp(sk))
+ return -EINVAL;
+
+ if (attrs->mss < min_mss)
+ return -EINVAL;
+
+ if (attrs->wscale_ok) {
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_window_scaling))
+ return -EINVAL;
+
+ if (attrs->snd_wscale > TCP_MAX_WSCALE ||
+ attrs->rcv_wscale > TCP_MAX_WSCALE)
+ return -EINVAL;
+ }
+
+ if (attrs->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
+ return -EINVAL;
+
+ if (attrs->tstamp_ok) {
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
+ return -EINVAL;
+
+ tsoff = attrs->rcv_tsecr - tcp_ns_to_ts(attrs->usec_ts_ok, tcp_clock_ns());
+ }
+
+ req = inet_reqsk_alloc(ops, sk, false);
+ if (!req)
+ return -ENOMEM;
+
+ ireq = inet_rsk(req);
+ treq = tcp_rsk(req);
+
+ req->rsk_listener = sk;
+ req->syncookie = 1;
+ req->mss = attrs->mss;
+ req->ts_recent = attrs->rcv_tsval;
+
+ ireq->snd_wscale = attrs->snd_wscale;
+ ireq->rcv_wscale = attrs->rcv_wscale;
+ ireq->tstamp_ok = !!attrs->tstamp_ok;
+ ireq->sack_ok = !!attrs->sack_ok;
+ ireq->wscale_ok = !!attrs->wscale_ok;
+ ireq->ecn_ok = !!attrs->ecn_ok;
+
+ treq->req_usec_ts = !!attrs->usec_ts_ok;
+ treq->ts_off = tsoff;
+
+ skb_orphan(skb);
+ skb->sk = req_to_sk(req);
+ skb->destructor = sock_pfree;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
@@ -11897,6 +11994,10 @@ BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
+BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk)
+BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS)
+BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk)
+
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_skb,
@@ -11912,6 +12013,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
.set = &bpf_kfunc_check_set_sock_addr,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_tcp_reqsk,
+};
+
static int __init bpf_kfunc_init(void)
{
int ret;
@@ -11927,8 +12033,9 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- &bpf_kfunc_set_sock_addr);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ &bpf_kfunc_set_sock_addr);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
}
late_initcall(bpf_kfunc_init);
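As a usage sketch (not part of the patch): a tc-ingress program that validates its own SYN cookie on the incoming ACK and hands the pre-cooked request to the kernel via the new kfunc. The section name, tuple handling and cookie decoding are assumptions; only the kfunc itself comes from this series.

/* Sketch: attach at tc ingress on the listener's device. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* kfunc from this patch; declared the way selftests typically do */
extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk,
				   struct bpf_tcp_req_attrs *attrs,
				   int attrs__sz) __ksym;

SEC("tc")
int custom_syncookie_ack(struct __sk_buff *skb)
{
	struct bpf_tcp_req_attrs attrs = {};
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;

	/* ... parse the ACK, fill 'tuple', decode the cookie into
	 * 'attrs' (mss, wscale, sack/tstamp/ecn flags) ...
	 */

	sk = bpf_skc_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
				BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return 0; /* TC_ACT_OK */

	if (sk->state == BPF_TCP_LISTEN)
		bpf_sk_assign_tcp_reqsk(skb, (struct sock *)sk,
					&attrs, sizeof(attrs));

	bpf_sk_release(sk);
	return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";

On success the skb is orphaned onto the request socket, and sock_pfree() (changed below in net/core/sock.c) knows how to free a syncookie reqsk whose listener reference was never taken.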
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 429571c258da..8ec35194bfcb 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -33,7 +33,7 @@ static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
-static unsigned char default_operstate(const struct net_device *dev)
+static unsigned int default_operstate(const struct net_device *dev)
{
if (netif_testing(dev))
return IF_OPER_TESTING;
@@ -62,16 +62,13 @@ static unsigned char default_operstate(const struct net_device *dev)
return IF_OPER_UP;
}
-
static void rfc2863_policy(struct net_device *dev)
{
- unsigned char operstate = default_operstate(dev);
+ unsigned int operstate = default_operstate(dev);
- if (operstate == dev->operstate)
+ if (operstate == READ_ONCE(dev->operstate))
return;
- write_lock(&dev_base_lock);
-
switch(dev->link_mode) {
case IF_LINK_MODE_TESTING:
if (operstate == IF_OPER_UP)
@@ -87,9 +84,7 @@ static void rfc2863_policy(struct net_device *dev)
break;
}
- dev->operstate = operstate;
-
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->operstate, operstate);
}
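The dev_base_lock writer section is replaced by plain annotated accesses: readers use READ_ONCE(), and the one remaining multi-writer site (netdev_set_operstate() in rtnetlink.c below) uses a try_cmpxchg() loop. In isolation, the pattern looks like this (illustration only, not from the patch):

static void example_set_state(unsigned int *state, unsigned int new)
{
	unsigned int old = READ_ONCE(*state);

	do {
		if (old == new)
			return;	/* no transition, so no notifier either */
		/* try_cmpxchg() reloads 'old' on failure, so each retry
		 * re-evaluates the no-op check against the fresh value.
		 */
	} while (!try_cmpxchg(state, &old, new));
}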
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 09f7ed1a04e8..2e4e96d30ee1 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -6,49 +6,18 @@
#include "dev.h"
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
-
-#define get_bucket(x) ((x) >> BUCKET_SPACE)
-#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
-#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos)
{
- struct net *net = seq_file_net(seq);
+ unsigned long ifindex = *pos;
struct net_device *dev;
- struct hlist_head *h;
- unsigned int count = 0, offset = get_offset(*pos);
- h = &net->dev_index_head[get_bucket(*pos)];
- hlist_for_each_entry_rcu(dev, h, index_hlist) {
- if (++count == offset)
- return dev;
+ for_each_netdev_dump(seq_file_net(seq), dev, ifindex) {
+ *pos = dev->ifindex;
+ return dev;
}
-
- return NULL;
-}
-
-static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
-{
- struct net_device *dev;
- unsigned int bucket;
-
- do {
- dev = dev_from_same_bucket(seq, pos);
- if (dev)
- return dev;
-
- bucket = get_bucket(*pos) + 1;
- *pos = set_bucket_offset(bucket, 1);
- } while (bucket < NETDEV_HASHENTRIES);
-
return NULL;
}
-/*
- * This is invoked by the /proc filesystem handler to display a device
- * in detail.
- */
static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
@@ -56,16 +25,13 @@ static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
- return NULL;
-
- return dev_from_bucket(seq, pos);
+ return dev_seq_from_index(seq, pos);
}
static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return dev_from_bucket(seq, pos);
+ return dev_seq_from_index(seq, pos);
}
static void dev_seq_stop(struct seq_file *seq, void *v)
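Both this file and rtnl_dump_ifinfo() below now walk the per-netns ifindex xarray instead of the name/index hash buckets. A sketch of the idiom, assuming the dev_by_index xarray backing: the cursor is the ifindex itself, so a restarted dump resumes at the first device with ifindex >= *pos, in increasing order, under RCU or RTNL.

static struct net_device *first_dev_from(struct net *net, unsigned long *pos)
{
	struct net_device *dev;
	unsigned long ifindex = *pos;

	for_each_netdev_dump(net, dev, ifindex) {
		*pos = dev->ifindex;
		return dev;	/* caller holds rcu_read_lock() */
	}
	return NULL;
}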
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a09d507c5b03..946caefdd959 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -34,10 +34,10 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
-/* Caller holds RTNL or dev_base_lock */
+/* Caller holds RTNL or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
- return dev->reg_state <= NETREG_REGISTERED;
+ return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
/* use same locking rules as GIF* ioctl's */
@@ -48,10 +48,10 @@ static ssize_t netdev_show(const struct device *dev,
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (dev_isalive(ndev))
ret = (*format)(ndev, buf);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
@@ -60,7 +60,7 @@ static ssize_t netdev_show(const struct device *dev,
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *dev, char *buf) \
{ \
- return sysfs_emit(buf, format_string, dev->field); \
+ return sysfs_emit(buf, format_string, READ_ONCE(dev->field)); \
} \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -125,7 +125,7 @@ static DEVICE_ATTR_RO(iflink);
static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
- return sysfs_emit(buf, fmt_dec, dev->name_assign_type);
+ return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}
static ssize_t name_assign_type_show(struct device *dev,
@@ -135,24 +135,28 @@ static ssize_t name_assign_type_show(struct device *dev,
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- if (ndev->name_assign_type != NET_NAME_UNKNOWN)
+ if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
ret = netdev_show(dev, attr, buf, format_name_assign_type);
return ret;
}
static DEVICE_ATTR_RO(name_assign_type);
-/* use same locking rules as GIFHWADDR ioctl's */
+/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct net_device *ndev = to_net_dev(dev);
ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
+ down_read(&dev_addr_sem);
+
+ rcu_read_lock();
if (dev_isalive(ndev))
ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
+
+ up_read(&dev_addr_sem);
return ret;
}
static DEVICE_ATTR_RO(address);
@@ -161,10 +165,13 @@ static ssize_t broadcast_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *ndev = to_net_dev(dev);
+ int ret = -EINVAL;
+ rcu_read_lock();
if (dev_isalive(ndev))
- return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
- return -EINVAL;
+ ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
+ rcu_read_unlock();
+ return ret;
}
static DEVICE_ATTR_RO(broadcast);
@@ -318,11 +325,9 @@ static ssize_t operstate_show(struct device *dev,
const struct net_device *netdev = to_net_dev(dev);
unsigned char operstate;
- read_lock(&dev_base_lock);
- operstate = netdev->operstate;
+ operstate = READ_ONCE(netdev->operstate);
if (!netif_running(netdev))
operstate = IF_OPER_DOWN;
- read_unlock(&dev_base_lock);
if (operstate >= ARRAY_SIZE(operstates))
return -EINVAL; /* should not happen */
@@ -680,14 +685,14 @@ static ssize_t netstat_show(const struct device *d,
WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
offset % sizeof(u64) != 0);
- read_lock(&dev_base_lock);
+ rcu_read_lock();
if (dev_isalive(dev)) {
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return ret;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72799533426b..f0540c557515 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
- int error = 0;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
+ int error = 0;
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
@@ -358,6 +359,15 @@ out_undo:
synchronize_rcu();
ops = saved_ops;
+ rtnl_lock();
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+
+ ops = saved_ops;
list_for_each_entry_continue_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
@@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
+ LIST_HEAD(dev_kill_list);
/* Atomically snapshot the list of namespaces to cleanup */
net_kill_list = llist_del_all(&cleanup_list);
@@ -611,7 +622,15 @@ static void cleanup_net(struct work_struct *work)
* the rcu_barrier() below isn't sufficient alone.
* Also the pre_exit() and exit() methods need this barrier.
*/
- synchronize_rcu();
+ synchronize_rcu_expedited();
+
+ rtnl_lock();
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit_batch_rtnl)
+ ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+ }
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
/* Run all of the network namespace exit methods */
list_for_each_entry_reverse(ops, &pernet_list, list)
@@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_
{
ops_pre_exit_list(ops, net_exit_list);
synchronize_rcu();
+
+ if (ops->exit_batch_rtnl) {
+ LIST_HEAD(dev_kill_list);
+
+ rtnl_lock();
+ ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
+ unregister_netdevice_many(&dev_kill_list);
+ rtnl_unlock();
+ }
ops_exit_list(ops, net_exit_list);
+
ops_free_list(ops, net_exit_list);
}
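A subsystem adopting the new hook would look roughly like this; the foo_* names are illustrative. The core calls ->exit_batch_rtnl() for every exiting namespace under one RTNL section and then frees all queued devices with a single unregister_netdevice_many(), instead of each tunnel driver taking RTNL per namespace:

static void __net_exit foo_exit_batch_rtnl(struct list_head *net_list,
					   struct list_head *dev_kill_list)
{
	struct net_device *dev, *next;
	struct net *net;

	list_for_each_entry(net, net_list, exit_list)
		for_each_netdev_safe(net, dev, next)
			if (dev->rtnl_link_ops == &foo_link_ops)
				unregister_netdevice_queue(dev, dev_kill_list);
}

static struct pernet_operations foo_net_ops = {
	.exit_batch_rtnl = foo_exit_batch_rtnl,
};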
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a6b..89c835fcf094 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -171,13 +171,16 @@ static void page_pool_producer_unlock(struct page_pool *pool,
}
static int page_pool_init(struct page_pool *pool,
- const struct page_pool_params *params)
+ const struct page_pool_params *params,
+ int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
memcpy(&pool->p, &params->fast, sizeof(pool->p));
memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
+ pool->cpuid = cpuid;
+
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
return -EINVAL;
@@ -253,10 +256,12 @@ static void page_pool_uninit(struct page_pool *pool)
}
/**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
* @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
*/
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
{
struct page_pool *pool;
int err;
@@ -265,7 +270,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
if (!pool)
return ERR_PTR(-ENOMEM);
- err = page_pool_init(pool, params);
+ err = page_pool_init(pool, params, cpuid);
if (err < 0)
goto err_free;
@@ -282,6 +287,16 @@ err_free:
kfree(pool);
return ERR_PTR(err);
}
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+ return page_pool_create_percpu(params, -1);
+}
EXPORT_SYMBOL(page_pool_create);
static void page_pool_return_page(struct page_pool *pool, struct page *page);
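A sketch of the intended use, with the driver context assumed: binding a pool to the CPU that both fills and drains it lets the recycling side (napi_pp_put_page() in skbuff.c below) take the lock-free direct path even when no NAPI instance owns the pool.

static struct page_pool *foo_create_pool(struct device *dev, int cpu)
{
	struct page_pool_params params = {
		.order		= 0,
		.pool_size	= 256,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
	};

	/* cpuid = -1 (as page_pool_create() passes) disables the
	 * percpu shortcut and keeps the old NAPI-only behaviour.
	 */
	return page_pool_create_percpu(&params, cpu);
}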
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9c4f427f3a50..c54dbe05c4c5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -483,24 +483,15 @@ EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
*/
static void rtnl_lock_unregistering_all(void)
{
- struct net *net;
- bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
- unregistering = false;
rtnl_lock();
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
- for_each_net(net) {
- if (atomic_read(&net->dev_unreg_count) > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
+ if (!atomic_read(&dev_unreg_count))
break;
__rtnl_unlock();
@@ -851,9 +842,22 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
+void netdev_set_operstate(struct net_device *dev, int newstate)
+{
+ unsigned int old = READ_ONCE(dev->operstate);
+
+ do {
+ if (old == newstate)
+ return;
+ } while (!try_cmpxchg(&dev->operstate, &old, newstate));
+
+ netdev_state_change(dev);
+}
+EXPORT_SYMBOL(netdev_set_operstate);
+
static void set_operstate(struct net_device *dev, unsigned char transition)
{
- unsigned char operstate = dev->operstate;
+ unsigned char operstate = READ_ONCE(dev->operstate);
switch (transition) {
case IF_OPER_UP:
@@ -875,12 +879,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
break;
}
- if (dev->operstate != operstate) {
- write_lock(&dev_base_lock);
- dev->operstate = operstate;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- }
+ netdev_set_operstate(dev, operstate);
}
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
@@ -2200,25 +2199,22 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
- struct hlist_head *head;
+ unsigned int flags = NLM_F_MULTI;
struct nlattr *tb[IFLA_MAX+1];
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
+ struct net *tgt_net = net;
u32 ext_filter_mask = 0;
- const struct rtnl_link_ops *kind_ops = NULL;
- unsigned int flags = NLM_F_MULTI;
+ struct net_device *dev;
int master_idx = 0;
int netnsid = -1;
int err, i;
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack);
if (err < 0) {
if (cb->strict_check)
@@ -2262,36 +2258,21 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
flags |= NLM_F_DUMP_FILTERED;
walk_entries:
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (link_dump_filtered(dev, master_idx, kind_ops))
- goto cont;
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_ifinfo(skb, dev, net,
- RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq, 0, flags,
- ext_filter_mask, 0, NULL, 0,
- netnsid, GFP_KERNEL);
-
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
-cont:
- idx++;
+ err = 0;
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ if (link_dump_filtered(dev, master_idx, kind_ops))
+ continue;
+ err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq, 0, flags,
+ ext_filter_mask, 0, NULL, 0,
+ netnsid, GFP_KERNEL);
+ if (err < 0) {
+ if (likely(skb->len))
+ err = skb->len;
+ break;
}
}
-out:
- err = skb->len;
-out_err:
- cb->args[1] = idx;
- cb->args[0] = h;
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (netnsid >= 0)
@@ -2983,11 +2964,9 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_LINKMODE]) {
unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
- write_lock(&dev_base_lock);
if (dev->link_mode ^ value)
status |= DO_SETLINK_NOTIFY;
- dev->link_mode = value;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->link_mode, value);
}
if (tb[IFLA_VFINFO_LIST]) {
diff --git a/net/core/scm.c b/net/core/scm.c
index d0e0852a24d5..9cd4b0a01cd6 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -36,6 +36,7 @@
#include <net/compat.h>
#include <net/scm.h>
#include <net/cls_cgroup.h>
+#include <net/af_unix.h>
/*
@@ -85,6 +86,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
}
@@ -109,6 +111,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fput(file);
return -EINVAL;
}
+ if (unix_get_socket(file))
+ fpl->count_unix++;
+
*fpp++ = file;
fpl->count++;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index edbbef563d4d..0d9a489e6ae1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -895,6 +895,98 @@ static bool is_pp_page(struct page *page)
return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
}
+int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
+ unsigned int headroom)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ u32 size, truesize, len, max_head_size, off;
+ struct sk_buff *skb = *pskb, *nskb;
+ int err, i, head_off;
+ void *data;
+
+ /* XDP does not support fraglist so we need to linearize
+ * the skb.
+ */
+ if (skb_has_frag_list(skb))
+ return -EOPNOTSUPP;
+
+ max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom);
+ if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE)
+ return -ENOMEM;
+
+ size = min_t(u32, skb->len, max_head_size);
+ truesize = SKB_HEAD_ALIGN(size) + headroom;
+ data = page_pool_dev_alloc_va(pool, &truesize);
+ if (!data)
+ return -ENOMEM;
+
+ nskb = napi_build_skb(data, truesize);
+ if (!nskb) {
+ page_pool_free_va(pool, data, true);
+ return -ENOMEM;
+ }
+
+ skb_reserve(nskb, headroom);
+ skb_copy_header(nskb, skb);
+ skb_mark_for_recycle(nskb);
+
+ err = skb_copy_bits(skb, 0, nskb->data, size);
+ if (err) {
+ consume_skb(nskb);
+ return err;
+ }
+ skb_put(nskb, size);
+
+ head_off = skb_headroom(nskb) - skb_headroom(skb);
+ skb_headers_offset_update(nskb, head_off);
+
+ off = size;
+ len = skb->len - off;
+ for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+ struct page *page;
+ u32 page_off;
+
+ size = min_t(u32, len, PAGE_SIZE);
+ truesize = size;
+
+ page = page_pool_dev_alloc(pool, &page_off, &truesize);
+ if (!page) {
+ consume_skb(nskb);
+ return -ENOMEM;
+ }
+
+ skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
+ err = skb_copy_bits(skb, off, page_address(page) + page_off,
+ size);
+ if (err) {
+ consume_skb(nskb);
+ return err;
+ }
+
+ len -= size;
+ off += size;
+ }
+
+ consume_skb(skb);
+ *pskb = nskb;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+EXPORT_SYMBOL(skb_pp_cow_data);
+
+int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+ struct bpf_prog *prog)
+{
+ if (!prog->aux->xdp_has_frags)
+ return -EINVAL;
+
+ return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM);
+}
+EXPORT_SYMBOL(skb_cow_data_for_xdp);
+
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
@@ -923,9 +1015,10 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
*/
if (napi_safe || in_softirq()) {
const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+ unsigned int cpuid = smp_processor_id();
- allow_direct = napi &&
- READ_ONCE(napi->list_owner) == smp_processor_id();
+ allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
+ allow_direct |= (pp->cpuid == cpuid);
}
/* Driver set this to memory recycling info. Reset it on recycle.
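A hedged sketch of a driver call site for the two new helpers; the skb checks mirror what a veth-like skb-XDP path needs, and everything but the helpers themselves is an assumption. On success *pskb points at a page-pool backed copy with XDP_PACKET_HEADROOM reserved.

static int foo_xdp_prepare_skb(struct page_pool *pool, struct sk_buff **pskb,
			       struct bpf_prog *prog)
{
	if (skb_shared(*pskb) || skb_head_is_locked(*pskb) ||
	    skb_shinfo(*pskb)->nr_frags) {
		int err = skb_cow_data_for_xdp(pool, pskb, prog);

		if (err)	/* -EINVAL if prog lacks xdp_has_frags */
			return err;
	}
	return 0;
}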
diff --git a/net/core/sock.c b/net/core/sock.c
index 0a7f46c37f0c..88bf810394a5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2583,8 +2583,18 @@ EXPORT_SYMBOL(sock_efree);
#ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
- if (sk_is_refcounted(skb->sk))
- sock_gen_put(skb->sk);
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk))
+ return;
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
+ inet_reqsk(sk)->rsk_listener = NULL;
+ reqsk_free(inet_reqsk(sk));
+ return;
+ }
+
+ sock_gen_put(sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index b1e29e18d1d6..654122838025 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -16,9 +16,10 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
-static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
-static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
-static DEFINE_MUTEX(sock_diag_table_mutex);
+static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX];
+
+static struct sock_diag_inet_compat __rcu *inet_rcv_compat;
+
static struct workqueue_struct *broadcast_wq;
DEFINE_COOKIE(sock_cookie);
@@ -122,6 +123,24 @@ static size_t sock_diag_nlmsg_size(void)
+ nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}
+static const struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+ const struct sock_diag_handler *handler;
+
+ rcu_read_lock();
+ handler = rcu_dereference(sock_diag_handlers[family]);
+ if (handler && !try_module_get(handler->owner))
+ handler = NULL;
+ rcu_read_unlock();
+
+ return handler;
+}
+
+static void sock_diag_unlock_handler(const struct sock_diag_handler *handler)
+{
+ module_put(handler->owner);
+}
+
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
struct broadcast_sk *bsk =
@@ -138,12 +157,12 @@ static void sock_diag_broadcast_destroy_work(struct work_struct *work)
if (!skb)
goto out;
- mutex_lock(&sock_diag_table_mutex);
- hndl = sock_diag_handlers[sk->sk_family];
- if (hndl && hndl->get_info)
- err = hndl->get_info(skb, sk);
- mutex_unlock(&sock_diag_table_mutex);
-
+ hndl = sock_diag_lock_handler(sk->sk_family);
+ if (hndl) {
+ if (hndl->get_info)
+ err = hndl->get_info(skb, sk);
+ sock_diag_unlock_handler(hndl);
+ }
if (!err)
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
GFP_KERNEL);
@@ -166,51 +185,45 @@ void sock_diag_broadcast_destroy(struct sock *sk)
queue_work(broadcast_wq, &bsk->work);
}
-void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- mutex_lock(&sock_diag_table_mutex);
- inet_rcv_compat = fn;
- mutex_unlock(&sock_diag_table_mutex);
+ xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
+ ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
-void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr)
{
- mutex_lock(&sock_diag_table_mutex);
- inet_rcv_compat = NULL;
- mutex_unlock(&sock_diag_table_mutex);
+ const struct sock_diag_inet_compat *old;
+
+ old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
+ NULL);
+ WARN_ON_ONCE(old != ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
int sock_diag_register(const struct sock_diag_handler *hndl)
{
- int err = 0;
+ int family = hndl->family;
- if (hndl->family >= AF_MAX)
+ if (family >= AF_MAX)
return -EINVAL;
- mutex_lock(&sock_diag_table_mutex);
- if (sock_diag_handlers[hndl->family])
- err = -EBUSY;
- else
- sock_diag_handlers[hndl->family] = hndl;
- mutex_unlock(&sock_diag_table_mutex);
-
- return err;
+ return !cmpxchg((const struct sock_diag_handler **)
+ &sock_diag_handlers[family],
+ NULL, hndl) ? 0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(sock_diag_register);
-void sock_diag_unregister(const struct sock_diag_handler *hnld)
+void sock_diag_unregister(const struct sock_diag_handler *hndl)
{
- int family = hnld->family;
+ int family = hndl->family;
if (family >= AF_MAX)
return;
- mutex_lock(&sock_diag_table_mutex);
- BUG_ON(sock_diag_handlers[family] != hnld);
- sock_diag_handlers[family] = NULL;
- mutex_unlock(&sock_diag_table_mutex);
+ xchg((const struct sock_diag_handler **)&sock_diag_handlers[family],
+ NULL);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
@@ -227,20 +240,20 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);
- if (sock_diag_handlers[req->sdiag_family] == NULL)
+ if (!rcu_access_pointer(sock_diag_handlers[req->sdiag_family]))
sock_load_diag_module(req->sdiag_family, 0);
- mutex_lock(&sock_diag_table_mutex);
- hndl = sock_diag_handlers[req->sdiag_family];
+ hndl = sock_diag_lock_handler(req->sdiag_family);
if (hndl == NULL)
- err = -ENOENT;
- else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
+ return -ENOENT;
+
+ if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
err = hndl->destroy(skb, nlh);
else
err = -EOPNOTSUPP;
- mutex_unlock(&sock_diag_table_mutex);
+ sock_diag_unlock_handler(hndl);
return err;
}
@@ -248,20 +261,27 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ const struct sock_diag_inet_compat *ptr;
int ret;
switch (nlh->nlmsg_type) {
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
- if (inet_rcv_compat == NULL)
+
+ if (!rcu_access_pointer(inet_rcv_compat))
sock_load_diag_module(AF_INET, 0);
- mutex_lock(&sock_diag_table_mutex);
- if (inet_rcv_compat != NULL)
- ret = inet_rcv_compat(skb, nlh);
- else
- ret = -EOPNOTSUPP;
- mutex_unlock(&sock_diag_table_mutex);
+ rcu_read_lock();
+ ptr = rcu_dereference(inet_rcv_compat);
+ if (ptr && !try_module_get(ptr->owner))
+ ptr = NULL;
+ rcu_read_unlock();
+
+ ret = -EOPNOTSUPP;
+ if (ptr) {
+ ret = ptr->fn(skb, nlh);
+ module_put(ptr->owner);
+ }
return ret;
case SOCK_DIAG_BY_FAMILY:
@@ -272,13 +292,9 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
-static DEFINE_MUTEX(sock_diag_mutex);
-
static void sock_diag_rcv(struct sk_buff *skb)
{
- mutex_lock(&sock_diag_mutex);
netlink_rcv_skb(skb, &sock_diag_rcv_msg);
- mutex_unlock(&sock_diag_mutex);
}
static int sock_diag_bind(struct net *net, int group)
@@ -286,12 +302,12 @@ static int sock_diag_bind(struct net *net, int group)
switch (group) {
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
- if (!sock_diag_handlers[AF_INET])
+ if (!rcu_access_pointer(sock_diag_handlers[AF_INET]))
sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
- if (!sock_diag_handlers[AF_INET6])
+ if (!rcu_access_pointer(sock_diag_handlers[AF_INET6]))
sock_load_diag_module(AF_INET6, 0);
break;
}
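Restated in isolation, the lookup pattern the whole file now relies on: the dereference and the module pin happen inside one RCU section, so a racing unregister plus module unload cannot free the handler between the two; afterwards the module reference alone keeps it alive, with no global mutex.

static const struct sock_diag_handler *pin_handler(int family)
{
	const struct sock_diag_handler *h;

	rcu_read_lock();
	h = rcu_dereference(sock_diag_handlers[family]);
	if (h && !try_module_get(h->owner))
		h = NULL;	/* module exit already past its refcount */
	rcu_read_unlock();

	return h;	/* caller ends with module_put(h->owner) */
}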
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4869c1c2d8f3..27b585f3fa81 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -75,7 +75,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
/* Allow this ID to be reused */
- ida_simple_remove(&mem_id_pool, xa->mem.id);
+ ida_free(&mem_id_pool, xa->mem.id);
kfree(xa);
}
@@ -242,7 +242,7 @@ static int __mem_id_cyclic_get(gfp_t gfp)
int id;
again:
- id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
+ id = ida_alloc_range(&mem_id_pool, mem_id_next, MEM_ID_MAX - 1, gfp);
if (id < 0) {
if (id == -ENOSPC) {
/* Cyclic allocator, reset next id */
@@ -317,7 +317,7 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
/* Insert allocator into ID lookup table */
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
if (IS_ERR(ptr)) {
- ida_simple_remove(&mem_id_pool, mem->id);
+ ida_free(&mem_id_pool, mem->id);
mem->id = 0;
errno = PTR_ERR(ptr);
goto err;
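The range argument shifts by one because the deprecated API treated 'end' as exclusive while ida_alloc_range() treats 'max' as inclusive, hence MEM_ID_MAX - 1 above. Both calls below cover the same id space:

static int example_get_id(struct ida *ida)
{
	/* old: ida_simple_get(ida, 10, 20, GFP_KERNEL) gave ids 10..19 */
	return ida_alloc_range(ida, 10, 19, GFP_KERNEL); /* ids 10..19 */
}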
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index c4bbac99740d..1cba001bb4c8 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -376,15 +376,11 @@ EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
int __init dccp_ackvec_init(void)
{
- dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
- sizeof(struct dccp_ackvec), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_slab = KMEM_CACHE(dccp_ackvec, SLAB_HWCACHE_ALIGN);
if (dccp_ackvec_slab == NULL)
goto out_err;
- dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
- sizeof(struct dccp_ackvec_record),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_record_slab = KMEM_CACHE(dccp_ackvec_record, SLAB_HWCACHE_ALIGN);
if (dccp_ackvec_record_slab == NULL)
goto out_destroy_slab;
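KMEM_CACHE() keeps the cache name, object size and alignment tied to the struct definition instead of repeating them by hand; a minimal example:

struct foo_record {
	u64 seq;
};

static struct kmem_cache *foo_slab;

static int __init foo_init(void)
{
	/* expands to kmem_cache_create("foo_record",
	 * sizeof(struct foo_record), __alignof__(struct foo_record),
	 * SLAB_HWCACHE_ALIGN, NULL)
	 */
	foo_slab = KMEM_CACHE(foo_record, SLAB_HWCACHE_ALIGN);
	return foo_slab ? 0 : -ENOMEM;
}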
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 8a82c5a2c5a8..f5019d95c3ae 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -58,6 +58,7 @@ static int dccp_diag_dump_one(struct netlink_callback *cb,
}
static const struct inet_diag_handler dccp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = dccp_diag_dump,
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 2717e9d7b612..1aba1d05c27a 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -75,7 +75,7 @@ sja1105_tagger_private(struct dsa_switch *ds)
}
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
-static inline bool sja1105_is_link_local(const struct sk_buff *skb)
+static bool sja1105_is_link_local(const struct sk_buff *skb)
{
const struct ethhdr *hdr = eth_hdr(skb);
u64 dmac = ether_addr_to_u64(hdr->h_dest);
@@ -121,7 +121,7 @@ static void sja1105_meta_unpack(const struct sk_buff *skb,
packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0);
}
-static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
+static bool sja1105_is_meta_frame(const struct sk_buff *skb)
{
const struct ethhdr *hdr = eth_hdr(skb);
u64 smac = ether_addr_to_u64(hdr->h_source);
diff --git a/net/dsa/user.c b/net/dsa/user.c
index b15e71cc342c..5d666dfb317d 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -210,7 +210,7 @@ static int dsa_user_sync_uc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_unsync_uc(struct net_device *dev,
@@ -230,7 +230,7 @@ static int dsa_user_unsync_uc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_sync_mc(struct net_device *dev,
@@ -250,7 +250,7 @@ static int dsa_user_sync_mc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
static int dsa_user_unsync_mc(struct net_device *dev,
@@ -270,7 +270,7 @@ static int dsa_user_unsync_mc(struct net_device *dev,
return 0;
return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
- &ctx);
+ &ctx);
}
void dsa_user_sync_ha(struct net_device *dev)
@@ -875,8 +875,8 @@ static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx,
return err;
}
-static inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
- struct sk_buff *skb)
+static netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
struct dsa_user_priv *p = netdev_priv(dev);
@@ -1222,7 +1222,7 @@ static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
return ret;
}
-static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
@@ -1242,7 +1242,7 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
return phylink_ethtool_set_eee(dp->pl, e);
}
-static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 6b2a360dcdf0..ce486cec346c 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -712,3 +712,8 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size)
}
}
EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init);
+
+bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee)
+{
+ return !linkmode_empty(eee->supported);
+}
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 28b8aaaf9bcb..0f2b5f7eacee 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -55,5 +55,6 @@ int ethtool_get_module_eeprom_call(struct net_device *dev,
struct ethtool_eeprom *ee, u8 *data);
bool __ethtool_dev_mm_supported(struct net_device *dev);
+bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee);
#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 2853394d06a8..db6faa18fe41 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -5,7 +5,7 @@
#include "bitset.h"
#define EEE_MODES_COUNT \
- (sizeof_field(struct ethtool_eee, supported) * BITS_PER_BYTE)
+ (sizeof_field(struct ethtool_keee, supported_u32) * BITS_PER_BYTE)
struct eee_req_info {
struct ethnl_req_info base;
@@ -13,7 +13,7 @@ struct eee_req_info {
struct eee_reply_data {
struct ethnl_reply_data base;
- struct ethtool_eee eee;
+ struct ethtool_keee eee;
};
#define EEE_REPDATA(__reply_base) \
@@ -30,6 +30,7 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base,
{
struct eee_reply_data *data = EEE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct ethtool_keee *eee = &data->eee;
int ret;
if (!dev->ethtool_ops->get_eee)
@@ -37,9 +38,18 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = dev->ethtool_ops->get_eee(dev, &data->eee);
+ ret = dev->ethtool_ops->get_eee(dev, eee);
ethnl_ops_complete(dev);
+ if (!ret && !ethtool_eee_use_linkmodes(eee)) {
+ ethtool_convert_legacy_u32_to_link_mode(eee->supported,
+ eee->supported_u32);
+ ethtool_convert_legacy_u32_to_link_mode(eee->advertised,
+ eee->advertised_u32);
+ ethtool_convert_legacy_u32_to_link_mode(eee->lp_advertised,
+ eee->lp_advertised_u32);
+ }
+
return ret;
}
@@ -48,24 +58,26 @@ static int eee_reply_size(const struct ethnl_req_info *req_base,
{
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
const struct eee_reply_data *data = EEE_REPDATA(reply_base);
- const struct ethtool_eee *eee = &data->eee;
+ const struct ethtool_keee *eee = &data->eee;
int len = 0;
int ret;
- BUILD_BUG_ON(sizeof(eee->advertised) * BITS_PER_BYTE !=
+ BUILD_BUG_ON(sizeof(eee->advertised_u32) * BITS_PER_BYTE !=
EEE_MODES_COUNT);
- BUILD_BUG_ON(sizeof(eee->lp_advertised) * BITS_PER_BYTE !=
+ BUILD_BUG_ON(sizeof(eee->lp_advertised_u32) * BITS_PER_BYTE !=
EEE_MODES_COUNT);
/* MODES_OURS */
- ret = ethnl_bitset32_size(&eee->advertised, &eee->supported,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_bitset_size(eee->advertised, eee->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
len += ret;
/* MODES_PEERS */
- ret = ethnl_bitset32_size(&eee->lp_advertised, NULL,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_bitset_size(eee->lp_advertised, NULL,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
len += ret;
@@ -84,24 +96,26 @@ static int eee_fill_reply(struct sk_buff *skb,
{
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
const struct eee_reply_data *data = EEE_REPDATA(reply_base);
- const struct ethtool_eee *eee = &data->eee;
+ const struct ethtool_keee *eee = &data->eee;
int ret;
- ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_OURS,
- &eee->advertised, &eee->supported,
- EEE_MODES_COUNT, link_mode_names, compact);
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_OURS,
+ eee->advertised, eee->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
- ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_PEER,
- &eee->lp_advertised, NULL, EEE_MODES_COUNT,
- link_mode_names, compact);
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_PEER,
+ eee->lp_advertised, NULL,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
if (ret < 0)
return ret;
- if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, !!eee->eee_active) ||
- nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, !!eee->eee_enabled) ||
+ if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, eee->eee_active) ||
+ nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, eee->eee_enabled) ||
nla_put_u8(skb, ETHTOOL_A_EEE_TX_LPI_ENABLED,
- !!eee->tx_lpi_enabled) ||
+ eee->tx_lpi_enabled) ||
nla_put_u32(skb, ETHTOOL_A_EEE_TX_LPI_TIMER, eee->tx_lpi_timer))
return -EMSGSIZE;
@@ -132,7 +146,7 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct ethtool_eee eee = {};
+ struct ethtool_keee eee = {};
bool mod = false;
int ret;
@@ -140,14 +154,21 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
if (ret < 0)
return ret;
- ret = ethnl_update_bitset32(&eee.advertised, EEE_MODES_COUNT,
- tb[ETHTOOL_A_EEE_MODES_OURS],
- link_mode_names, info->extack, &mod);
+ if (ethtool_eee_use_linkmodes(&eee)) {
+ ret = ethnl_update_bitset(eee.advertised,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ tb[ETHTOOL_A_EEE_MODES_OURS],
+ link_mode_names, info->extack, &mod);
+ } else {
+ ret = ethnl_update_bitset32(&eee.advertised_u32, EEE_MODES_COUNT,
+ tb[ETHTOOL_A_EEE_MODES_OURS],
+ link_mode_names, info->extack, &mod);
+ }
if (ret < 0)
return ret;
- ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
- ethnl_update_bool32(&eee.tx_lpi_enabled,
- tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod);
+ ethnl_update_bool(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
+ ethnl_update_bool(&eee.tx_lpi_enabled, tb[ETHTOOL_A_EEE_TX_LPI_ENABLED],
+ &mod);
ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER],
&mod);
if (!mod)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 7519b0818b91..1763e8b697e1 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1508,22 +1508,71 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return 0;
}
+static void eee_to_keee(struct ethtool_keee *keee,
+ const struct ethtool_eee *eee)
+{
+ memset(keee, 0, sizeof(*keee));
+
+ keee->supported_u32 = eee->supported;
+ keee->advertised_u32 = eee->advertised;
+ keee->lp_advertised_u32 = eee->lp_advertised;
+ keee->eee_active = eee->eee_active;
+ keee->eee_enabled = eee->eee_enabled;
+ keee->tx_lpi_enabled = eee->tx_lpi_enabled;
+ keee->tx_lpi_timer = eee->tx_lpi_timer;
+
+ ethtool_convert_legacy_u32_to_link_mode(keee->supported,
+ eee->supported);
+ ethtool_convert_legacy_u32_to_link_mode(keee->advertised,
+ eee->advertised);
+ ethtool_convert_legacy_u32_to_link_mode(keee->lp_advertised,
+ eee->lp_advertised);
+}
+
+static void keee_to_eee(struct ethtool_eee *eee,
+ const struct ethtool_keee *keee)
+{
+ memset(eee, 0, sizeof(*eee));
+
+ eee->eee_active = keee->eee_active;
+ eee->eee_enabled = keee->eee_enabled;
+ eee->tx_lpi_enabled = keee->tx_lpi_enabled;
+ eee->tx_lpi_timer = keee->tx_lpi_timer;
+
+ if (ethtool_eee_use_linkmodes(keee)) {
+ bool overflow;
+
+ overflow = !ethtool_convert_link_mode_to_legacy_u32(&eee->supported,
+ keee->supported);
+ ethtool_convert_link_mode_to_legacy_u32(&eee->advertised,
+ keee->advertised);
+ ethtool_convert_link_mode_to_legacy_u32(&eee->lp_advertised,
+ keee->lp_advertised);
+ if (overflow)
+ pr_warn("Ethtool ioctl interface doesn't support passing EEE linkmodes beyond bit 32\n");
+ } else {
+ eee->supported = keee->supported_u32;
+ eee->advertised = keee->advertised_u32;
+ eee->lp_advertised = keee->lp_advertised_u32;
+ }
+}
+
static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_eee edata;
+ struct ethtool_keee keee;
+ struct ethtool_eee eee;
int rc;
if (!dev->ethtool_ops->get_eee)
return -EOPNOTSUPP;
- memset(&edata, 0, sizeof(struct ethtool_eee));
- edata.cmd = ETHTOOL_GEEE;
- rc = dev->ethtool_ops->get_eee(dev, &edata);
-
+ memset(&keee, 0, sizeof(keee));
+ rc = dev->ethtool_ops->get_eee(dev, &keee);
if (rc)
return rc;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ keee_to_eee(&eee, &keee);
+ if (copy_to_user(useraddr, &eee, sizeof(eee)))
return -EFAULT;
return 0;
@@ -1531,16 +1580,18 @@ static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_eee edata;
+ struct ethtool_keee keee;
+ struct ethtool_eee eee;
int ret;
if (!dev->ethtool_ops->set_eee)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ if (copy_from_user(&eee, useraddr, sizeof(eee)))
return -EFAULT;
- ret = dev->ethtool_ops->set_eee(dev, &edata);
+ eee_to_keee(&keee, &eee);
+ ret = dev->ethtool_ops->set_eee(dev, &keee);
if (!ret)
ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL);
return ret;
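The shim leans on the two legacy conversion helpers: only link modes that have a SUPPORTED_ / ADVERTISED_ u32 bit survive the round trip, which is exactly what the overflow warning in keee_to_eee() reports. A small illustration (the function name is ours):

static bool eee_fits_legacy_ioctl(const struct ethtool_keee *keee)
{
	u32 legacy;

	/* returns false if any set mode has no legacy u32 equivalent */
	return ethtool_convert_link_mode_to_legacy_u32(&legacy,
						       keee->supported);
}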
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index fe3553f60bf3..bd04f28d5cf4 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -477,11 +477,7 @@ out:
return ret;
}
-/* Default ->dumpit() handler for GET requests. Device iteration copied from
- * rtnl_dump_ifinfo(); we have to be more careful about device hashtable
- * persistence as we cannot guarantee to hold RTNL lock through the whole
- * function as rtnetnlink does.
- */
+/* Default ->dumpit() handler for GET requests. */
static int ethnl_default_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -490,14 +486,14 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
struct net_device *dev;
int ret = 0;
- rtnl_lock();
+ rcu_read_lock();
for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
dev_hold(dev);
- rtnl_unlock();
+ rcu_read_unlock();
ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
- rtnl_lock();
+ rcu_read_lock();
dev_put(dev);
if (ret < 0 && ret != -EOPNOTSUPP) {
@@ -507,7 +503,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
}
ret = 0;
}
- rtnl_unlock();
+ rcu_read_unlock();
return ret;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 9d71b66183da..5ef6d437db72 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -28,29 +28,19 @@ static bool is_slave_up(struct net_device *dev)
return dev && is_admin_up(dev) && netif_oper_up(dev);
}
-static void __hsr_set_operstate(struct net_device *dev, int transition)
-{
- write_lock(&dev_base_lock);
- if (dev->operstate != transition) {
- dev->operstate = transition;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- } else {
- write_unlock(&dev_base_lock);
- }
-}
-
static void hsr_set_operstate(struct hsr_port *master, bool has_carrier)
{
- if (!is_admin_up(master->dev)) {
- __hsr_set_operstate(master->dev, IF_OPER_DOWN);
+ struct net_device *dev = master->dev;
+
+ if (!is_admin_up(dev)) {
+ netdev_set_operstate(dev, IF_OPER_DOWN);
return;
}
if (has_carrier)
- __hsr_set_operstate(master->dev, IF_OPER_UP);
+ netdev_set_operstate(dev, IF_OPER_UP);
else
- __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN);
+ netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
}
static bool hsr_check_carrier(struct hsr_port *master)
@@ -78,14 +68,14 @@ static void hsr_check_announce(struct net_device *hsr_dev,
hsr = netdev_priv(hsr_dev);
- if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) {
+ if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) {
/* Went up */
hsr->announce_count = 0;
mod_timer(&hsr->announce_timer,
jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL));
}
- if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP)
+ if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP)
/* Went down */
del_timer(&hsr->announce_timer);
}
@@ -100,7 +90,7 @@ void hsr_check_carrier_and_operstate(struct hsr_priv *hsr)
/* netif_stacked_transfer_operstate() cannot be used here since
* it doesn't set IF_OPER_LOWERLAYERDOWN (?)
*/
- old_operstate = master->dev->operstate;
+ old_operstate = READ_ONCE(master->dev->operstate);
has_carrier = hsr_check_carrier(master);
hsr_set_operstate(master, has_carrier);
hsr_check_announce(master->dev, old_operstate);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 2c087b7f17c5..e643f52663f9 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -280,5 +280,6 @@ static void __exit lowpan_cleanup_module(void)
module_init(lowpan_init_module);
module_exit(lowpan_cleanup_module);
+MODULE_DESCRIPTION("IPv6 over Low power Wireless Personal Area Network IEEE 802.15.4 core");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("lowpan");
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 00302e8b9615..990a83455dcf 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1137,4 +1137,5 @@ module_init(af_ieee802154_init);
module_exit(af_ieee802154_remove);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IEEE 802.15.4 socket interface");
MODULE_ALIAS_NETPROTO(PF_IEEE802154);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a5a820ee2026..ad278009e469 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1326,7 +1326,7 @@ int inet_sk_rebuild_header(struct sock *sk)
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
- sk->sk_protocol, RT_CONN_FLAGS(sk),
+ sk->sk_protocol, ip_sock_rt_tos(sk),
sk->sk_bound_dev_if);
if (!IS_ERR(rt)) {
err = 0;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index ae8b15e6896f..834edc18463a 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -12,7 +12,7 @@
#include <net/bpf_sk_storage.h>
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+static struct bpf_struct_ops bpf_tcp_congestion_ops;
static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info),
@@ -20,6 +20,7 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
+static const struct btf_type *tcp_congestion_ops_type;
static int bpf_tcp_ca_init(struct btf *btf)
{
@@ -36,6 +37,11 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
+ type_id = btf_find_by_name_kind(btf, "tcp_congestion_ops", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ tcp_congestion_ops_type = btf_type_by_id(btf, type_id);
+
return 0;
}
@@ -149,7 +155,7 @@ static u32 prog_ops_moff(const struct bpf_prog *prog)
u32 midx;
midx = prog->expected_attach_type;
- t = bpf_tcp_congestion_ops.type;
+ t = tcp_congestion_ops_type;
m = &btf_type_member(t)[midx];
return __btf_member_bit_offset(t, m) / 8;
@@ -191,7 +197,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -339,7 +345,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
.release = __bpf_tcp_ca_release,
};
-struct bpf_struct_ops bpf_tcp_congestion_ops = {
+static struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
.unreg = bpf_tcp_ca_unreg,
@@ -350,10 +356,16 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
.cfi_stubs = &__bpf_ops_tcp_congestion_ops,
+ .owner = THIS_MODULE,
};
static int __init bpf_tcp_ca_kfunc_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_tcp_congestion_ops, tcp_congestion_ops);
+
+ return ret;
}
late_initcall(bpf_tcp_ca_kfunc_init);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 2cc50cbfc2a3..cc6d0bd7b0a9 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -119,7 +119,7 @@ void ip4_datagram_release_cb(struct sock *sk)
rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
dst = !IS_ERR(rt) ? &rt->dst : NULL;
sk_dst_set(sk, dst);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3ff35f811765..0fc7ab5832d1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -501,7 +501,7 @@ static void tnode_free(struct key_vector *tn)
if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
- synchronize_rcu();
+ synchronize_net();
}
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 459af1f89739..747ed7344cbe 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1467,7 +1467,7 @@ static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *f
rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+ ip_sock_rt_tos(sk), sk->sk_bound_dev_if);
if (IS_ERR(rt))
rt = NULL;
if (rt)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8e6b6aa0579e..7adace541fe2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -32,7 +32,7 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
-static const struct inet_diag_handler **inet_diag_table;
+static const struct inet_diag_handler __rcu **inet_diag_table;
struct inet_diag_entry {
const __be32 *saddr;
@@ -48,28 +48,28 @@ struct inet_diag_entry {
#endif
};
-static DEFINE_MUTEX(inet_diag_table_mutex);
-
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
- if (proto < 0 || proto >= IPPROTO_MAX) {
- mutex_lock(&inet_diag_table_mutex);
- return ERR_PTR(-ENOENT);
- }
+ const struct inet_diag_handler *handler;
- if (!inet_diag_table[proto])
+ if (proto < 0 || proto >= IPPROTO_MAX)
+ return NULL;
+
+ if (!READ_ONCE(inet_diag_table[proto]))
sock_load_diag_module(AF_INET, proto);
- mutex_lock(&inet_diag_table_mutex);
- if (!inet_diag_table[proto])
- return ERR_PTR(-ENOENT);
+ rcu_read_lock();
+ handler = rcu_dereference(inet_diag_table[proto]);
+ if (handler && !try_module_get(handler->owner))
+ handler = NULL;
+ rcu_read_unlock();
- return inet_diag_table[proto];
+ return handler;
}
static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
- mutex_unlock(&inet_diag_table_mutex);
+ module_put(handler->owner);
}
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
@@ -104,9 +104,12 @@ static size_t inet_sk_attr_size(struct sock *sk,
const struct inet_diag_handler *handler;
size_t aux = 0;
- handler = inet_diag_table[req->sdiag_protocol];
+ rcu_read_lock();
+ handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]);
+ DEBUG_NET_WARN_ON_ONCE(!handler);
if (handler && handler->idiag_get_aux_size)
aux = handler->idiag_get_aux_size(sk, net_admin);
+ rcu_read_unlock();
return nla_total_size(sizeof(struct tcp_info))
+ nla_total_size(sizeof(struct inet_diag_msg))
@@ -244,10 +247,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct nlmsghdr *nlh;
struct nlattr *attr;
void *info = NULL;
+ int protocol;
cb_data = cb->data;
- handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)];
- BUG_ON(!handler);
+ protocol = inet_diag_get_protocol(req, cb_data);
+
+ /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
+ handler = rcu_dereference_protected(inet_diag_table[protocol], 1);
+ DEBUG_NET_WARN_ON_ONCE(!handler);
+ if (!handler)
+ return -ENXIO;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
@@ -605,9 +614,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
protocol = inet_diag_get_protocol(req, &dump_data);
handler = inet_diag_lock_handler(protocol);
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- } else if (cmd == SOCK_DIAG_BY_FAMILY) {
+ if (!handler)
+ return -ENOENT;
+
+ if (cmd == SOCK_DIAG_BY_FAMILY) {
struct netlink_callback cb = {
.nlh = nlh,
.skb = in_skb,
@@ -1035,6 +1045,10 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
num = 0;
ilb = &hashinfo->lhash2[i];
+ if (hlist_nulls_empty(&ilb->nulls_head)) {
+ s_num = 0;
+ continue;
+ }
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
@@ -1099,6 +1113,10 @@ resume_bind_walk:
accum = 0;
ibb = &hashinfo->bhash2[i];
+ if (hlist_empty(&ibb->chain)) {
+ s_num = 0;
+ continue;
+ }
spin_lock_bh(&ibb->lock);
inet_bind_bucket_for_each(tb2, &ibb->chain) {
if (!net_eq(ib2_net(tb2), net))
@@ -1259,12 +1277,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
again:
prev_min_dump_alloc = cb->min_dump_alloc;
handler = inet_diag_lock_handler(protocol);
- if (!IS_ERR(handler))
+ if (handler) {
handler->dump(skb, cb, r);
- else
- err = PTR_ERR(handler);
- inet_diag_unlock_handler(handler);
-
+ inet_diag_unlock_handler(handler);
+ } else {
+ err = -ENOENT;
+ }
/* The skb is not large enough to fit one sk info and
* inet_sk_diag_fill() has requested for a larger skb.
*/
@@ -1457,10 +1475,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
handler = inet_diag_lock_handler(sk->sk_protocol);
- if (IS_ERR(handler)) {
- inet_diag_unlock_handler(handler);
+ if (!handler) {
nlmsg_cancel(skb, nlh);
- return PTR_ERR(handler);
+ return -ENOENT;
}
attr = handler->idiag_info_size
@@ -1479,6 +1496,7 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
}
static const struct sock_diag_handler inet_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_INET,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
@@ -1486,6 +1504,7 @@ static const struct sock_diag_handler inet_diag_handler = {
};
static const struct sock_diag_handler inet6_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_INET6,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
@@ -1495,20 +1514,12 @@ static const struct sock_diag_handler inet6_diag_handler = {
int inet_diag_register(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
- int err = -EINVAL;
if (type >= IPPROTO_MAX)
- goto out;
+ return -EINVAL;
- mutex_lock(&inet_diag_table_mutex);
- err = -EEXIST;
- if (!inet_diag_table[type]) {
- inet_diag_table[type] = h;
- err = 0;
- }
- mutex_unlock(&inet_diag_table_mutex);
-out:
- return err;
+ return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type],
+ NULL, h) ? 0 : -EEXIST;
}
EXPORT_SYMBOL_GPL(inet_diag_register);
@@ -1519,12 +1530,16 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
if (type >= IPPROTO_MAX)
return;
- mutex_lock(&inet_diag_table_mutex);
- inet_diag_table[type] = NULL;
- mutex_unlock(&inet_diag_table_mutex);
+ xchg((const struct inet_diag_handler **)&inet_diag_table[type],
+ NULL);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
+static const struct sock_diag_inet_compat inet_diag_compat = {
+ .owner = THIS_MODULE,
+ .fn = inet_diag_rcv_msg_compat,
+};
+
static int __init inet_diag_init(void)
{
const int inet_diag_table_size = (IPPROTO_MAX *
@@ -1543,7 +1558,7 @@ static int __init inet_diag_init(void)
if (err)
goto out_free_inet;
- sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
+ sock_diag_register_inet_compat(&inet_diag_compat);
out:
return err;
@@ -1558,7 +1573,7 @@ static void __exit inet_diag_exit(void)
{
sock_diag_unregister(&inet6_diag_handler);
sock_diag_unregister(&inet_diag_handler);
- sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
+ sock_diag_unregister_inet_compat(&inet_diag_compat);
kfree(inet_diag_table);
}
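
The inet_diag conversion above replaces a global mutex with a lockless scheme: registration publishes a handler into an empty slot with cmpxchg(), and lookup pins the handler with a per-module reference instead of holding a table-wide lock. A compilable userspace sketch of that shape, using C11 atomics as stand-ins for cmpxchg()/xchg() and an explicit counter for try_module_get()/module_put(); note the real code also relies on an RCU grace period before a handler's module can go away, which this sketch omits:

#include <stdatomic.h>
#include <stdio.h>

#define PROTO_MAX 8

struct diag_handler {
	const char *name;
	atomic_int refcnt;	/* stands in for the owning module's refcount */
};

static _Atomic(struct diag_handler *) diag_table[PROTO_MAX];

static int diag_register(int proto, struct diag_handler *h)
{
	struct diag_handler *expected = NULL;

	if (proto < 0 || proto >= PROTO_MAX)
		return -1;
	/* publish only into an empty slot, like cmpxchg(..., NULL, h) */
	return atomic_compare_exchange_strong(&diag_table[proto],
					      &expected, h) ? 0 : -1;
}

static void diag_unregister(int proto)
{
	atomic_store(&diag_table[proto], NULL);	/* like xchg(..., NULL) */
}

static struct diag_handler *diag_lock_handler(int proto)
{
	struct diag_handler *h = atomic_load(&diag_table[proto]);

	if (h)
		atomic_fetch_add(&h->refcnt, 1);	/* try_module_get() */
	return h;
}

static void diag_unlock_handler(struct diag_handler *h)
{
	atomic_fetch_sub(&h->refcnt, 1);		/* module_put() */
}

int main(void)
{
	static struct diag_handler tcp = { .name = "tcp" };
	struct diag_handler *h;

	diag_register(6, &tcp);
	h = diag_lock_handler(6);
	if (h) {
		printf("dumping via handler '%s'\n", h->name);
		diag_unlock_handler(h);
	}
	diag_unregister(6);
	return 0;
}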
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e9fed83e9b3c..5bd759963451 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -81,10 +81,7 @@ void __init inet_initpeers(void)
inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
- peer_cachep = kmem_cache_create("inet_peer_cache",
- sizeof(struct inet_peer),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
- NULL);
+ peer_cachep = KMEM_CACHE(inet_peer, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}
/* Called with rcu_read_lock() or base->lock held */
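
KMEM_CACHE() derives the cache name, object size, and alignment from the struct itself, so they can never drift apart; one visible side effect of the conversion above is that the slab name changes from "inet_peer_cache" to "inet_peer". A self-contained sketch, with the macro body reproduced from memory of <linux/slab.h> and a stub allocator standing in for the real one:

#include <stdio.h>

struct kmem_cache;

/* stub so the example links; the real allocator returns a cache handle */
static struct kmem_cache *kmem_cache_create(const char *name,
					    unsigned int size,
					    unsigned int align,
					    unsigned int flags,
					    void (*ctor)(void *))
{
	(void)flags; (void)ctor;
	printf("cache '%s': size=%u align=%u\n", name, size, align);
	return NULL;
}

/* reproduced from memory of <linux/slab.h> */
#define KMEM_CACHE(__struct, __flags)					\
	kmem_cache_create(#__struct, sizeof(struct __struct),		\
			  __alignof__(struct __struct), (__flags), NULL)

struct inet_peer { long key; unsigned char depth; };	/* stand-in layout */

int main(void)
{
	KMEM_CACHE(inet_peer, 0);	/* name becomes "inet_peer" */
	return 0;
}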
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6b9cf5a24c19..7b16c211b904 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1025,14 +1025,16 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit_batch = ipgre_exit_batch_net,
+ .exit_batch_rtnl = ipgre_exit_batch_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1697,14 +1699,16 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit_batch = ipgre_tap_exit_batch_net,
+ .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1715,14 +1719,16 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
+ dev_to_kill);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit_batch = erspan_exit_batch_net,
+ .exit_batch_rtnl = erspan_exit_batch_rtnl,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
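
All of the tunnel conversions in this series follow the same contract: the pernet core takes RTNL once, each ->exit_batch_rtnl() callback merely queues its doomed devices on the shared dev_to_kill list, and a single unregister pass tears them all down. A small userspace sketch of that batching shape, with a pthread mutex standing in for RTNL and all names illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;

struct dev { const char *name; struct dev *next; };

/* one subsystem's callback: runs with "rtnl" held and only queues work */
static void tunnel_exit_batch_rtnl(struct dev **dev_to_kill)
{
	static struct dev tnl = { .name = "gre0" };

	tnl.next = *dev_to_kill;
	*dev_to_kill = &tnl;
}

/* the core: one lock round-trip covers every callback and every netns */
static void cleanup_net(void)
{
	struct dev *dev_to_kill = NULL;

	pthread_mutex_lock(&rtnl);		/* rtnl_lock(), once */
	tunnel_exit_batch_rtnl(&dev_to_kill);	/* ...other callbacks... */
	for (struct dev *d = dev_to_kill; d; d = d->next)
		printf("unregistering %s\n", d->name);	/* batched teardown */
	pthread_mutex_unlock(&rtnl);
}

int main(void)
{
	cleanup_net();
	return 0;
}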
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 67d846622365..1fe794967211 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -493,7 +493,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_CONN_FLAGS_TOS(sk, tos),
+ RT_TOS(tos),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a4513ffb66cb..756f8b923883 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -102,10 +102,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else
- cand = t;
+ cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
@@ -117,9 +116,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -137,9 +136,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -150,9 +149,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
!(t->dev->flags & IFF_UP))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -221,7 +220,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- link == t->parms.link &&
+ link == READ_ONCE(t->parms.link) &&
type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
@@ -747,7 +746,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos),
- dev_net(dev), tunnel->parms.link,
+ dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
@@ -867,7 +866,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
- t->parms.link = p->link;
+ WRITE_ONCE(t->parms.link, p->link);
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
@@ -1057,9 +1056,9 @@ EXPORT_SYMBOL(ip_tunnel_get_link_net);
int ip_tunnel_get_iflink(const struct net_device *dev)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->parms.link;
+ return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
@@ -1130,19 +1129,17 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
}
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops)
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill)
{
struct ip_tunnel_net *itn;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, &list, ops);
+ ip_tunnel_destroy(net, itn, dev_to_kill, ops);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
@@ -1271,6 +1268,7 @@ int ip_tunnel_init(struct net_device *dev)
if (tunnel->collect_md)
netif_keep_dst(dev);
+ netdev_lockdep_set_classes(dev);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
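
The READ_ONCE()/WRITE_ONCE() annotations above mark parms.link as a field written under RTNL but read by lockless RCU lookups, so both sides need single-copy-atomic accesses to avoid load/store tearing. A compilable userspace analogue, using C11 relaxed atomics in place of the kernel macros:

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

struct tunnel {
	atomic_int link;	/* ifindex read without the lock */
	pthread_mutex_t lock;	/* writers still serialize on this */
};

static void tunnel_update_link(struct tunnel *t, int link)
{
	pthread_mutex_lock(&t->lock);
	/* WRITE_ONCE(t->parms.link, link) in the kernel version */
	atomic_store_explicit(&t->link, link, memory_order_relaxed);
	pthread_mutex_unlock(&t->lock);
}

static int tunnel_get_iflink(struct tunnel *t)
{
	/* READ_ONCE(t->parms.link): no tearing, no repeated loads */
	return atomic_load_explicit(&t->link, memory_order_relaxed);
}

int main(void)
{
	struct tunnel t = { .lock = PTHREAD_MUTEX_INITIALIZER };

	tunnel_update_link(&t, 4);
	printf("iflink=%d\n", tunnel_get_iflink(&t));
	return 0;
}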
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index d1d6bb28ed6e..ee587adb169f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -510,14 +510,16 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_batch_net(struct list_head *list_net)
+static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops,
+ dev_to_kill);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit_batch = vti_exit_batch_net,
+ .exit_batch_rtnl = vti_exit_batch_rtnl,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 03afa3871efc..f2696eaadbe6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -592,14 +592,16 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit_batch = ipip_exit_batch_net,
+ .exit_batch_rtnl = ipip_exit_batch_rtnl,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f71a7e9a7de6..783523087281 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -10,6 +10,10 @@ config NF_DEFRAG_IPV4
tristate
default n
+# old sockopt interface and eval loop
+config IP_NF_IPTABLES_LEGACY
+ tristate
+
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
@@ -152,7 +156,7 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP_NF_MANGLE || IP_NF_RAW
+ depends on IP_NF_MANGLE || IP_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -173,6 +177,7 @@ config IP_NF_MATCH_TTL
config IP_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -182,7 +187,7 @@ config IP_NF_FILTER
config IP_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP_NF_FILTER
+ depends on IP_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV4
default m if NETFILTER_ADVANCED=n
help
@@ -212,6 +217,7 @@ config IP_NF_NAT
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
+ select IP_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@@ -252,6 +258,7 @@ endif # IP_NF_NAT
config IP_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -261,7 +268,7 @@ config IP_NF_MANGLE
config IP_NF_TARGET_ECN
tristate "ECN target support"
- depends on IP_NF_MANGLE
+ depends on IP_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `ECN' target, which can be used in the iptables mangle
@@ -286,6 +293,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -299,6 +307,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -309,36 +318,34 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
- tristate "ARP tables support"
- select NETFILTER_XTABLES
- select NETFILTER_FAMILY_ARP
- depends on NETFILTER_ADVANCED
- help
- arptables is a general, extensible packet identification framework.
- The ARP packet filtering and mangling (manipulation) subsystems
- use this: say Y or M here if you want to use either of those.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate
-if IP_NF_ARPTABLES
+config NFT_COMPAT_ARP
+ tristate
+ depends on NF_TABLES_ARP && NFT_COMPAT
+ default m if NFT_COMPAT=m
+ default y if NFT_COMPAT=y
config IP_NF_ARPFILTER
- tristate "ARP packet filtering"
+ tristate "arptables-legacy packet filtering support"
+ select IP_NF_ARPTABLES
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
- local output. On a bridge, you can also specify filtering rules
- for forwarded ARP packets. See the man page for arptables(8).
+ local output. This is only needed for arptables-legacy(8).
+ Neither arptables-nft nor nftables needs this to work.
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_ARP_MANGLE
tristate "ARP payload mangling"
+ depends on IP_NF_ARPTABLES || NFT_COMPAT_ARP
help
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
-endif # IP_NF_ARPTABLES
+ This option is needed by both arptables-legacy and arptables-nft.
+ It is not used by nftables.
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5a26f9de1ab9..85502d4dfbb4 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# generic IP tables
-obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+obj-$(CONFIG_IP_NF_IPTABLES_LEGACY) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index bbff68b5b5d4..70509da4f080 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3631,17 +3631,24 @@ unlock:
}
EXPORT_SYMBOL(register_nexthop_notifier);
-int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
int err;
- rtnl_lock();
err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
nb);
- if (err)
- goto unlock;
- nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
-unlock:
+ if (!err)
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(__unregister_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = __unregister_nexthop_notifier(net, nb);
rtnl_unlock();
return err;
}
@@ -3737,16 +3744,20 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
+static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
+ ASSERT_RTNL();
+ list_for_each_entry(net, net_list, exit_list)
flush_all_nexthops(net);
- kfree(net->nexthop.devhash);
- }
- rtnl_unlock();
+}
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+ kfree(net->nexthop.devhash);
+ net->nexthop.devhash = NULL;
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3764,7 +3775,8 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit_batch = nexthop_net_exit_batch,
+ .exit = nexthop_net_exit,
+ .exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
};
static int __init nexthop_init(void)
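
The nexthop change splits the notifier teardown into a double-underscore helper that assumes the caller already holds RTNL and a public wrapper that takes the lock itself, a common kernel lock-split pattern that lets the pernet exit path (already under RTNL) reuse the same logic. A minimal userspace sketch with a pthread mutex standing in for RTNL and hypothetical names:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* double underscore: caller must already hold big_lock */
static int __unregister_notifier(int id)
{
	/* unlink + final dump would happen here, as in
	 * __unregister_nexthop_notifier() */
	printf("unregistered notifier %d\n", id);
	return 0;
}

/* public wrapper: takes and releases the lock itself */
static int unregister_notifier(int id)
{
	int err;

	pthread_mutex_lock(&big_lock);		/* rtnl_lock() */
	err = __unregister_notifier(id);
	pthread_mutex_unlock(&big_lock);	/* rtnl_unlock() */
	return err;
}

int main(void)
{
	return unregister_notifier(1);
}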
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index fe2140c8375c..cc793bd8de25 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -213,6 +213,7 @@ static int raw_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler raw_diag_handler = {
+ .owner = THIS_MODULE,
.dump = raw_diag_dump,
.dump_one = raw_diag_dump_one,
.idiag_get_info = raw_diag_get_info,
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 61f1c96cfe63..be88bf586ff9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -51,15 +51,6 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
count, &syncookie_secret[c]);
}
-/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
-static u64 tcp_ns_to_ts(bool usec_ts, u64 val)
-{
- if (usec_ts)
- return div_u64(val, NSEC_PER_USEC);
-
- return div_u64(val, NSEC_PER_MSEC);
-}
-
/*
* when syncookies are in effect and tcp timestamps are enabled we encode
* tcp options in the lower bits of the timestamp value that will be
@@ -304,6 +295,24 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
return 0;
}
+#if IS_ENABLED(CONFIG_BPF)
+struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct request_sock *req = inet_reqsk(skb->sk);
+
+ skb->sk = NULL;
+ skb->destructor = NULL;
+
+ if (cookie_tcp_reqsk_init(sk, skb, req)) {
+ reqsk_free(req);
+ req = NULL;
+ }
+
+ return req;
+}
+EXPORT_SYMBOL_GPL(cookie_bpf_check);
+#endif
+
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
struct sock *sk, struct sk_buff *skb,
struct tcp_options_received *tcp_opt,
@@ -404,9 +413,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
if (!req)
goto out_drop;
@@ -454,7 +467,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
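
cookie_v4_check() now distinguishes three outcomes from the cookie validation step: a usable request (proceed), NULL (drop), or an ERR_PTR-encoded error (bail out), with the BPF path only ever producing the first two. A self-contained sketch of that three-way pointer convention, re-implementing the ERR_PTR()/IS_ERR() encoding (an errno packed into the top of the pointer range) with illustrative logic:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err)      { return (void *)err; }
static inline long PTR_ERR(const void *p)  { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct request { int dummy; };

static struct request *cookie_check(int valid, int transient)
{
	static struct request req;

	if (!valid)
		return ERR_PTR(-EINVAL);	/* hard failure: bail out */
	if (transient)
		return NULL;			/* soft failure: drop */
	return &req;				/* success: proceed */
}

int main(void)
{
	struct request *req = cookie_check(1, 0);

	if (IS_ERR(req))
		printf("error %ld\n", PTR_ERR(req));
	else if (!req)
		printf("drop\n");
	else
		printf("ok\n");
	return 0;
}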
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 4cbe4b44425a..f428ecf9120f 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -222,6 +222,7 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler tcp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = tcp_diag_dump,
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df7b13f0e5e0..b1c4462a0798 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6623,7 +6623,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
const struct tcphdr *th = tcp_hdr(skb);
struct request_sock *req;
int queued = 0;
- bool acceptable;
SKB_DR(reason);
switch (sk->sk_state) {
@@ -6649,12 +6648,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
*/
rcu_read_lock();
local_bh_disable();
- acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+ icsk->icsk_af_ops->conn_request(sk, skb);
local_bh_enable();
rcu_read_unlock();
- if (!acceptable)
- return 1;
consume_skb(skb);
return 0;
}
@@ -6699,17 +6696,21 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
/* step 5: check the ACK field */
- acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- FLAG_UPDATE_TS_RECENT |
- FLAG_NO_CHALLENGE_ACK) > 0;
+ reason = tcp_ack(sk, skb, FLAG_SLOWPATH |
+ FLAG_UPDATE_TS_RECENT |
+ FLAG_NO_CHALLENGE_ACK);
- if (!acceptable) {
+ if ((int)reason <= 0) {
if (sk->sk_state == TCP_SYN_RECV)
return 1; /* send one RST */
- tcp_send_challenge_ack(sk);
- SKB_DR_SET(reason, TCP_OLD_ACK);
- goto discard;
+ /* accept old ack during closing */
+ if ((int)reason < 0) {
+ tcp_send_challenge_ack(sk);
+ reason = -reason;
+ goto discard;
+ }
}
+ SKB_DR_SET(reason, NOT_SPECIFIED);
switch (sk->sk_state) {
case TCP_SYN_RECV:
tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
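
The tcp_ack() convention above packs three outcomes into one signed value: positive means the ACK was accepted, zero means it was unacceptable (SYN_RECV answers with a RST), and a negative value is a negated drop reason marking the new accept-old-ack-during-closing case, recovered with reason = -reason after the challenge ACK. A small sketch of the encoding, with illustrative values:

#include <stdio.h>

enum { TCP_OLD_ACK = 7 };	/* stand-in for SKB_DROP_REASON_TCP_OLD_ACK */

static int check_ack(int acceptable, int old)
{
	if (acceptable)
		return 1;		/* > 0: ACK moves the connection on */
	if (old)
		return -TCP_OLD_ACK;	/* < 0: old ACK, negated drop reason */
	return 0;			/* 0: unacceptable, caller resets */
}

int main(void)
{
	int reason = check_ack(0, 1);

	if (reason <= 0) {
		if (reason < 0) {
			/* tcp_send_challenge_ack() would run here */
			reason = -reason;
			printf("discard, drop reason %d\n", reason);
			return 0;
		}
		printf("send RST\n");
		return 0;
	}
	printf("accepted\n");
	return 0;
}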
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index dc41a22ee80e..38cb3a28e4ed 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -237,6 +237,7 @@ static int udplite_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler udp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = udp_diag_dump,
.dump_one = udp_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
@@ -260,6 +261,7 @@ static int udplite_diag_dump_one(struct netlink_callback *cb,
}
static const struct inet_diag_handler udplite_diag_handler = {
+ .owner = THIS_MODULE,
.dump = udplite_diag_dump,
.dump_one = udplite_diag_dump_one,
.idiag_get_info = udp_diag_get_info,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 733ace18806c..d3f4b7b9cf1f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -257,6 +258,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -1255,6 +1257,7 @@ static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
bool del_rt, bool del_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1264,8 +1267,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
else {
- if (!(f6i->fib6_flags & RTF_EXPIRES))
+ if (!(f6i->fib6_flags & RTF_EXPIRES)) {
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
fib6_info_release(f6i);
}
}
@@ -1331,12 +1341,20 @@ out:
in6_ifa_put(ifp);
}
+static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev)
+{
+ return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry *
+ idev->cnf.dad_transmits *
+ max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+}
+
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
{
struct inet6_dev *idev = ifp->idev;
unsigned long tmp_tstamp, age;
unsigned long regen_advance;
unsigned long now = jiffies;
+ u32 if_public_preferred_lft;
s32 cnf_temp_preferred_lft;
struct inet6_ifaddr *ift;
struct ifa6_config cfg;
@@ -1372,9 +1390,7 @@ retry:
age = (now - ifp->tstamp) / HZ;
- regen_advance = idev->cnf.regen_max_retry *
- idev->cnf.dad_transmits *
- max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ regen_advance = ipv6_get_regen_advance(idev);
/* recalculate max_desync_factor each time and update
* idev->desync_factor if it's larger
@@ -1394,11 +1410,13 @@ retry:
}
}
+ if_public_preferred_lft = ifp->prefered_lft;
+
memset(&cfg, 0, sizeof(cfg));
cfg.valid_lft = min_t(__u32, ifp->valid_lft,
idev->cnf.temp_valid_lft + age);
cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
- cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+ cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft);
cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft);
cfg.plen = ifp->prefix_len;
@@ -1407,19 +1425,41 @@ retry:
write_unlock_bh(&idev->lock);
- /* A temporary address is created only if this calculated Preferred
- * Lifetime is greater than REGEN_ADVANCE time units. In particular,
- * an implementation must not create a temporary address with a zero
- * Preferred Lifetime.
+ /* From RFC 4941:
+ *
+ * A temporary address is created only if this calculated Preferred
+ * Lifetime is greater than REGEN_ADVANCE time units. In
+ * particular, an implementation must not create a temporary address
+ * with a zero Preferred Lifetime.
+ *
+ * ...
+ *
+ * When creating a temporary address, the lifetime values MUST be
+ * derived from the corresponding prefix as follows:
+ *
+ * ...
+ *
+ * * Its Preferred Lifetime is the lower of the Preferred Lifetime
+ * of the public address or TEMP_PREFERRED_LIFETIME -
+ * DESYNC_FACTOR.
+ *
+ * To comply with the RFC's requirements, clamp the preferred lifetime
+ * to a minimum of regen_advance, unless that would exceed valid_lft or
+ * ifp->prefered_lft.
+ *
* Use age calculation as in addrconf_verify to avoid unnecessary
* temporary addresses being generated.
*/
age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
if (cfg.preferred_lft <= regen_advance + age) {
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
+ cfg.preferred_lft = regen_advance + age + 1;
+ if (cfg.preferred_lft > cfg.valid_lft ||
+ cfg.preferred_lft > if_public_preferred_lft) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
}
cfg.ifa_flags = IFA_F_TEMPORARY;
@@ -2706,6 +2746,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
+ struct fib6_table *table;
__u32 valid_lft;
__u32 prefered_lft;
int addr_type, err;
@@ -2782,11 +2823,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (valid_lft == 0) {
ip6_del_rt(net, rt, false);
rt = NULL;
- } else if (addrconf_finite_timeout(rt_expires)) {
- /* not infinity */
- fib6_set_expires(rt, jiffies + rt_expires);
} else {
- fib6_clean_expires(rt);
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ fib6_set_expires(rt, jiffies + rt_expires);
+ fib6_add_gc_list(rt);
+ } else {
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
}
} else if (valid_lft) {
clock_t expires = 0;
@@ -4577,9 +4627,7 @@ restart:
!ifp->regen_count && ifp->ifpub) {
/* This is a non-regenerated temporary addr. */
- unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
- ifp->idev->cnf.dad_transmits *
- max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev);
if (age + regen_advance >= ifp->prefered_lft) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
@@ -4741,6 +4789,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
u32 prio;
@@ -4761,10 +4810,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
- if (!expires)
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!(flags & RTF_EXPIRES)) {
fib6_clean_expires(f6i);
- else
+ fib6_remove_gc_list(f6i);
+ } else {
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(f6i);
}
@@ -5998,7 +6055,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
(dev->ifindex != dev_get_iflink(dev) &&
nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
+ netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN))
goto nla_put_failure;
protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO);
if (!protoinfo)
@@ -6790,6 +6847,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "regen_min_advance",
+ .data = &ipv6_devconf.regen_min_advance,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "regen_max_retry",
.data = &ipv6_devconf.regen_max_retry,
.maxlen = sizeof(int),
@@ -7349,7 +7413,8 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
- addrconf_wq = create_workqueue("ipv6_addrconf");
+ /* All work items using addrconf_wq need to lock rtnl. */
+ addrconf_wq = create_singlethread_workqueue("ipv6_addrconf");
if (!addrconf_wq) {
err = -ENOMEM;
goto out_nowq;
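
ipv6_get_regen_advance() mirrors RFC 8981's REGEN_ADVANCE = 2 + (TEMP_IDGEN_RETRIES * DupAddrDetectTransmits * RetransTimer / 1000), with the leading constant now exposed as the regen_min_advance sysctl. A worked userspace sketch; the values used (2 s minimum, 3 retries, 1 DAD transmit, 1000 ms retransmit timer) are illustrative rather than read from real per-device conf:

#include <stdio.h>

#define HZ 1000	/* pretend jiffies tick at 1 kHz */

static long max_l(long a, long b) { return a > b ? a : b; }

/* lead time, in seconds, before a temporary address stops being
 * preferred, by which a replacement must already be generated */
static long regen_advance(long min_advance_s, long max_retry,
			  long dad_transmits, long retrans_jiffies)
{
	return min_advance_s + max_retry * dad_transmits *
	       max_l(retrans_jiffies, HZ / 100) / HZ;
}

int main(void)
{
	/* 2 + 3 * 1 * 1000/1000 = 5 seconds of lead time */
	printf("%ld\n", regen_advance(2, 3, 1, 1000));
	return 0;
}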
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4fc2cae0d116..805bbf26b3ef 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -751,8 +754,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
int bit;
__be32 dir = 0;
- RT6_TRACE("fib6_add_1\n");
-
/* insert node in tree */
fn = root;
@@ -1057,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
}
/*
@@ -1117,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
rt->fib6_nsiblings = 0;
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
- if (!(rt->fib6_flags & RTF_EXPIRES))
+ if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- else
+ fib6_remove_gc_list(iter);
+ } else {
fib6_set_expires(iter, rt->expires);
+ fib6_add_gc_list(iter);
+ }
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1479,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (rt->fib6_flags & RTF_EXPIRES)
+ fib6_add_gc_list(rt);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -1803,7 +1814,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
lockdep_is_held(&table->tb6_lock));
struct fib6_info *new_fn_leaf;
- RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
@@ -1866,7 +1877,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
FOR_WALKERS(net, w) {
if (!child) {
if (w->node == fn) {
- RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ pr_debug("W %p adjusted by delnode 1, s=%d/%d\n",
+ w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
@@ -1874,10 +1886,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (w->node == fn) {
w->node = child;
if (children&2) {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
@@ -1905,8 +1919,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct net *net = info->nl_net;
bool notify_del = false;
- RT6_TRACE("fib6_del_route\n");
-
/* If the deleted route is the first in the node and it is not part of
* a multipath route, then we need to replace it with the next route
* in the node, if exists.
@@ -1955,7 +1967,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
- RT6_TRACE("walker %p adjusted by delroute\n", w);
+ pr_debug("walker %p adjusted by delroute\n", w);
w->leaf = rcu_dereference_protected(rt->fib6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
@@ -2281,9 +2293,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2293,7 +2304,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
if (time_after(now, rt->expires)) {
- RT6_TRACE("expiring %p\n", rt);
+ pr_debug("expiring %p\n", rt);
return -1;
}
gc_args->more++;
@@ -2308,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+
+ fib6_gc_table(net, table, gc_args);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2323,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -2383,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2395,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif
fib6_tables_init(net);
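
The new tb6_gc_hlist/gc_link machinery threads only routes with a finite expiry onto a per-table list, so fib6_run_gc() walks the expiry candidates instead of the whole trie. A self-contained sketch of the intrusive-list idea, with a simplified singly linked list in place of hlist:

#include <stdio.h>

struct route {
	long expires;		/* 0 = never */
	struct route *gc_next;	/* on the table's GC list iff expires != 0 */
};

struct table {
	struct route *gc_head;	/* only expiring routes live here */
};

static void set_expires(struct table *tb, struct route *rt, long when)
{
	if (!rt->expires) {	/* first finite expiry: join the GC list */
		rt->gc_next = tb->gc_head;
		tb->gc_head = rt;
	}
	rt->expires = when;
}

static void gc(struct table *tb, long now)
{
	struct route **pp = &tb->gc_head;

	while (*pp) {
		struct route *rt = *pp;

		if (rt->expires && rt->expires <= now) {
			*pp = rt->gc_next;	/* unlink expired route */
			printf("expired %p\n", (void *)rt);
		} else {
			pp = &rt->gc_next;
		}
	}
}

int main(void)
{
	struct table tb = { 0 };
	struct route a = { 0 }, b = { 0 };

	set_expires(&tb, &a, 10);
	set_expires(&tb, &b, 30);
	gc(&tb, 20);	/* expires only a; b stays queued */
	return 0;
}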
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 070d87abf7c0..5e97e0aa8e07 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1511,6 +1511,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
ip6gre_tnl_init_features(dev);
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
cleanup_dst_cache_init:
@@ -1632,21 +1633,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6gre_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch = ip6gre_exit_batch_net,
+ .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
@@ -1903,6 +1902,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
ip6erspan_tnl_link_config(tunnel, 1);
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
cleanup_dst_cache_init:
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9bbabf750a21..44406c28445d 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1898,6 +1898,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
destroy_dst:
@@ -2282,21 +2283,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6_tnl_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch = ip6_tnl_exit_batch_net,
+ .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index e550240c85e1..7f4f976aa24a 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -935,6 +935,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -1174,24 +1175,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
+static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct vti6_net *ip6n;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, &list);
+ vti6_destroy_tunnels(ip6n, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch = vti6_exit_batch_net,
+ .exit_batch_rtnl = vti6_exit_batch_rtnl,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bc6e0a0bad3c..76ee1615ff2a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2719,7 +2719,6 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Should stop work after group drop, or we will
* start work again in mld_ifc_event()
*/
- synchronize_net();
mld_query_stop_work(idev);
mld_report_stop_work(idev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a19999b30bc0..73cb31afe935 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
struct ndisc_options ndopts;
struct fib6_info *rt = NULL;
struct inet6_dev *in6_dev;
+ struct fib6_table *table;
u32 defrtr_usr_metric;
unsigned int pref = 0;
__u32 old_if_flags;
@@ -1382,7 +1383,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
neigh_release(neigh);
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
- skb->dev, pref, defrtr_usr_metric);
+ skb->dev, pref, defrtr_usr_metric,
+ lifetime);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
@@ -1409,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
}
- if (rt)
+ if (rt) {
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(rt, jiffies + (HZ * lifetime));
+ fib6_add_gc_list(rt);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0ba62f4868f9..f3c8e2d918e1 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -6,6 +6,10 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
+# old sockopt interface and eval loop
+config IP6_NF_IPTABLES_LEGACY
+ tristate
+
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
@@ -147,7 +151,7 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP6_NF_MANGLE || IP6_NF_RAW
+ depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -186,6 +190,7 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -195,7 +201,7 @@ config IP6_NF_FILTER
config IP6_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP6_NF_FILTER
+ depends on IP6_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV6
default m if NETFILTER_ADVANCED=n
help
@@ -221,6 +227,7 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -230,6 +237,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -243,6 +251,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -254,6 +263,7 @@ config IP6_NF_NAT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
+ select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
@@ -262,25 +272,23 @@ config IP6_NF_NAT
To compile it as a module, choose M here. If unsure, say N.
-if IP6_NF_NAT
-
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NETFILTER_XT_TARGET_MASQUERADE
+ depends on IP6_NF_NAT
help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
+ depends on IP6_NF_NAT || NFT_COMPAT
help
This option adds the `SNPT' and `DNPT' target, which perform
stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
To compile it as a module, choose M here. If unsure, say N.
-endif # IP6_NF_NAT
-
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b8d6dc9aeeb6..66ce6fa5b2f5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,7 +4,7 @@
#
# Link order matters here.
-obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea1dec8448fc..707d65bc9c0e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
+ struct fib6_table *table;
unsigned int pref;
unsigned long lifetime;
struct fib6_info *rt;
@@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
- if (!addrconf_finite_timeout(lifetime))
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- else
+ fib6_remove_gc_list(rt);
+ } else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
+ fib6_add_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(rt);
}
@@ -2085,12 +2094,12 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
*/
if (!(rt->rt6i_flags & RTF_EXPIRES)) {
if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ pr_debug("aging clone %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
} else if (time_after(jiffies, rt->dst.expires)) {
- RT6_TRACE("purging expired route %p\n", rt);
+ pr_debug("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2101,8 +2110,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (!(neigh && (neigh->flags & NTF_ROUTER))) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
+ pr_debug("purging route %p via non-router but gateway\n",
+ rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -3765,8 +3774,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
- else
- fib6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -4355,7 +4362,8 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref,
- u32 defrtr_usr_metric)
+ u32 defrtr_usr_metric,
+ int lifetime)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
@@ -4368,6 +4376,7 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
+ .fc_expires = jiffies_to_clock_t(lifetime * HZ),
};
cfg.fc_gateway = *gwaddr;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5e9f625b76e3..ed3a44aa1e9d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1460,6 +1460,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
return err;
}
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -1875,22 +1876,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_batch_net(struct list_head *net_list)
+static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- LIST_HEAD(list);
struct net *net;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, &list);
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ sit_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch = sit_exit_batch_net,
+ .exit_batch_rtnl = sit_exit_batch_rtnl,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c8d2ca27220c..6b9c69278819 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -182,9 +182,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
if (!req)
goto out_drop;
@@ -247,7 +251,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, dst);
ret = tcp_get_cookie_sock(sk, skb, req, dst);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 9a2a9ed3ba47..970af3983d11 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -478,7 +478,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = ip_route_output_ports(sock_net(sk), fl4, sk,
daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
- sk->sk_protocol, RT_CONN_FLAGS(sk),
+ sk->sk_protocol, ip_sock_rt_tos(sk),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 5409c2ea3f57..bd8ff5950c8d 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -225,6 +225,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
}
static const struct inet_diag_handler mptcp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = mptcp_diag_dump,
.dump_one = mptcp_diag_dump_one,
.idiag_get_info = mptcp_diag_get_info,
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e3e96a49f922..23e317ffc901 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -689,8 +689,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
- opts->ahmac = add_addr_generate_hmac(msk->local_key,
- msk->remote_key,
+ opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
@@ -792,7 +792,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
*size = TCPOLEN_MPTCP_FASTCLOSE;
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
- opts->rcvr_key = msk->remote_key;
+ opts->rcvr_key = READ_ONCE(msk->remote_key);
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
@@ -1031,7 +1031,7 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
msk->bytes_acked += new_snd_una - msk->snd_una;
- msk->snd_una = new_snd_una;
+ WRITE_ONCE(msk->snd_una, new_snd_una);
}
static void ack_update_msk(struct mptcp_sock *msk,
@@ -1058,10 +1058,10 @@ static void ack_update_msk(struct mptcp_sock *msk,
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
if (after64(new_wnd_end, msk->wnd_end))
- msk->wnd_end = new_wnd_end;
+ WRITE_ONCE(msk->wnd_end, new_wnd_end);
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
- if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
+ if (after64(msk->wnd_end, snd_nxt))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
@@ -1072,7 +1072,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
trace_ack_update_msk(mp_opt->data_ack,
old_snd_una, new_snd_una,
- new_wnd_end, msk->wnd_end);
+ new_wnd_end, READ_ONCE(msk->wnd_end));
}
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -1100,8 +1100,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
if (mp_opt->echo)
return true;
- hmac = add_addr_generate_hmac(msk->remote_key,
- msk->local_key,
+ hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
&mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
@@ -1148,7 +1148,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
- msk->local_key == mp_opt.rcvr_key) {
+ READ_ONCE(msk->local_key) == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 4ae19113b8eb..53e0b08b1123 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -77,7 +77,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int
{
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
+ pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side);
WRITE_ONCE(pm->server_side, server_side);
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 287a60381eae..d9ad45959219 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1997,7 +1997,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
const struct mptcp_subflow_context *sf;
u8 sk_err;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
return -EMSGSIZE;
if (mptcp_event_add_subflow(skb, ssk))
@@ -2055,7 +2055,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
- int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
+ int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
if (err)
return err;
@@ -2083,7 +2083,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
if (!nlh)
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
@@ -2118,7 +2118,7 @@ void mptcp_event_addr_announced(const struct sock *ssk,
if (!nlh)
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
@@ -2234,7 +2234,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
goto nla_put_failure;
break;
case MPTCP_EVENT_CLOSED:
- if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0)
goto nla_put_failure;
break;
case MPTCP_EVENT_ANNOUNCED:
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8ef2927ebca2..c7af62c057bc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -410,6 +410,7 @@ static void mptcp_close_wake_up(struct sock *sk)
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}
+/* called under the msk socket lock */
static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -441,16 +442,17 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
}
}
+/* can be called with no lock acquired */
static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (READ_ONCE(msk->rcv_data_fin) &&
- ((1 << sk->sk_state) &
+ ((1 << inet_sk_state_load(sk)) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
- if (msk->ack_seq == rcv_data_fin_seq) {
+ if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) {
if (seq)
*seq = rcv_data_fin_seq;
@@ -748,7 +750,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
- msk->ack_seq = end_seq;
+ WRITE_ONCE(msk->ack_seq, end_seq);
moved = true;
}
return moved;
@@ -985,6 +987,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}
+/* called under both the msk socket lock and the data lock */
static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1033,13 +1036,15 @@ static void __mptcp_clean_una(struct sock *sk)
msk->recovery = false;
out:
- if (snd_una == READ_ONCE(msk->snd_nxt) &&
- snd_una == READ_ONCE(msk->write_seq)) {
+ if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_rtx_timer(sk);
} else {
mptcp_reset_rtx_timer(sk);
}
+
+ if (mptcp_pending_data_fin_ack(sk))
+ mptcp_schedule_work(sk);
}
static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1499,7 +1504,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
*/
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
- msk->snd_nxt = snd_nxt_new;
+ WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
}
}
@@ -2114,7 +2119,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
skb = skb_peek(&msk->receive_queue);
if (skb) {
- u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
if (hint_val >= INT_MAX)
return INT_MAX;
@@ -2758,7 +2763,7 @@ static void __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->rmem_fwd_alloc = 0;
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
@@ -2974,7 +2979,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->rmem_fwd_alloc);
+ WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -3149,16 +3154,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
msk->recovery = false;
- msk->can_ack = false;
- msk->fully_established = false;
- msk->rcv_data_fin = false;
- msk->snd_data_fin_enable = false;
- msk->rcv_fastclose = false;
- msk->use_64bit_ack = false;
- msk->bytes_consumed = 0;
+ WRITE_ONCE(msk->can_ack, false);
+ WRITE_ONCE(msk->fully_established, false);
+ WRITE_ONCE(msk->rcv_data_fin, false);
+ WRITE_ONCE(msk->snd_data_fin_enable, false);
+ WRITE_ONCE(msk->rcv_fastclose, false);
+ WRITE_ONCE(msk->use_64bit_ack, false);
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
+ msk->bytes_consumed = 0;
msk->bytes_acked = 0;
msk->bytes_received = 0;
msk->bytes_sent = 0;
@@ -3200,17 +3205,17 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
- msk->local_key = subflow_req->local_key;
- msk->token = subflow_req->token;
+ WRITE_ONCE(msk->local_key, subflow_req->local_key);
+ WRITE_ONCE(msk->token, subflow_req->token);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
- msk->write_seq = subflow_req->idsn + 1;
- msk->snd_nxt = msk->write_seq;
- msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
+ WRITE_ONCE(msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd);
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
@@ -3313,9 +3318,6 @@ void __mptcp_data_acked(struct sock *sk)
__mptcp_clean_una(sk);
else
__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
-
- if (mptcp_pending_data_fin_ack(sk))
- mptcp_schedule_work(sk);
}
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ed50f2015dc3..c5ec056040eb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -260,8 +260,10 @@ struct mptcp_data_frag {
struct mptcp_sock {
/* inet_connection_sock must be the first member */
struct inet_connection_sock sk;
- u64 local_key;
- u64 remote_key;
+ u64 local_key; /* protected by the first subflow socket lock;
+ * read locklessly
+ */
+ u64 remote_key; /* same as above */
u64 write_seq;
u64 bytes_sent;
u64 snd_nxt;
@@ -400,7 +402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+ if (msk->snd_una == msk->snd_nxt)
return NULL;
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index c40f1428e602..da37e4541a5d 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -942,7 +942,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
mptcp_data_unlock(sk);
slow = lock_sock_fast(sk);
- info->mptcpi_csum_enabled = msk->csum_enabled;
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
info->mptcpi_token = msk->token;
info->mptcpi_write_seq = msk->write_seq;
info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index c34ecadee120..02dab0669cfc 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -75,7 +75,8 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
- subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
subflow_req->local_nonce,
subflow_req->remote_nonce, hmac);
@@ -714,7 +715,8 @@ static bool subflow_hmac_valid(const struct request_sock *req,
if (!msk)
return false;
- subflow_generate_hmac(msk->remote_key, msk->local_key,
+ subflow_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
@@ -1548,8 +1550,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key_valid = 1;
- subflow->remote_key = msk->remote_key;
- subflow->local_key = msk->local_key;
+ subflow->remote_key = READ_ONCE(msk->remote_key);
+ subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 441d1f134110..df2dc21304ef 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -818,7 +818,7 @@ config NETFILTER_XT_TARGET_AUDIT
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
config NETFILTER_XT_TARGET_CT
tristate '"CT" target support'
depends on NF_CONNTRACK
- depends on IP_NF_RAW || IP6_NF_RAW
+ depends on IP_NF_RAW || IP6_NF_RAW || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `CT' target, which allows you to specify initial
@@ -880,7 +880,7 @@ config NETFILTER_XT_TARGET_CT
config NETFILTER_XT_TARGET_DSCP
tristate '"DSCP" and "TOS" target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a `DSCP' target, which allows you to manipulate
@@ -896,7 +896,7 @@ config NETFILTER_XT_TARGET_DSCP
config NETFILTER_XT_TARGET_HL
tristate '"HL" hoplimit target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
@@ -1080,7 +1080,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
- depends on IP_NF_MANGLE
+ depends on IP_NF_MANGLE || NFT_COMPAT
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
select NF_TPROXY_IPV4
@@ -1147,7 +1147,7 @@ config NETFILTER_XT_TARGET_TCPMSS
config NETFILTER_XT_TARGET_TCPOPTSTRIP
tristate '"TCPOPTSTRIP" target support'
- depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT
depends on NETFILTER_ADVANCED
help
This option adds a "TCPOPTSTRIP" target, which allows you to strip
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a743db073887..98d7dbe3d787 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1511,9 +1511,7 @@ int __init ip_vs_conn_init(void)
return -ENOMEM;
/* Allocate ip_vs_conn slab cache */
- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
- sizeof(struct ip_vs_conn), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN);
if (!ip_vs_conn_cachep) {
kvfree(ip_vs_conn_tab);
return -ENOMEM;
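The open-coded kmem_cache_create() calls converted here (and in the nf_conncount, rds and sctp hunks below) duplicated information derivable from the struct itself. KMEM_CACHE() from <linux/slab.h> is defined roughly as:

#define KMEM_CACHE(__struct, __flags)					\
	kmem_cache_create(#__struct, sizeof(struct __struct),		\
			  __alignof__(struct __struct), (__flags), NULL)

so the cache name can no longer drift from the type it holds, and the struct's natural alignment is passed instead of 0.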
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 0e4beae421f8..5257d5e7eb09 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type,
static const struct bpf_func_proto *
bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
const struct bpf_verifier_ops netfilter_verifier_ops = {
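bpf_base_func_proto() now takes the calling program, letting the availability of base helpers depend on per-program state rather than only on who loaded the program. A hypothetical sketch of the core-side shape (bpf_token_capable() and prog->aux->token are assumptions borrowed from the BPF token work, not shown in this diff):

const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;	/* always available */
	default:
		break;
	}

	/* hypothetical: privileged helpers require a capable token */
	if (!bpf_token_capable(prog->aux->token, CAP_BPF))
		return NULL;

	switch (func_id) {
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	default:
		return NULL;
	}
}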
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 5d8ed6c90b7e..8715617b02fe 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -605,15 +605,11 @@ static int __init nf_conncount_modinit(void)
for (i = 0; i < CONNCOUNT_SLOTS; ++i)
spin_lock_init(&nf_conncount_locks[i]);
- conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
- sizeof(struct nf_conncount_tuple),
- 0, 0, NULL);
+ conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0);
if (!conncount_conn_cachep)
return -ENOMEM;
- conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
- sizeof(struct nf_conncount_rb),
- 0, 0, NULL);
+ conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0);
if (!conncount_rb_cachep) {
kmem_cache_destroy(conncount_conn_cachep);
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e5f3864d353..90e6bd2c3000 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2530,7 +2530,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
* netfilter framework. Roll on, two-stage module
* delete...
*/
- synchronize_net();
+ synchronize_rcu_expedited();
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
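synchronize_net() only takes the expedited path when the RTNL happens to be held; netns cleanup typically runs without it, so the expedited grace period is spelled out directly here. For reference, net/core/dev.c defines the helper roughly as:

void synchronize_net(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();
	else
		synchronize_rcu();
}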
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f8e3f70c35bd..cb6f49a3d809 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1194,8 +1194,10 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
- __NFT_TABLE_F_WAS_AWAKEN)
+ __NFT_TABLE_F_WAS_AWAKEN | \
+ __NFT_TABLE_F_WAS_ORPHAN)
static int nf_tables_updtable(struct nft_ctx *ctx)
{
@@ -1215,8 +1217,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if ((nft_table_has_owner(ctx->table) &&
!(flags & NFT_TABLE_F_OWNER)) ||
- (!nft_table_has_owner(ctx->table) &&
- flags & NFT_TABLE_F_OWNER))
+ (flags & NFT_TABLE_F_OWNER &&
+ !nft_table_is_orphan(ctx->table)))
+ return -EOPNOTSUPP;
+
+ if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST)
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
@@ -1245,6 +1250,13 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
}
}
+ if ((flags & NFT_TABLE_F_OWNER) &&
+ !nft_table_has_owner(ctx->table)) {
+ ctx->table->nlpid = ctx->portid;
+ ctx->table->flags |= NFT_TABLE_F_OWNER |
+ __NFT_TABLE_F_WAS_ORPHAN;
+ }
+
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -4235,23 +4247,18 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
* given, in that case the amount of memory per element is used.
*/
static const struct nft_set_ops *
-nft_select_set_ops(const struct nft_ctx *ctx,
- const struct nlattr * const nla[],
+nft_select_set_ops(const struct nft_ctx *ctx, u32 flags,
const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
const struct nft_set_type *type;
- u32 flags = 0;
int i;
lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
- if (nla[NFTA_SET_FLAGS] != NULL)
- flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
-
bops = NULL;
best.size = ~0;
best.lookup = ~0;
@@ -5137,7 +5144,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc);
+ ops = nft_select_set_ops(&ctx, flags, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -10425,6 +10432,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
} else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
}
+ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
+ trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER;
+ trans->ctx.table->nlpid = 0;
+ }
trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
@@ -11351,6 +11362,10 @@ again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
+ if (table->flags & NFT_TABLE_F_PERSIST) {
+ table->flags &= ~NFT_TABLE_F_OWNER;
+ continue;
+ }
__nft_release_hook(net, table);
list_del_rcu(&table->list);
to_delete[deleted++] = table;
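Taken together, the nf_tables hunks implement re-claimable owner tables. A sketch of the lifecycle as these hunks read (paraphrase, not verbatim kernel code):

/*
 * 1. A process creates a table with NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST.
 * 2. When the owner socket closes, the netlink notifier clears
 *    NFT_TABLE_F_OWNER instead of tearing the table down: it becomes an
 *    "orphan" (PERSIST set, no owner, stale ->nlpid).
 * 3. A later process may re-claim it: nf_tables_updtable() accepts setting
 *    NFT_TABLE_F_OWNER on an orphan, stores the claimant's portid in
 *    ->nlpid and marks the transaction with __NFT_TABLE_F_WAS_ORPHAN.
 * 4. On transaction abort, __nf_tables_abort() sees WAS_ORPHAN and drops
 *    NFT_TABLE_F_OWNER / zeroes ->nlpid again.
 *
 * Toggling NFT_TABLE_F_PERSIST itself on an existing table is rejected:
 * (flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST -> -EOPNOTSUPP.
 */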
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 27511c90a26f..7b844581ebee 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -610,7 +610,7 @@ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset)
struct netlbl_lsm_catmap *iter;
u32 idx;
u32 bit;
- NETLBL_CATMAP_MAPTYPE bitmap;
+ u64 bitmap;
iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0);
if (iter == NULL)
@@ -666,8 +666,8 @@ int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset)
struct netlbl_lsm_catmap *prev = NULL;
u32 idx;
u32 bit;
- NETLBL_CATMAP_MAPTYPE bitmask;
- NETLBL_CATMAP_MAPTYPE bitmap;
+ u64 bitmask;
+ u64 bitmap;
iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0);
if (iter == NULL)
@@ -857,7 +857,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
offset -= iter->startbit;
idx = offset / NETLBL_CATMAP_MAPSIZE;
- iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
+ iter->bitmap[idx] |= (u64)bitmap
<< (offset % NETLBL_CATMAP_MAPSIZE);
return 0;
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 1eeff9422856..e12c90d5f6ad 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -241,6 +241,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler netlink_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_NETLINK,
.dump = netlink_diag_handler_dump,
};
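The .owner member being added to the diag handlers throughout this series lets the sock_diag core pin the handler's module for the duration of a request instead of keeping it alive via a global mutex. A sketch of the assumed core-side pattern (locking elided):

const struct sock_diag_handler *handler = sock_diag_handlers[family];

if (handler && !try_module_get(handler->owner))
	handler = NULL;			/* module is mid-unload, bail out */
if (handler) {
	err = handler->dump(skb, nlh);
	module_put(handler->owner);	/* drop the ref once the dump ends */
}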
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 2140f6724644..ba91284f4086 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -30,15 +30,19 @@ exit:
return r;
}
+static void nfc_llc_del_engine(struct nfc_llc_engine *llc_engine)
+{
+ list_del(&llc_engine->entry);
+ kfree_const(llc_engine->name);
+ kfree(llc_engine);
+}
+
void nfc_llc_exit(void)
{
struct nfc_llc_engine *llc_engine, *n;
- list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) {
- list_del(&llc_engine->entry);
- kfree(llc_engine->name);
- kfree(llc_engine);
- }
+ list_for_each_entry_safe(llc_engine, n, &llc_engines, entry)
+ nfc_llc_del_engine(llc_engine);
}
int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
@@ -49,7 +53,7 @@ int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
if (llc_engine == NULL)
return -ENOMEM;
- llc_engine->name = kstrdup(name, GFP_KERNEL);
+ llc_engine->name = kstrdup_const(name, GFP_KERNEL);
if (llc_engine->name == NULL) {
kfree(llc_engine);
return -ENOMEM;
@@ -82,9 +86,7 @@ void nfc_llc_unregister(const char *name)
if (llc_engine == NULL)
return;
- list_del(&llc_engine->entry);
- kfree(llc_engine->name);
- kfree(llc_engine);
+ nfc_llc_del_engine(llc_engine);
}
struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
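kstrdup_const() returns the source pointer unchanged when it points into .rodata (e.g. a string literal) and only allocates for genuinely dynamic strings; kfree_const() is the matching release, a no-op for .rodata pointers. The one rule is that the two must stay paired, which is exactly what the new nfc_llc_del_engine() helper centralizes:

const char *name = kstrdup_const(src_name, GFP_KERNEL);

if (!name)
	return -ENOMEM;
/* ... use name ... */
kfree_const(name);	/* never plain kfree() on a _const duplicate */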
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 9a7980e3309d..b3bd2f6c2bf7 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -245,6 +245,7 @@ static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler packet_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_PACKET,
.dump = packet_diag_handler_dump,
};
diff --git a/net/rds/connection.c b/net/rds/connection.c
index b4cc699c5fad..c749c5525b40 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -829,9 +829,7 @@ int rds_conn_init(void)
if (ret)
return ret;
- rds_conn_slab = kmem_cache_create("rds_connection",
- sizeof(struct rds_connection),
- 0, 0, NULL);
+ rds_conn_slab = KMEM_CACHE(rds_connection, 0);
if (!rds_conn_slab) {
rds_loop_net_exit();
return -ENOMEM;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 470c70deffe2..8180d0c12fce 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -737,16 +737,6 @@ config NET_ACT_SAMPLE
To compile this code as a module, choose M here: the
module will be called act_sample.
-config NET_ACT_IPT
- tristate "IPtables targets"
- depends on NET_CLS_ACT && NETFILTER && NETFILTER_XTABLES
- help
- Say Y here to be able to invoke iptables targets after successful
- classification.
-
- To compile this code as a module, choose M here: the
- module will be called act_ipt.
-
config NET_ACT_NAT
tristate "Stateless NAT"
depends on NET_CLS_ACT
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3e30d7260493..9ee622fb1160 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1363,7 +1363,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
if (rtnl_held)
rtnl_unlock();
- request_module("act_%s", act_name);
+ request_module(NET_ACT_ALIAS_PREFIX "%s", act_name);
if (rtnl_held)
rtnl_lock();
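This pairs with the MODULE_ALIAS_NET_ACT()/_NET_CLS()/_NET_SCH() lines added to every action, classifier and qdisc in this series: each module advertises a prefixed alias and the core requests exactly that alias, so a crafted 'kind' string can no longer make the kernel load an arbitrary, unrelated module. Assuming the prefix definitions that accompany the macros (the literal prefix value is an assumption here, not shown in this diff):

/* include/net/act_api.h; the NET_CLS_/NET_SCH_ variants are analogous */
#define NET_ACT_ALIAS_PREFIX	"net-act-"
#define MODULE_ALIAS_NET_ACT(kind)	MODULE_ALIAS(NET_ACT_ALIAS_PREFIX kind)

With that, act_gact.ko carries the alias "net-act-gact", which is what the request_module() call above now asks modprobe to resolve.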
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 6cfee6658103..0e3cf11ae5fc 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -401,6 +401,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.init = tcf_bpf_init,
.size = sizeof(struct tcf_bpf),
};
+MODULE_ALIAS_NET_ACT("bpf");
static __net_init int bpf_init_net(struct net *net)
{
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f8762756657d..0fce631e7c91 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -242,6 +242,7 @@ static struct tc_action_ops act_connmark_ops = {
.cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
+MODULE_ALIAS_NET_ACT("connmark");
static __net_init int connmark_init_net(struct net *net)
{
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 7f8b1f2f2ed9..5cc8e407e791 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -709,6 +709,7 @@ static struct tc_action_ops act_csum_ops = {
.offload_act_setup = tcf_csum_offload_act_setup,
.size = sizeof(struct tcf_csum),
};
+MODULE_ALIAS_NET_ACT("csum");
static __net_init int csum_init_net(struct net *net)
{
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 6124d8b128d1..baac083fd8f1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1600,6 +1600,7 @@ static struct tc_action_ops act_ct_ops = {
.offload_act_setup = tcf_ct_offload_act_setup,
.size = sizeof(struct tcf_ct),
};
+MODULE_ALIAS_NET_ACT("ct");
static __net_init int ct_init_net(struct net *net)
{
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index e620f9a84afe..5dd41a012110 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -363,6 +363,7 @@ static struct tc_action_ops act_ctinfo_ops = {
.cleanup= tcf_ctinfo_cleanup,
.size = sizeof(struct tcf_ctinfo),
};
+MODULE_ALIAS_NET_ACT("ctinfo");
static __net_init int ctinfo_init_net(struct net *net)
{
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4af3b7ec249f..e949280eb800 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -296,6 +296,7 @@ static struct tc_action_ops act_gact_ops = {
.offload_act_setup = tcf_gact_offload_act_setup,
.size = sizeof(struct tcf_gact),
};
+MODULE_ALIAS_NET_ACT("gact");
static __net_init int gact_init_net(struct net *net)
{
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c681cd011afd..1dd74125398a 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -645,6 +645,7 @@ static struct tc_action_ops act_gate_ops = {
.offload_act_setup = tcf_gate_offload_act_setup,
.size = sizeof(struct tcf_gate),
};
+MODULE_ALIAS_NET_ACT("gate");
static __net_init int gate_init_net(struct net *net)
{
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 0e867d13beb5..107c6d83dc5c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -889,6 +889,7 @@ static struct tc_action_ops act_ife_ops = {
.init = tcf_ife_init,
.size = sizeof(struct tcf_ife_info),
};
+MODULE_ALIAS_NET_ACT("ife");
static __net_init int ife_init_net(struct net *net)
{
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0a1a9e40f237..6f4bb1c8ce7b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -641,6 +641,7 @@ static struct tc_action_ops act_mirred_ops = {
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
};
+MODULE_ALIAS_NET_ACT("mirred");
static __net_init int mirred_init_net(struct net *net)
{
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 34b8edb6cc77..44a37a71ae92 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -452,6 +452,7 @@ static struct tc_action_ops act_mpls_ops = {
.offload_act_setup = tcf_mpls_offload_act_setup,
.size = sizeof(struct tcf_mpls),
};
+MODULE_ALIAS_NET_ACT("mpls");
static __net_init int mpls_init_net(struct net *net)
{
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index a180e724634e..d541f553805f 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -324,6 +324,7 @@ static struct tc_action_ops act_nat_ops = {
.cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
+MODULE_ALIAS_NET_ACT("nat");
static __net_init int nat_init_net(struct net *net)
{
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 2ef22969f274..df5a02d5f919 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -620,6 +620,7 @@ static struct tc_action_ops act_pedit_ops = {
.offload_act_setup = tcf_pedit_offload_act_setup,
.size = sizeof(struct tcf_pedit),
};
+MODULE_ALIAS_NET_ACT("pedit");
static __net_init int pedit_init_net(struct net *net)
{
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e119b4a3db9f..8555125ed34d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -502,6 +502,7 @@ static struct tc_action_ops act_police_ops = {
.offload_act_setup = tcf_police_offload_act_setup,
.size = sizeof(struct tcf_police),
};
+MODULE_ALIAS_NET_ACT("police");
static __net_init int police_init_net(struct net *net)
{
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index c5c61efe6db4..a69b53d54039 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -316,6 +316,7 @@ static struct tc_action_ops act_sample_ops = {
.offload_act_setup = tcf_sample_offload_act_setup,
.size = sizeof(struct tcf_sample),
};
+MODULE_ALIAS_NET_ACT("sample");
static __net_init int sample_init_net(struct net *net)
{
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 0a3e92888295..f3abe0545989 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -209,6 +209,7 @@ static struct tc_action_ops act_simp_ops = {
.init = tcf_simp_init,
.size = sizeof(struct tcf_defact),
};
+MODULE_ALIAS_NET_ACT("simple");
static __net_init int simp_init_net(struct net *net)
{
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 754f78b35bb8..1f1d9ce3e968 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -426,6 +426,7 @@ static struct tc_action_ops act_skbedit_ops = {
.offload_act_setup = tcf_skbedit_offload_act_setup,
.size = sizeof(struct tcf_skbedit),
};
+MODULE_ALIAS_NET_ACT("skbedit");
static __net_init int skbedit_init_net(struct net *net)
{
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index bcb673ab0008..39945b139c48 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -287,6 +287,7 @@ static struct tc_action_ops act_skbmod_ops = {
.cleanup = tcf_skbmod_cleanup,
.size = sizeof(struct tcf_skbmod),
};
+MODULE_ALIAS_NET_ACT("skbmod");
static __net_init int skbmod_init_net(struct net *net)
{
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 300b08aa8283..1536f8b16f1b 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -842,6 +842,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
.offload_act_setup = tcf_tunnel_key_offload_act_setup,
.size = sizeof(struct tcf_tunnel_key),
};
+MODULE_ALIAS_NET_ACT("tunnel_key");
static __net_init int tunnel_key_init_net(struct net *net)
{
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 836183011a7c..22f4b1e8ade9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -427,6 +427,7 @@ static struct tc_action_ops act_vlan_ops = {
.offload_act_setup = tcf_vlan_offload_act_setup,
.size = sizeof(struct tcf_vlan),
};
+MODULE_ALIAS_NET_ACT("vlan");
static __net_init int vlan_init_net(struct net *net)
{
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ff3d396a65aa..ca5676b2668e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -257,7 +257,7 @@ tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
#ifdef CONFIG_MODULES
if (rtnl_held)
rtnl_unlock();
- request_module("cls_%s", kind);
+ request_module(NET_CLS_ALIAS_PREFIX "%s", kind);
if (rtnl_held)
rtnl_lock();
ops = __tcf_proto_lookup_ops(kind);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index a1f56931330c..ecfaa4f9a04e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -328,6 +328,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
.bind_class = basic_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("basic");
static int __init init_basic(void)
{
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 382c7a71f81f..5e83e890f6a4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -693,6 +693,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
.dump = cls_bpf_dump,
.bind_class = cls_bpf_bind_class,
};
+MODULE_ALIAS_NET_CLS("bpf");
static int __init cls_bpf_init_mod(void)
{
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7ee8dbf49ed0..424252982d6a 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -209,6 +209,7 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
.dump = cls_cgroup_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("cgroup");
static int __init init_cgroup_cls(void)
{
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6ab317b48d6c..5502998aace7 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -702,6 +702,7 @@ static struct tcf_proto_ops cls_flow_ops __read_mostly = {
.walk = flow_walk,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("flow");
static int __init cls_flow_init(void)
{
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index efb9d2811b73..bfedc3d4423d 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -3656,6 +3656,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.owner = THIS_MODULE,
.flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
+MODULE_ALIAS_NET_CLS("flower");
static int __init cls_fl_init(void)
{
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index afc534ee0a18..cdddc8695228 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -433,6 +433,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.bind_class = fw_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("fw");
static int __init init_fw(void)
{
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index c4ed11df6254..9f1e62ca508d 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -398,6 +398,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.bind_class = mall_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("matchall");
static int __init cls_mall_init(void)
{
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 12a505db4183..b9c58c040c30 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -671,6 +671,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.bind_class = route4_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("route");
static int __init init_route4(void)
{
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 289e1755c26b..9412d88a99bc 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1453,6 +1453,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
.bind_class = u32_bind_class,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_CLS("u32");
static int __init init_u32(void)
{
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 36b025cc4fd2..9d928f6a473a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -228,7 +228,7 @@ int qdisc_set_default(const char *name)
if (!ops) {
/* Not found, drop lock and try to load module */
write_unlock(&qdisc_mod_lock);
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
write_lock(&qdisc_mod_lock);
ops = qdisc_lookup_default(name);
@@ -1275,7 +1275,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* go away in the mean time.
*/
rtnl_unlock();
- request_module("sch_%s", name);
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
rtnl_lock();
ops = qdisc_lookup_ops(kind);
if (ops != NULL) {
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 9cff99558694..edee926ccde8 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -3103,6 +3103,7 @@ static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
.dump_stats = cake_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("cake");
static int __init cake_module_init(void)
{
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index beece8e82c23..69001eff0315 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -546,6 +546,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.dump = cbs_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("cbs");
static struct notifier_block cbs_device_notifier = {
.notifier_call = cbs_dev_notifier,
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ae1da08e268f..ea108030c6b4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -498,6 +498,7 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
.dump_stats = choke_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("choke");
static int __init choke_module_init(void)
{
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index d7a4874543de..ecb3f164bb25 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Codel - The Controlled-Delay Active Queue Management algorithm
*
@@ -7,37 +8,6 @@
* Implemented on linux by :
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
* Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The names of the authors may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, provided that this notice is retained in full, this
- * software may be distributed under the terms of the GNU General
- * Public License ("GPL") version 2, in which case the provisions of the
- * GPL apply INSTEAD OF those given above.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- *
*/
#include <linux/module.h>
@@ -287,6 +257,7 @@ static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
.dump_stats = codel_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("codel");
static int __init codel_module_init(void)
{
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 097740a9afea..c69b999fae17 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -481,6 +481,7 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
.destroy = drr_destroy_qdisc,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("drr");
static int __init drr_init(void)
{
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 4808159a5466..2e4bef713b6a 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -500,6 +500,7 @@ static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
.dump = etf_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("etf");
static int __init etf_module_init(void)
{
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index f7c88495946b..835b4460b448 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -812,6 +812,7 @@ static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
.dump = ets_qdisc_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("ets");
static int __init ets_init(void)
{
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 3a31c47fea9b..cdf23ff16f40 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1264,6 +1264,7 @@ static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
.dump_stats = fq_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq");
static int __init fq_module_init(void)
{
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 8c4fee063436..79f9d6de6c85 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -717,6 +717,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.dump_stats = fq_codel_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("fq_codel");
static int __init fq_codel_module_init(void)
{
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8c61eb3dc943..79ba9dc70254 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -930,6 +930,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
.dump = gred_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("gred");
static int __init gred_module_init(void)
{
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 16c45da4036a..4e626df742d7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1679,6 +1679,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct hfsc_sched),
.owner = THIS_MODULE
};
+MODULE_ALIAS_NET_SCH("hfsc");
static int __init
hfsc_init(void)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index d26cd436cbe3..3f906df1435b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -702,6 +702,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
.dump_stats = hhf_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("hhf");
static int __init hhf_module_init(void)
{
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7349233eaa9b..93e6fb56f3b5 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -2166,6 +2166,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.dump = htb_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("htb");
static int __init htb_module_init(void)
{
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5fa9eaa79bfc..c2ef9dcf91d2 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -168,6 +168,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.ingress_block_get = ingress_ingress_block_get,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("ingress");
struct clsact_sched_data {
struct tcf_block *ingress_block;
@@ -344,6 +345,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.egress_block_get = clsact_egress_block_get,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("clsact");
static int __init ingress_module_init(void)
{
@@ -368,6 +370,5 @@ static void __exit ingress_module_exit(void)
module_init(ingress_module_init);
module_exit(ingress_module_exit);
-MODULE_ALIAS("sch_clsact");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs");
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 43e53ee00a56..225353fbb3f1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -774,6 +774,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
.dump = mqprio_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("mqprio");
static int __init mqprio_module_init(void)
{
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index d66d5f0ec080..79e93a19d5fa 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -395,6 +395,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
.dump = multiq_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("multiq");
static int __init multiq_module_init(void)
{
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fa678eb88528..edc72962ae63 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1293,6 +1293,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
.dump = netem_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("netem");
static int __init netem_module_init(void)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 2da6250ec346..1764059b0635 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -556,6 +556,7 @@ static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
.dump_stats = pie_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("pie");
static int __init pie_module_init(void)
{
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 992f0c8d7988..cefb65201e17 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -213,6 +213,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.reset = qdisc_reset_queue,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("plug");
static int __init plug_module_init(void)
{
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8ecdd3ef6f8e..cc30f7a32f1a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -418,6 +418,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.dump = prio_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("prio");
static int __init prio_module_init(void)
{
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 48a604c320c7..d584c0c25899 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1521,6 +1521,7 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
.destroy = qfq_destroy_qdisc,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("qfq");
static int __init qfq_init(void)
{
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 607b6c8b3a9b..b5f096588fae 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -548,6 +548,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
.dump_stats = red_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("red");
static int __init red_module_init(void)
{
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 1871a1c0224d..b717e15a3a17 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -709,6 +709,7 @@ static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
.dump_stats = sfb_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("sfb");
static int __init sfb_module_init(void)
{
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index eb77558fa367..e66f4afb920d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -925,6 +925,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.dump = sfq_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("sfq");
static int __init sfq_module_init(void)
{
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 28beb11762d8..b4dd626c309c 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -292,6 +292,7 @@ static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
.destroy = skbprio_destroy,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("skbprio");
static int __init skbprio_module_init(void)
{
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 31a8252bd09c..c5de70efdc86 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -40,6 +40,8 @@ static struct static_key_false taprio_have_working_mqprio;
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_SUPPORTED_FLAGS \
+ (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
@@ -408,19 +410,6 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
return entry;
}
-static bool taprio_flags_valid(u32 flags)
-{
- /* Make sure no other flag bits are set. */
- if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
- TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
- return false;
- /* txtime-assist and full offload are mutually exclusive */
- if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
- (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
- return false;
- return true;
-}
-
/* This returns the tstamp value set by TCP in terms of the set clock. */
static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
{
@@ -1031,7 +1020,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
- [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_FLAGS] =
+ NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS),
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
};
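NLA_POLICY_MASK() has the netlink parser itself reject any TCA_TAPRIO_ATTR_FLAGS value carrying bits outside TAPRIO_SUPPORTED_FLAGS, which is what lets the unknown-bit half of taprio_flags_valid() disappear; only the semantic rule (txtime-assist and full offload being mutually exclusive) still needs checking in taprio_change(). The policy is equivalent to this open-coded test:

u32 flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);

if (flags & ~TAPRIO_SUPPORTED_FLAGS)	/* now enforced by the nla policy */
	return -EINVAL;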
@@ -1815,33 +1805,6 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
return 0;
}
-/* The semantics of the 'flags' argument in relation to 'change()'
- * requests, are interpreted following two rules (which are applied in
- * this order): (1) an omitted 'flags' argument is interpreted as
- * zero; (2) the 'flags' of a "running" taprio instance cannot be
- * changed.
- */
-static int taprio_new_flags(const struct nlattr *attr, u32 old,
- struct netlink_ext_ack *extack)
-{
- u32 new = 0;
-
- if (attr)
- new = nla_get_u32(attr);
-
- if (old != TAPRIO_FLAGS_INVALID && old != new) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- }
-
- if (!taprio_flags_valid(new)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
-
- return new;
-}
-
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1852,6 +1815,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
unsigned long flags;
+ u32 taprio_flags;
ktime_t start;
int i, err;
@@ -1863,12 +1827,28 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
- q->flags, extack);
- if (err < 0)
- return err;
+ /* The semantics of the 'flags' argument in relation to 'change()'
+ * requests are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+ taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;
- q->flags = err;
+ /* txtime-assist and full offload are mutually exclusive */
+ if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
+ (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS],
+ "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive");
+ return -EINVAL;
+ }
+
+ if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+ q->flags = taprio_flags;
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
@@ -2548,6 +2528,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.dump_stats = taprio_dump_stats,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("taprio");
static struct notifier_block taprio_device_notifier = {
.notifier_call = taprio_dev_notifier,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dd6b1a723bf7..f1d09183ae63 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -608,6 +608,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
.dump = tbf_dump,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_NET_SCH("tbf");
static int __init tbf_module_init(void)
{
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index eb05131ff1dd..23359e522273 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -507,6 +507,7 @@ done:
}
static const struct inet_diag_handler sctp_diag_handler = {
+ .owner = THIS_MODULE,
.dump = sctp_diag_dump,
.dump_one = sctp_diag_dump_one,
.idiag_get_info = sctp_diag_get_info,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 94c6dd53cd62..e849f368ed91 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1495,17 +1495,11 @@ static __init int sctp_init(void)
/* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
- sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket",
- sizeof(struct sctp_bind_bucket),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ sctp_bucket_cachep = KMEM_CACHE(sctp_bind_bucket, SLAB_HWCACHE_ALIGN);
if (!sctp_bucket_cachep)
goto out;
- sctp_chunk_cachep = kmem_cache_create("sctp_chunk",
- sizeof(struct sctp_chunk),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ sctp_chunk_cachep = KMEM_CACHE(sctp_chunk, SLAB_HWCACHE_ALIGN);
if (!sctp_chunk_cachep)
goto err_chunk_cachep;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0f53a5c6fd9d..4b52b3b159c0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1046,7 +1046,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
int i = 1, entry = 1;
- bool is_virtual;
+ bool is_emulated;
u16 chid;
if (smcd_indicated(ini->smc_type_v1))
@@ -1058,12 +1058,12 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
- is_virtual = __smc_ism_is_virtual(chid);
+ is_emulated = __smc_ism_is_emulated(chid);
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
- if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
+ if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
/* It's the last GID-CHID entry left in CLC
- * Proposal SMC-Dv2 extension, but a virtual
+ * Proposal SMC-Dv2 extension, but an Emulated-
* ISM device will take two entries. So give
* it up and try the next potential ISM device.
*/
@@ -1073,7 +1073,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
ini->is_smcd = true;
rc = 0;
i++;
- entry = is_virtual ? entry + 2 : entry + 1;
+ entry = is_emulated ? entry + 2 : entry + 1;
if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
break;
}
@@ -1414,10 +1414,10 @@ static int smc_connect_ism(struct smc_sock *smc,
if (rc)
return rc;
- if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected]))
+ if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
ini->ism_peer_gid[ini->ism_selected].gid_ext =
ntohll(aclc->d1.gid_ext);
- /* for non-virtual ISM devices, peer gid_ext remains 0. */
+ /* for non-Emulated-ISM devices, peer gid_ext remains 0. */
}
ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
@@ -2118,10 +2118,10 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini,
if (smc_ism_get_chid(smcd) == proposed_chid &&
!smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
- if (__smc_ism_is_virtual(proposed_chid))
+ if (__smc_ism_is_emulated(proposed_chid))
ini->ism_peer_gid[*matches].gid_ext =
proposed_gid->gid_ext;
- /* non-virtual ISM's peer gid_ext remains 0. */
+ /* non-Emulated-ISM's peer gid_ext remains 0. */
ini->ism_dev[*matches] = smcd;
(*matches)++;
break;
@@ -2171,10 +2171,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
smcd_gid.gid_ext = 0;
chid = ntohs(smcd_v2_ext->gidchid[i].chid);
- if (__smc_ism_is_virtual(chid)) {
+ if (__smc_ism_is_emulated(chid)) {
if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
- /* each virtual ISM device takes two GID-CHID
+ /* each Emulated-ISM device takes two GID-CHID
* entries and CHID of the second entry repeats
* that of the first entry.
*
diff --git a/net/smc/smc.h b/net/smc/smc.h
index df64efd2dee8..18c8b7870198 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -56,11 +56,11 @@ enum smc_state { /* possible states of an SMC socket */
};
enum smc_supplemental_features {
- SMC_SPF_VIRT_ISM_DEV = 0,
+ SMC_SPF_EMULATED_ISM_DEV = 0,
};
#define SMC_FEATURE_MASK \
- (BIT(SMC_SPF_VIRT_ISM_DEV))
+ (BIT(SMC_SPF_EMULATED_ISM_DEV))
struct smc_link_group;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 9a13709bea1c..e55026c7529c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -952,8 +952,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
gidchids[entry].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
gidchids[entry].gid = htonll(smcd_gid.gid);
- if (smc_ism_is_virtual(smcd)) {
- /* a virtual ISM device takes two
+ if (smc_ism_is_emulated(smcd)) {
+ /* an Emulated-ISM device takes two
* entries. CHID of the second entry
* repeats that of the first entry.
*/
@@ -1055,7 +1055,7 @@ smcd_clc_prep_confirm_accept(struct smc_connection *conn,
clc->d1.chid = htons(chid);
if (eid && eid[0])
memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN);
- if (__smc_ism_is_virtual(chid))
+ if (__smc_ism_is_emulated(chid))
clc->d1.gid_ext = htonll(smcd_gid.gid_ext);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index a9f9bdd26dcd..7cc7070b9772 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -175,7 +175,7 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC
* proposal SMC-Dv2 extension.
* each ISM device takes one entry and
- * each virtual ISM takes two entries.
+ * each Emulated-ISM takes two entries
*/
struct smc_clc_msg_proposal_area {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index e4c858411207..9b84d5897aa5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1535,7 +1535,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
if ((!peer_gid->gid ||
(lgr->peer_gid.gid == peer_gid->gid &&
- !smc_ism_is_virtual(dev) ? 1 :
+ !smc_ism_is_emulated(dev) ? 1 :
lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
if (peer_gid->gid) /* peer triggered termination */
@@ -1881,7 +1881,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
lgr->smcd != smcismdev)
return false;
- if (smc_ism_is_virtual(smcismdev) &&
+ if (smc_ism_is_emulated(smcismdev) &&
lgr->peer_gid.gid_ext != peer_gid->gid_ext)
return false;
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 5a33908015f3..6fdb2d96777a 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -255,6 +255,7 @@ static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler smc_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_SMC,
.dump = smc_diag_handler_dump,
};
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index ffff40c30a06..165cd013404b 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -15,7 +15,7 @@
#include "smc.h"
-#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00
+#define SMC_EMULATED_ISM_CHID_MASK 0xFF00
#define SMC_ISM_IDENT_MASK 0x00FFFF
struct smcd_dev_list { /* List of SMCD devices */
@@ -66,10 +66,10 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
return rc < 0 ? rc : 0;
}
-static inline bool __smc_ism_is_virtual(u16 chid)
+static inline bool __smc_ism_is_emulated(u16 chid)
{
/* CHIDs in range of 0xFF00 to 0xFFFF are reserved
- * for virtual ISM device.
+ * for Emulated-ISM device.
*
* loopback-ism: 0xFFFF
* virtio-ism: 0xFF00 ~ 0xFFFE
@@ -77,11 +77,11 @@ static inline bool __smc_ism_is_virtual(u16 chid)
return ((chid & 0xFF00) == 0xFF00);
}
-static inline bool smc_ism_is_virtual(struct smcd_dev *smcd)
+static inline bool smc_ism_is_emulated(struct smcd_dev *smcd)
{
u16 chid = smcd->ops->get_chid(smcd);
- return __smc_ism_is_virtual(chid);
+ return __smc_ism_is_emulated(chid);
}
#endif
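
The helpers reduce to a range test on the top byte of the 16-bit CHID. A small user-space self-check of the same predicate against the values named in the comment (loopback-ism at 0xFFFF, virtio-ism from 0xFF00):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool chid_is_emulated(uint16_t chid)
{
	return (chid & 0xFF00) == 0xFF00;	/* same test as __smc_ism_is_emulated() */
}

int main(void)
{
	assert(chid_is_emulated(0xFFFF));	/* loopback-ism */
	assert(chid_is_emulated(0xFF00));	/* first virtio-ism CHID */
	assert(!chid_is_emulated(0x1234));	/* a regular ISM device */
	return 0;
}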
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index be1c4003d67d..bb0d71eb02a6 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -32,16 +32,17 @@ config TIPC_MEDIA_UDP
bool "IP/UDP media type support"
depends on TIPC
select NET_UDP_TUNNEL
+ default y
help
Saying Y here will enable support for running TIPC over IP/UDP
- bool
- default y
+
config TIPC_CRYPTO
bool "TIPC encryption support"
depends on TIPC
select CRYPTO
select CRYPTO_AES
select CRYPTO_GCM
+ default y
help
Saying Y here will enable support for TIPC encryption.
All TIPC messages will be encrypted/decrypted by using the currently most
@@ -49,8 +50,6 @@ config TIPC_CRYPTO
entering the TIPC stack.
Key setting from user-space is performed via netlink by a user program
(e.g. the iproute2 'tipc' tool).
- bool
- default y
config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index ee49a9f1dd4f..18e1636aa036 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -18,5 +18,5 @@ tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
-
-obj-$(CONFIG_TIPC_DIAG) += diag.o
+obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o
+tipc_diag-y += diag.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 878415c43527..5a526ebafeb4 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1079,30 +1079,27 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
b = tipc_bearer_find(net, name);
if (!b) {
- rtnl_unlock();
NL_SET_ERR_MSG(info->extack, "Bearer not found");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
- rtnl_unlock();
NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
- if (err) {
- rtnl_unlock();
- return err;
- }
}
#endif
+out:
rtnl_unlock();
- return 0;
+ return err;
}
int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
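
The bearer.c rewrite collapses three early unlock-and-return exits into a single unlock site, so a future error branch cannot forget the rtnl_unlock(). The general shape of the pattern, with err initialized so every path returns a defined value; resource_present() and do_configure() are hypothetical stand-ins:

static bool resource_present(void);	/* hypothetical check */
static int do_configure(void);		/* hypothetical; may fail */

static int example_locked_op(void)
{
	int err = 0;

	rtnl_lock();

	if (!resource_present()) {
		err = -EINVAL;
		goto out;
	}

	err = do_configure();		/* on failure, falls through to unlock */
out:
	rtnl_unlock();
	return err;
}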
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index 18733451c9e0..54dde8c4e4d4 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -95,6 +95,7 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
}
static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_TIPC,
.dump = tipc_sock_diag_handler_dump,
};
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3105abe97bb9..c1e890a82434 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -86,8 +86,6 @@ struct tipc_bclink_entry {
* @lock: rwlock governing access to structure
* @net: the applicable net namespace
* @hash: links to adjacent nodes in unsorted hash chain
- * @inputq: pointer to input queue containing messages for msg event
- * @namedq: pointer to name table input queue with name table messages
* @active_links: bearer ids of active links, used as index into links[] array
* @links: array containing references to all links to node
* @bc_entry: broadcast link entry
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index bb1118d02f95..7e4135db5816 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -80,7 +80,6 @@ struct sockaddr_pair {
* @phdr: preformatted message header used when sending messages
* @cong_links: list of congested links
* @publications: list of publications for port
- * @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
* @conn_timeout: the time we can wait for an unresponded setup request
* @probe_unacked: probe has not received ack yet
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 28b232f281ab..8b5d04210d7c 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -16,11 +16,6 @@ config UNIX
Say Y unless you know what you are doing.
-config UNIX_SCM
- bool
- depends on UNIX
- default y
-
config AF_UNIX_OOB
bool
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 20491825b4d0..4ddd125c4642 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -11,5 +11,3 @@ unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
-
-obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 30b178ebba60..4892e9428c9f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,8 +118,6 @@
#include <linux/btf_ids.h>
#include <linux/bpf-cgroup.h>
-#include "scm.h"
-
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
@@ -993,11 +991,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
- u = unix_sk(sk);
+ u = unix_sk(sk);
+ u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
@@ -1788,6 +1786,52 @@ out:
return err;
}
+/* The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ /* Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
+
+ return 0;
+}
+
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
@@ -1835,6 +1879,21 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
spin_unlock(&unix_gc_lock);
}
+static void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
+}
+
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
@@ -1921,11 +1980,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
long timeo;
int err;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out;
@@ -2197,11 +2257,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
bool fds_sent = false;
int data_len;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
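
Both sendmsg paths now call wait_for_unix_gc() only after scm_send() has parsed the control messages: the hook takes the scm_fp_list, so it can skip the penalty entirely when no AF_UNIX sockets ride along. A condensed sketch of that prologue, with error unwinding trimmed:

#include <net/af_unix.h>
#include <net/scm.h>

static int dgram_send_prologue(struct socket *sock, struct msghdr *msg,
			       struct scm_cookie *scm)
{
	int err;

	err = scm_send(sock, msg, scm, false);	/* parses SCM_RIGHTS into scm->fp */
	if (err < 0)
		return err;

	/* May kick or flush a GC cycle for heavy in-flight senders. */
	wait_for_unix_gc(scm->fp);
	return 0;
}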
diff --git a/net/unix/diag.c b/net/unix/diag.c
index be19827eca36..ae39538c5042 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -322,6 +322,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler unix_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_UNIX,
.dump = unix_diag_handler_dump,
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2ff7ddbaa782..51acf795f096 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -81,12 +81,80 @@
#include <net/scm.h>
#include <net/tcp_states.h>
-#include "scm.h"
+struct unix_sock *unix_get_socket(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops;
+ struct sock *sk = sock->sk;
+
+ ops = READ_ONCE(sock->ops);
-/* Internal data structures and random procedures: */
+ /* PF_UNIX ? */
+ if (sk && ops && ops->family == PF_UNIX)
+ return unix_sk(sk);
+ }
+
+ return NULL;
+}
+DEFINE_SPINLOCK(unix_gc_lock);
+unsigned int unix_tot_inflight;
static LIST_HEAD(gc_candidates);
-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+static LIST_HEAD(gc_inflight_list);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *filp)
+{
+ struct unix_sock *u = unix_get_socket(filp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (u) {
+ if (!u->inflight) {
+ WARN_ON_ONCE(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ WARN_ON_ONCE(list_empty(&u->link));
+ }
+ u->inflight++;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+
+ spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *filp)
+{
+ struct unix_sock *u = unix_get_socket(filp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (u) {
+ WARN_ON_ONCE(!u->inflight);
+ WARN_ON_ONCE(list_empty(&u->link));
+
+ u->inflight--;
+ if (!u->inflight)
+ list_del_init(&u->link);
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+ }
+
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+
+ spin_unlock(&unix_gc_lock);
+}
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
@@ -105,20 +173,15 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
while (nfd--) {
/* Get the socket the fd matches if it indeed does so */
- struct sock *sk = unix_get_socket(*fp++);
-
- if (sk) {
- struct unix_sock *u = unix_sk(sk);
+ struct unix_sock *u = unix_get_socket(*fp++);
- /* Ignore non-candidates, they could
- * have been added to the queues after
- * starting the garbage collection
- */
- if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
- hit = true;
+ /* Ignore non-candidates, they could have been added
+ * to the queues after starting the garbage collection
+ */
+ if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
+ hit = true;
- func(u);
- }
+ func(u);
}
}
if (hit && hitlist != NULL) {
@@ -151,7 +214,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
/* An embryo cannot be in-flight, so it's safe
* to use the list link.
*/
- BUG_ON(!list_empty(&u->link));
+ WARN_ON_ONCE(!list_empty(&u->link));
list_add_tail(&u->link, &embryos);
}
spin_unlock(&x->sk_receive_queue.lock);
@@ -166,17 +229,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
static void dec_inflight(struct unix_sock *usk)
{
- atomic_long_dec(&usk->inflight);
+ usk->inflight--;
}
static void inc_inflight(struct unix_sock *usk)
{
- atomic_long_inc(&usk->inflight);
+ usk->inflight++;
}
static void inc_inflight_move_tail(struct unix_sock *u)
{
- atomic_long_inc(&u->inflight);
+ u->inflight++;
+
/* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
@@ -186,40 +250,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
}
static bool gc_in_progress;
-#define UNIX_INFLIGHT_TRIGGER_GC 16000
-
-void wait_for_unix_gc(void)
-{
- /* If number of inflight sockets is insane,
- * force a garbage collect right now.
- * Paired with the WRITE_ONCE() in unix_inflight(),
- * unix_notinflight() and gc_in_progress().
- */
- if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
- !READ_ONCE(gc_in_progress))
- unix_gc();
- wait_event(unix_gc_wait, gc_in_progress == false);
-}
-/* The external entry point: unix_gc() */
-void unix_gc(void)
+static void __unix_gc(struct work_struct *work)
{
- struct sk_buff *next_skb, *skb;
- struct unix_sock *u;
- struct unix_sock *next;
struct sk_buff_head hitlist;
- struct list_head cursor;
+ struct unix_sock *u, *next;
LIST_HEAD(not_cycle_list);
+ struct list_head cursor;
spin_lock(&unix_gc_lock);
- /* Avoid a recursive GC. */
- if (gc_in_progress)
- goto out;
-
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
- WRITE_ONCE(gc_in_progress, true);
-
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -237,14 +277,12 @@ void unix_gc(void)
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
long total_refs;
- long inflight_refs;
total_refs = file_count(u->sk.sk_socket->file);
- inflight_refs = atomic_long_read(&u->inflight);
- BUG_ON(inflight_refs < 1);
- BUG_ON(total_refs < inflight_refs);
- if (total_refs == inflight_refs) {
+ WARN_ON_ONCE(!u->inflight);
+ WARN_ON_ONCE(total_refs < u->inflight);
+ if (total_refs == u->inflight) {
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
@@ -271,7 +309,7 @@ void unix_gc(void)
/* Move cursor to after the current position. */
list_move(&cursor, &u->link);
- if (atomic_long_read(&u->inflight) > 0) {
+ if (u->inflight) {
list_move_tail(&u->link, &not_cycle_list);
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);
@@ -298,19 +336,6 @@ void unix_gc(void)
spin_unlock(&unix_gc_lock);
- /* We need io_uring to clean its registered files, ignore all io_uring
- * originated skbs. It's fine as io_uring doesn't keep references to
- * other io_uring instances and so killing all other files in the cycle
- * will put all io_uring references forcing it to go through normal
- * release.path eventually putting registered files.
- */
- skb_queue_walk_safe(&hitlist, skb, next_skb) {
- if (skb->destructor == io_uring_destruct_scm) {
- __skb_unlink(skb, &hitlist);
- skb_queue_tail(&skb->sk->sk_receive_queue, skb);
- }
- }
-
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
@@ -328,20 +353,45 @@ void unix_gc(void)
spin_lock(&unix_gc_lock);
- /* There could be io_uring registered files, just push them back to
- * the inflight list
- */
- list_for_each_entry_safe(u, next, &gc_candidates, link)
- list_move_tail(&u->link, &gc_inflight_list);
-
/* All candidates should have been detached by now. */
- BUG_ON(!list_empty(&gc_candidates));
+ WARN_ON_ONCE(!list_empty(&gc_candidates));
/* Paired with READ_ONCE() in wait_for_unix_gc(). */
WRITE_ONCE(gc_in_progress, false);
- wake_up(&unix_gc_wait);
-
- out:
spin_unlock(&unix_gc_lock);
}
+
+static DECLARE_WORK(unix_gc_work, __unix_gc);
+
+void unix_gc(void)
+{
+ WRITE_ONCE(gc_in_progress, true);
+ queue_work(system_unbound_wq, &unix_gc_work);
+}
+
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
+#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
+
+void wait_for_unix_gc(struct scm_fp_list *fpl)
+{
+ /* If number of inflight sockets is insane,
+ * force a garbage collect right now.
+ *
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight(), and __unix_gc().
+ */
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
+ unix_gc();
+
+ /* Penalise users who want to send AF_UNIX sockets
+ * but whose sockets have not been received yet.
+ */
+ if (!fpl || !fpl->count_unix ||
+ READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
+ return;
+
+ if (READ_ONCE(gc_in_progress))
+ flush_work(&unix_gc_work);
+}
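
Two ideas carry the garbage.c rewrite. First, u->inflight drops from atomic_long_t to a plain counter because every writer already holds unix_gc_lock; ONCE accessors survive only for the aggregates that wait_for_unix_gc() reads locklessly. Second, collection moves to a workqueue, so senders never run the scan in their own context and only flush_work() once they exceed the per-user budget (UNIX_INFLIGHT_SANE_USER is eight full SCM_RIGHTS messages: SCM_MAX_FD is 253, so 2024 descriptors). A condensed sketch of the first rule:

/* Sketch: all inflight writers serialize on unix_gc_lock, so the
 * per-socket counter needs no atomics; only the aggregate counters,
 * read locklessly in wait_for_unix_gc(), keep WRITE_ONCE().
 */
static void sketch_inflight_inc(struct unix_sock *u, struct user_struct *user)
{
	spin_lock(&unix_gc_lock);

	if (!u->inflight++)		/* 0 -> 1: socket enters the GC list */
		list_add_tail(&u->link, &gc_inflight_list);

	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
	WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);

	spin_unlock(&unix_gc_lock);
}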
diff --git a/net/unix/scm.c b/net/unix/scm.c
deleted file mode 100644
index 822ce0d0d791..000000000000
--- a/net/unix/scm.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <linux/fs.h>
-#include <net/af_unix.h>
-#include <net/scm.h>
-#include <linux/init.h>
-#include <linux/io_uring.h>
-
-#include "scm.h"
-
-unsigned int unix_tot_inflight;
-EXPORT_SYMBOL(unix_tot_inflight);
-
-LIST_HEAD(gc_inflight_list);
-EXPORT_SYMBOL(gc_inflight_list);
-
-DEFINE_SPINLOCK(unix_gc_lock);
-EXPORT_SYMBOL(unix_gc_lock);
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- const struct proto_ops *ops = READ_ONCE(sock->ops);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && ops && ops->family == PF_UNIX)
- u_sock = s;
- }
-
- return u_sock;
-}
-EXPORT_SYMBOL(unix_get_socket);
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (atomic_long_inc_return(&u->inflight) == 1) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!atomic_long_read(&u->inflight));
- BUG_ON(list_empty(&u->link));
-
- if (atomic_long_dec_and_test(&u->inflight))
- list_del_init(&u->link);
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
- spin_unlock(&unix_gc_lock);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- /*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
- */
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return 0;
-}
-EXPORT_SYMBOL(unix_attach_fds);
-
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-EXPORT_SYMBOL(unix_detach_fds);
-
-void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
-
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-EXPORT_SYMBOL(unix_destruct_scm);
-
-void io_uring_destruct_scm(struct sk_buff *skb)
-{
- unix_destruct_scm(skb);
-}
-EXPORT_SYMBOL(io_uring_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
deleted file mode 100644
index 5a255a477f16..000000000000
--- a/net/unix/scm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef NET_UNIX_SCM_H
-#define NET_UNIX_SCM_H
-
-extern struct list_head gc_inflight_list;
-extern spinlock_t unix_gc_lock;
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-
-#endif
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
index 2e29994f92ff..ab87ef66c1e8 100644
--- a/net/vmw_vsock/diag.c
+++ b/net/vmw_vsock/diag.c
@@ -157,6 +157,7 @@ static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
static const struct sock_diag_handler vsock_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_VSOCK,
.dump = vsock_diag_handler_dump,
};
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 9f8955367275..09dcea0cbbed 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -194,6 +194,7 @@ static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
}
static const struct sock_diag_handler xsk_diag_handler = {
+ .owner = THIS_MODULE,
.family = AF_XDP,
.dump = xsk_diag_handler_dump,
};
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 21d50d75c260..dafefef3cf51 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -957,12 +957,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
-static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_exit_list, exit_list) {
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if __rcu **xip;
@@ -973,18 +973,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
for (xip = &xfrmn->xfrmi[i];
(xi = rtnl_dereference(*xip)) != NULL;
xip = &xi->next)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
if (xi)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations xfrmi_net_ops = {
- .exit_batch = xfrmi_exit_batch_net,
+ .exit_batch_rtnl = xfrmi_exit_batch_rtnl,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
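
The .exit_batch_rtnl hook is invoked by the pernet core with the RTNL already held; callbacks only queue their devices onto dev_to_kill, and the core issues one unregister_netdevice_many() for the whole batch of dying namespaces instead of each exit handler taking rtnl_lock() itself. A minimal sketch of a conversion following the xfrm one above; the foo net-generic layout is illustrative:

#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int foo_net_id __read_mostly;

struct foo_net {
	struct net_device *dev;		/* say, one device per namespace */
};

static void __net_exit foo_exit_batch_rtnl(struct list_head *net_exit_list,
					   struct list_head *dev_to_kill)
{
	struct net *net;

	ASSERT_RTNL();			/* the core already holds the lock */

	list_for_each_entry(net, net_exit_list, exit_list) {
		struct foo_net *fn = net_generic(net, foo_net_id);

		if (fn->dev)
			unregister_netdevice_queue(fn->dev, dev_to_kill);
	}
	/* No unregister_netdevice_many()/rtnl_unlock() here: core does both once. */
}

static struct pernet_operations foo_net_ops = {
	.exit_batch_rtnl = foo_exit_batch_rtnl,
	.id   = &foo_net_id,
	.size = sizeof(struct foo_net),
};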
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fee9b5cf37a7..5f9bf8e5c933 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -52,6 +52,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+ xfrm_state_update_stats(net);
snmp_get_cpu_field_batch(buff, xfrm_mib_list,
net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index bda5327bf34d..0c306473a79d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -570,7 +570,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
int err = 0;
spin_lock(&x->lock);
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (x->km.state == XFRM_STATE_DEAD)
goto out;
@@ -1935,7 +1935,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (!READ_ONCE(x->curlft.use_time))
WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
@@ -1957,6 +1957,19 @@ int xfrm_state_check_expire(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_check_expire);
+void xfrm_state_update_stats(struct net *net)
+{
+ struct xfrm_state *x;
+ int i;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
+ xfrm_dev_state_update_stats(x);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+}
+
struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family)
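
xfrm_state_update_stats() is what the xfrm_proc.c hunk above calls before snapshotting the SNMP counters: it walks every state in the bydst hash under xfrm_state_lock so offloading drivers can refresh both lifetime and error counters, hence the rename from *_update_curlft to *_update_stats. A sketch of a driver-side hook, assuming the offload callback follows the same rename (xdo_dev_state_update_stats); all foo_* names are hypothetical:

static void foo_xdo_dev_state_update_stats(struct xfrm_state *x)
{
	struct foo_hw_sa *sa = foo_sa_from_state(x);	/* hypothetical lookup */

	/* Lifetime counters, as the old _update_curlft hook refreshed ... */
	x->curlft.packets = foo_hw_packets(sa);		/* hypothetical HW reads */
	x->curlft.bytes = foo_hw_bytes(sa);

	/* ... plus error stats now exported via copy_to_user_state() below. */
	x->stats.integrity_failed = foo_hw_auth_failures(sa);
}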
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f037be190bae..a5232dcfea46 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -902,7 +902,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
if (x->xso.dev)
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
put_unaligned(x->stats.replay_window, &p->stats.replay_window);
put_unaligned(x->stats.replay, &p->stats.replay);