summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-10-18 09:38:54 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-10-18 09:38:54 +0200
commit412a5feba414127a6c69452dfad454086867011f (patch)
treebf5934b41bff82f4ec69283d10fb1a799f42a641 /net
parent5f0ac3a1dae17ec0af6b845376247d7cc948ea3d (diff)
parent519d81956ee277b4419c723adfb154603c2565ba (diff)
Merge 5.15-rc6 into tty-next
We need the serial/tty fixes in here as well. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c14
-rw-r--r--net/bridge/br_multicast.c6
-rw-r--r--net/bridge/br_netlink.c3
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/core/dev_addr_lists.c6
-rw-r--r--net/core/net-procfs.c24
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/sock.c52
-rw-r--r--net/dsa/Kconfig5
-rw-r--r--net/dsa/dsa2.c4
-rw-r--r--net/dsa/switch.c2
-rw-r--r--net/dsa/tag_dsa.c30
-rw-r--r--net/dsa/tag_ocelot.c1
-rw-r--r--net/dsa/tag_ocelot_8021q.c40
-rw-r--r--net/dsa/tag_sja1105.c43
-rw-r--r--net/ipv4/fib_semantics.c16
-rw-r--r--net/ipv4/icmp.c23
-rw-r--r--net/ipv4/inet_hashtables.c4
-rw-r--r--net/ipv4/netfilter/iptable_raw.c2
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c30
-rw-r--r--net/ipv4/udp.c13
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ioam6.c70
-rw-r--r--net/ipv6/ioam6_iptunnel.c6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c25
-rw-r--r--net/ipv6/route.c5
-rw-r--r--net/ipv6/udp.c5
-rw-r--r--net/mac80211/mesh_pathtbl.c5
-rw-r--r--net/mac80211/mesh_ps.c3
-rw-r--r--net/mac80211/rate.c4
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mac80211/tx.c12
-rw-r--r--net/mac80211/wpa.c6
-rw-r--r--net/mptcp/mptcp_diag.c2
-rw-r--r--net/mptcp/pm_netlink.c4
-rw-r--r--net/mptcp/protocol.c57
-rw-r--r--net/mptcp/protocol.h2
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/mptcp/syncookies.c13
-rw-r--r--net/mptcp/token.c11
-rw-r--r--net/mptcp/token_test.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c154
-rw-r--r--net/netfilter/nf_nat_core.c17
-rw-r--r--net/netfilter/nf_nat_masquerade.c168
-rw-r--r--net/netfilter/nf_tables_api.c121
-rw-r--r--net/netfilter/nft_compat.c17
-rw-r--r--net/netfilter/nft_quota.c2
-rw-r--r--net/netfilter/xt_LOG.c10
-rw-r--r--net/netfilter/xt_NFLOG.c10
-rw-r--r--net/netlink/af_netlink.c14
-rw-r--r--net/nfc/af_nfc.c3
-rw-r--r--net/nfc/digital_core.c9
-rw-r--r--net/nfc/digital_technology.c8
-rw-r--r--net/nfc/nci/rsp.c2
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/sch_api.c6
-rw-r--r--net/sched/sch_fifo.c3
-rw-r--r--net/sched/sch_mqprio.c30
-rw-r--r--net/sched/sch_taprio.c4
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/smc/smc_cdc.c7
-rw-r--r--net/smc/smc_core.c20
-rw-r--r--net/smc/smc_llc.c63
-rw-r--r--net/smc/smc_tx.c22
-rw-r--r--net/smc/smc_wr.h14
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/unix/af_unix.c94
-rw-r--r--net/xfrm/xfrm_user.c67
73 files changed, 939 insertions, 522 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2eb0e55ef54d..b5f4ef35357c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -552,6 +552,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
__skb->gso_segs = skb_shinfo(skb)->gso_segs;
}
+static struct proto bpf_dummy_proto = {
+ .name = "bpf_dummy",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct sock),
+};
+
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
@@ -596,20 +602,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
- sk = kzalloc(sizeof(struct sock), GFP_USER);
+ sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
if (!sk) {
kfree(data);
kfree(ctx);
return -ENOMEM;
}
- sock_net_set(sk, net);
sock_init_data(NULL, sk);
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
kfree(ctx);
- kfree(sk);
+ sk_free(sk);
return -ENOMEM;
}
skb->sk = sk;
@@ -682,8 +687,7 @@ out:
if (dev && dev != net->loopback_dev)
dev_put(dev);
kfree_skb(skb);
- bpf_sk_storage_free(sk);
- kfree(sk);
+ sk_free(sk);
kfree(ctx);
return ret;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3523c8c7068f..f3d751105343 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1677,8 +1677,6 @@ static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
int ifindex,
struct br_ip *saddr)
{
- lockdep_assert_held_once(&brmctx->br->multicast_lock);
-
write_seqcount_begin(&querier->seq);
querier->port_ifidx = ifindex;
memcpy(&querier->addr, saddr, sizeof(*saddr));
@@ -3867,13 +3865,13 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
- seqcount_init(&brmctx->ip4_querier.seq);
+ seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
- seqcount_init(&brmctx->ip6_querier.seq);
+ seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
timer_setup(&brmctx->ip4_mc_router_timer,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6c58fc14d2cb..5c6c4305ed23 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1666,7 +1666,8 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
}
return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
- nla_total_size(sizeof(struct br_mcast_stats)) +
+ nla_total_size_64bit(sizeof(struct br_mcast_stats)) +
+ (p ? nla_total_size_64bit(sizeof(p->stp_xstats)) : 0) +
nla_total_size(0);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b4cef3a97f12..e8136db44462 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -82,7 +82,7 @@ struct bridge_mcast_other_query {
struct bridge_mcast_querier {
struct br_ip addr;
int port_ifidx;
- seqcount_t seq;
+ seqcount_spinlock_t seq;
};
/* IGMP/MLD statistics */
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 8c39283c26ae..f0cb38344126 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -50,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
if (addr_len > MAX_ADDR_LEN)
return -EINVAL;
+ ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+ if (ha && !memcmp(addr, ha->addr, addr_len) &&
+ (!addr_type || addr_type == ha->type))
+ goto found_it;
+
while (*ins_point) {
int diff;
@@ -64,6 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
} else if (diff > 0) {
ins_point = &parent->rb_right;
} else {
+found_it:
if (exclusive)
return -EEXIST;
if (global) {
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index eab5fc88a002..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
- "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+ "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Interface| Receive "
- " | Transmit\n"
- " | bytes packets errs drop fifo frame "
- "compressed multicast| bytes packets errs "
- " drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Inter-| Receive "
+ " | Transmit\n"
+ " face |bytes packets errs drop fifo frame "
+ "compressed multicast|bytes packets errs "
+ "drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-9s %ps\n",
+ seq_printf(seq, " %-8s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,14 +327,12 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
+ if (v == SEQ_START_TOKEN)
return 0;
- }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
+ seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 972c8cb303a5..8ccce85562a1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5262,7 +5262,7 @@ nla_put_failure:
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
- size_t size = 0;
+ size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
diff --git a/net/core/sock.c b/net/core/sock.c
index 512e629f9780..c1601f75ec4b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1376,6 +1376,16 @@ set_sndbuf:
}
EXPORT_SYMBOL(sock_setsockopt);
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+ const struct cred *cred;
+
+ spin_lock(&sk->sk_peer_lock);
+ cred = get_cred(sk->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ return cred;
+}
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
@@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
+
+ spin_lock(&sk->sk_peer_lock);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
@@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_PEERGROUPS:
{
+ const struct cred *cred;
int ret, n;
- if (!sk->sk_peer_cred)
+ cred = sk_get_peer_cred(sk);
+ if (!cred)
return -ENODATA;
- n = sk->sk_peer_cred->group_info->ngroups;
+ n = cred->group_info->ngroups;
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
+ put_cred(cred);
return put_user(len, optlen) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval,
- sk->sk_peer_cred->group_info);
+ ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ put_cred(cred);
if (ret)
return ret;
goto lenout;
@@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_frag.page = NULL;
}
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+ put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
+
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
@@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
+ spin_lock_init(&sk->sk_peer_lock);
+
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -3210,24 +3230,8 @@ void release_sock(struct sock *sk)
}
EXPORT_SYMBOL(release_sock);
-/**
- * lock_sock_fast - fast version of lock_sock
- * @sk: socket
- *
- * This version should be used for very small section, where process wont block
- * return false if fast path is taken:
- *
- * sk_lock.slock locked, owned = 0, BH disabled
- *
- * return true if slow path is taken:
- *
- * sk_lock.slock unlocked, owned = 1, BH enabled
- */
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
- /* The sk_lock has mutex_lock() semantics here. */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
-
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
@@ -3256,7 +3260,7 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
-EXPORT_SYMBOL(lock_sock_fast);
+EXPORT_SYMBOL(__lock_sock_fast);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 548285539752..d8ee15f1c7a9 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -101,8 +101,6 @@ config NET_DSA_TAG_RTL4_A
config NET_DSA_TAG_OCELOT
tristate "Tag driver for Ocelot family of switches, using NPI port"
- depends on MSCC_OCELOT_SWITCH_LIB || \
- (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
select PACKING
help
Say Y or M if you want to enable NPI tagging for the Ocelot switches
@@ -114,8 +112,6 @@ config NET_DSA_TAG_OCELOT
config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
- depends on MSCC_OCELOT_SWITCH_LIB || \
- (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
help
Say Y or M if you want to enable support for tagging frames with a
custom VLAN-based header. Frames that require timestamping, such as
@@ -138,7 +134,6 @@ config NET_DSA_TAG_LAN9303
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
- depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index b29262eee00b..da18094b5a04 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -170,7 +170,7 @@ void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
/* Check if the bridge is still in use, otherwise it is time
* to clean it up so we can reuse this bridge_num later.
*/
- if (!dsa_bridge_num_find(bridge_dev))
+ if (dsa_bridge_num_find(bridge_dev) < 0)
clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
}
@@ -811,7 +811,9 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
if (!dsa_is_cpu_port(ds, port))
continue;
+ rtnl_lock();
err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
+ rtnl_unlock();
if (err) {
dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
tag_ops->name, ERR_PTR(err));
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 1c797ec8e2c2..6466d0539af9 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -168,7 +168,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (extack._msg)
dev_err(ds->dev, "port %d: %s\n", info->port,
extack._msg);
- if (err && err != EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP)
return err;
}
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 77d0ce89ab77..b3da4b2ea11c 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -45,6 +45,7 @@
* 6 6 2 2 4 2 N
*/
+#include <linux/dsa/mv88e6xxx.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -129,12 +130,9 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 tag_dev, tag_port;
enum dsa_cmd cmd;
u8 *dsa_header;
- u16 pvid = 0;
- int err;
if (skb->offload_fwd_mark) {
struct dsa_switch_tree *dst = dp->ds->dst;
- struct net_device *br = dp->bridge_dev;
cmd = DSA_CMD_FORWARD;
@@ -144,19 +142,6 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
*/
tag_dev = dst->last_switch + 1 + dp->bridge_num;
tag_port = 0;
-
- /* If we are offloading forwarding for a VLAN-unaware bridge,
- * inject packets to hardware using the bridge's pvid, since
- * that's where the packets ingressed from.
- */
- if (!br_vlan_enabled(br)) {
- /* Safe because __dev_queue_xmit() runs under
- * rcu_read_lock_bh()
- */
- err = br_vlan_get_pvid_rcu(br, &pvid);
- if (err)
- return NULL;
- }
} else {
cmd = DSA_CMD_FROM_CPU;
tag_dev = dp->ds->index;
@@ -180,16 +165,21 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
dsa_header[2] &= ~0x10;
}
} else {
+ struct net_device *br = dp->bridge_dev;
+ u16 vid;
+
+ vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
+
skb_push(skb, DSA_HLEN + extra);
dsa_alloc_etype_header(skb, DSA_HLEN + extra);
- /* Construct untagged DSA tag. */
+ /* Construct DSA header from untagged frame. */
dsa_header = dsa_etype_header_pos_tx(skb) + extra;
dsa_header[0] = (cmd << 6) | tag_dev;
dsa_header[1] = tag_port << 3;
- dsa_header[2] = pvid >> 8;
- dsa_header[3] = pvid & 0xff;
+ dsa_header[2] = vid >> 8;
+ dsa_header[3] = vid & 0xff;
}
return skb;
@@ -210,7 +200,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
cmd = dsa_header[0] >> 6;
switch (cmd) {
case DSA_CMD_FORWARD:
- trunk = !!(dsa_header[1] & 7);
+ trunk = !!(dsa_header[1] & 4);
break;
case DSA_CMD_TO_CPU:
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 8025ed778d33..605b51ca6921 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -2,7 +2,6 @@
/* Copyright 2019 NXP
*/
#include <linux/dsa/ocelot.h>
-#include <soc/mscc/ocelot.h>
#include "dsa_priv.h"
static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 59072930cb02..3412051981d7 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -9,10 +9,32 @@
* that on egress
*/
#include <linux/dsa/8021q.h>
-#include <soc/mscc/ocelot.h>
-#include <soc/mscc/ocelot_ptp.h>
+#include <linux/dsa/ocelot.h>
#include "dsa_priv.h"
+static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
+ struct sk_buff *skb)
+{
+ struct felix_deferred_xmit_work *xmit_work;
+ struct felix_port *felix_port = dp->priv;
+
+ xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
+ if (!xmit_work)
+ return NULL;
+
+ /* Calls felix_port_deferred_xmit in felix.c */
+ kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn);
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ xmit_work->dp = dp;
+ xmit_work->skb = skb_get(skb);
+
+ kthread_queue_work(felix_port->xmit_worker, &xmit_work->work);
+
+ return NULL;
+}
+
static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
@@ -20,18 +42,10 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- struct ocelot *ocelot = dp->ds->priv;
- int port = dp->index;
- u32 rew_op = 0;
+ struct ethhdr *hdr = eth_hdr(skb);
- rew_op = ocelot_ptp_rew_op(skb);
- if (rew_op) {
- if (!ocelot_can_inject(ocelot, 0))
- return NULL;
-
- ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
- return NULL;
- }
+ if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest))
+ return ocelot_defer_xmit(dp, skb);
return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
((pcp << VLAN_PRIO_SHIFT) | tx_vid));
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index c054f48541c8..2edede9ddac9 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -4,6 +4,7 @@
#include <linux/if_vlan.h>
#include <linux/dsa/sja1105.h>
#include <linux/dsa/8021q.h>
+#include <linux/skbuff.h>
#include <linux/packing.h>
#include "dsa_priv.h"
@@ -53,6 +54,11 @@
#define SJA1110_TX_TRAILER_LEN 4
#define SJA1110_MAX_PADDING_LEN 15
+enum sja1110_meta_tstamp {
+ SJA1110_META_TSTAMP_TX = 0,
+ SJA1110_META_TSTAMP_RX = 1,
+};
+
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
static inline bool sja1105_is_link_local(const struct sk_buff *skb)
{
@@ -520,6 +526,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
is_meta);
}
+static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
+ u8 ts_id, enum sja1110_meta_tstamp dir,
+ u64 tstamp)
+{
+ struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct skb_shared_hwtstamps shwt = {0};
+ struct sja1105_port *sp = dp->priv;
+
+ if (!dsa_port_is_sja1105(dp))
+ return;
+
+ /* We don't care about RX timestamps on the CPU port */
+ if (dir == SJA1110_META_TSTAMP_RX)
+ return;
+
+ spin_lock(&sp->data->skb_txtstamp_queue.lock);
+
+ skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) {
+ if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
+ continue;
+
+ __skb_unlink(skb, &sp->data->skb_txtstamp_queue);
+ skb_match = skb;
+
+ break;
+ }
+
+ spin_unlock(&sp->data->skb_txtstamp_queue.lock);
+
+ if (WARN_ON(!skb_match))
+ return;
+
+ shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
+ skb_complete_tx_timestamp(skb_match, &shwt);
+}
+
static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
{
u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b42c429cebbe..3364cb9c67e0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1661,7 +1661,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info);
#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
- int nh_weight, u8 rt_family)
+ int nh_weight, u8 rt_family, u32 nh_tclassid)
{
const struct net_device *dev = nhc->nhc_dev;
struct rtnexthop *rtnh;
@@ -1679,6 +1679,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
rtnh->rtnh_flags = flags;
+ if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
+ goto nla_put_failure;
+
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
@@ -1706,14 +1709,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
}
for_nexthops(fi) {
- if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
- AF_INET) < 0)
- goto nla_put_failure;
+ u32 nh_tclassid = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid &&
- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
- goto nla_put_failure;
+ nh_tclassid = nh->nh_tclassid;
#endif
+ if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+ AF_INET, nh_tclassid) < 0)
+ goto nla_put_failure;
} endfor_nexthops(fi);
mp_end:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8b30cadff708..b7e277d8a84d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1054,14 +1054,19 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
if (!ext_hdr || !iio)
goto send_mal_query;
- if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr))
+ if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) ||
+ ntohs(iio->extobj_hdr.length) > sizeof(_iio))
goto send_mal_query;
ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
+ iio = skb_header_pointer(skb, sizeof(_ext_hdr),
+ sizeof(iio->extobj_hdr) + ident_len, &_iio);
+ if (!iio)
+ goto send_mal_query;
+
status = 0;
dev = NULL;
switch (iio->extobj_hdr.class_type) {
case ICMP_EXT_ECHO_CTYPE_NAME:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
if (ident_len >= IFNAMSIZ)
goto send_mal_query;
memset(buff, 0, sizeof(buff));
@@ -1069,30 +1074,24 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
dev = dev_get_by_name(net, buff);
break;
case ICMP_EXT_ECHO_CTYPE_INDEX:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(iio->ident.ifindex), &_iio);
if (ident_len != sizeof(iio->ident.ifindex))
goto send_mal_query;
dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
break;
case ICMP_EXT_ECHO_CTYPE_ADDR:
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+ if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) ||
+ ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
iio->ident.addr.ctype3_hdr.addrlen)
goto send_mal_query;
switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
case ICMP_AFI_IP:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(struct in_addr), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr))
goto send_mal_query;
dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case ICMP_AFI_IP6:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in6_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
dev_hold(dev);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 80aeaf9e6e16..bfb522e51346 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -242,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
+ score = sk->sk_bound_dev_if ? 2 : 1;
- score = sk->sk_family == PF_INET ? 2 : 1;
+ if (sk->sk_family == PF_INET)
+ score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index b88e0f36cd05..8265c6765705 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -42,7 +42,7 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init iptable_raw_table_init(struct net *net)
+static int iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
const struct xt_table *table = &packet_raw;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 613432a36f0a..e61ea428ea18 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -20,13 +20,8 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-static unsigned int defrag4_pernet_id __read_mostly;
static DEFINE_MUTEX(defrag4_mutex);
-struct defrag4_pernet {
- unsigned int users;
-};
-
static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
u_int32_t user)
{
@@ -111,19 +106,15 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = {
static void __net_exit defrag4_net_exit(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
- if (nf_defrag->users) {
+ if (net->nf.defrag_ipv4_users) {
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
- nf_defrag->users = 0;
+ net->nf.defrag_ipv4_users = 0;
}
}
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
- .id = &defrag4_pernet_id,
- .size = sizeof(struct defrag4_pernet),
};
static int __init nf_defrag_init(void)
@@ -138,24 +129,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv4_enable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
int err = 0;
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv4_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_defrag->users) {
- nf_defrag->users++;
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
if (err == 0)
- nf_defrag->users = 1;
+ net->nf.defrag_ipv4_users = 1;
out_unlock:
mutex_unlock(&defrag4_mutex);
@@ -165,12 +155,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
void nf_defrag_ipv4_disable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users) {
- nf_defrag->users--;
- if (nf_defrag->users == 0)
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users--;
+ if (net->nf.defrag_ipv4_users == 0)
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8851c9463b4b..8536b2a7210b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -390,7 +390,8 @@ static int compute_score(struct sock *sk, struct net *net,
dif, sdif);
if (!dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if)
+ score += 4;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -1053,7 +1054,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
__be16 dport;
u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
struct ip_options_data opt_copy;
@@ -1361,7 +1362,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
}
up->len += size;
- if (!(up->corkflag || (flags&MSG_MORE)))
+ if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
@@ -2662,9 +2663,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
if (val != 0) {
- up->corkflag = 1;
+ WRITE_ONCE(up->corkflag, 1);
} else {
- up->corkflag = 0;
+ WRITE_ONCE(up->corkflag, 0);
lock_sock(sk);
push_pending_frames(sk);
release_sock(sk);
@@ -2787,7 +2788,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
- val = up->corkflag;
+ val = READ_ONCE(up->corkflag);
break;
case UDP_ENCAP:
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 55c290d55605..67c9114835c8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -106,7 +106,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score = 1;
+ score = sk->sk_bound_dev_if ? 2 : 1;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 5e8961004832..d128172bb549 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -770,6 +770,66 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
data += sizeof(__be32);
}
+ /* bit12 undefined: filled with empty value */
+ if (trace->type.bit12) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit13 undefined: filled with empty value */
+ if (trace->type.bit13) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit14 undefined: filled with empty value */
+ if (trace->type.bit14) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit15 undefined: filled with empty value */
+ if (trace->type.bit15) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit16 undefined: filled with empty value */
+ if (trace->type.bit16) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit17 undefined: filled with empty value */
+ if (trace->type.bit17) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit18 undefined: filled with empty value */
+ if (trace->type.bit18) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit19 undefined: filled with empty value */
+ if (trace->type.bit19) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit20 undefined: filled with empty value */
+ if (trace->type.bit20) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit21 undefined: filled with empty value */
+ if (trace->type.bit21) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
/* opaque state snapshot */
if (trace->type.bit22) {
if (!sc) {
@@ -791,16 +851,10 @@ void ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_schema *sc;
u8 sclen = 0;
- /* Skip if Overflow flag is set OR
- * if an unknown type (bit 12-21) is set
+ /* Skip if Overflow flag is set
*/
- if (trace->overflow ||
- trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
- trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
- trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21) {
+ if (trace->overflow)
return;
- }
/* NodeLen does not include Opaque State Snapshot length. We need to
* take it into account if the corresponding bit is set (bit 22) and
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f9ee04541c17..9b7b726f8f45 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -75,7 +75,11 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
u32 fields;
if (!trace->type_be32 || !trace->remlen ||
- trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21)
return false;
trace->nodelen = 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index de2cf3943b91..a579ea14a69b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
+ acpar.fragoff = 0;
acpar.hotdrop = false;
acpar.state = state;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index a0108415275f..5c47be29b9ee 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -33,7 +33,7 @@
static const char nf_frags_cache_name[] = "nf-frags";
-unsigned int nf_frag_pernet_id __read_mostly;
+static unsigned int nf_frag_pernet_id __read_mostly;
static struct inet_frags nf_frags;
static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index e8a59d8bf2ad..cb4eb1d2c620 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -25,8 +25,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-extern unsigned int nf_frag_pernet_id;
-
static DEFINE_MUTEX(defrag6_mutex);
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -91,12 +89,10 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = {
static void __net_exit defrag6_net_exit(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
-
- if (nf_frag->users) {
+ if (net->nf.defrag_ipv6_users) {
nf_unregister_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
- nf_frag->users = 0;
+ net->nf.defrag_ipv6_users = 0;
}
}
@@ -134,24 +130,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv6_enable(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
int err = 0;
mutex_lock(&defrag6_mutex);
- if (nf_frag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv6_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_frag->users) {
- nf_frag->users++;
+ if (net->nf.defrag_ipv6_users) {
+ net->nf.defrag_ipv6_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
if (err == 0)
- nf_frag->users = 1;
+ net->nf.defrag_ipv6_users = 1;
out_unlock:
mutex_unlock(&defrag6_mutex);
@@ -161,12 +156,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
void nf_defrag_ipv6_disable(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
-
mutex_lock(&defrag6_mutex);
- if (nf_frag->users) {
- nf_frag->users--;
- if (nf_frag->users == 0)
+ if (net->nf.defrag_ipv6_users) {
+ net->nf.defrag_ipv6_users--;
+ if (net->nf.defrag_ipv6_users == 0)
nf_unregister_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dbc224023977..9b9ef09382ab 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5681,14 +5681,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
- rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
+ rt->fib6_nh->fib_nh_weight, AF_INET6,
+ 0) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6) < 0)
+ AF_INET6, 0) < 0)
goto nla_put_failure;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ea53847b5b7e..8d785232b479 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net,
dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
if (!dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if)
+ score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -1303,7 +1304,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index efbefcbac3ac..7cab1cf09bf1 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void)
atomic_set(&newtbl->entries, 0);
spin_lock_init(&newtbl->gates_lock);
spin_lock_init(&newtbl->walk_lock);
- rhashtable_init(&newtbl->rhead, &mesh_rht_params);
+ if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) {
+ kfree(newtbl);
+ return NULL;
+ }
return newtbl;
}
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 204830a55240..3fbd0b9ff913 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -2,6 +2,7 @@
/*
* Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
* Copyright 2012-2013, cozybit Inc.
+ * Copyright (C) 2021 Intel Corporation
*/
#include "mesh.h"
@@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta,
/* only transmit to PS STA with announced, non-zero awake window */
if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
- (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+ (!elems->awake_window || !get_unaligned_le16(elems->awake_window)))
return;
if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index e5935e3d7a07..8c6416129d5b 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -392,10 +392,6 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
int mcast_rate;
bool use_basicrate = false;
- if (ieee80211_is_tx_data(txrc->skb) &&
- info->flags & IEEE80211_TX_CTL_NO_ACK)
- return false;
-
if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
__rate_control_send_low(txrc->hw, sband, pubsta, info,
txrc->rate_idx_mask);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 99ed68f7dc36..c4071b015c18 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4131,7 +4131,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
- ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+ !is_valid_ether_addr(hdr->addr2))
return false;
if (ieee80211_is_beacon(hdr->frame_control))
return true;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2d1193ed3eb5..8921088a5df6 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2209,7 +2209,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
}
vht_mcs = iterator.this_arg[4] >> 4;
+ if (vht_mcs > 11)
+ vht_mcs = 0;
vht_nss = iterator.this_arg[4] & 0xF;
+ if (!vht_nss || vht_nss > 8)
+ vht_nss = 1;
break;
/*
@@ -3380,6 +3384,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
goto out;
+ /* If n == 2, the "while (*frag_tail)" loop above didn't execute
+ * and frag_tail should be &skb_shinfo(head)->frag_list.
+ * However, ieee80211_amsdu_prepare_head() can reallocate it.
+ * Reload frag_tail to have it pointing to the correct place.
+ */
+ if (n == 2)
+ frag_tail = &skb_shinfo(head)->frag_list;
+
/*
* Pad out the previous subframe to a multiple of 4 by adding the
* padding to the next one, that's being added. Note that head->len
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index bca47fad5a16..4eed23e27610 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f48eb6315bbb..292374fb0779 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
struct sock *sk;
net = sock_net(in_skb->sk);
- msk = mptcp_token_get_sock(req->id.idiag_cookie[0]);
+ msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
if (!msk)
goto out_nosk;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index c4f9a5ce3815..050eea231528 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1718,9 +1718,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (addresses_equal(&entry->addr, &addr.addr, true)) {
- ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
- if (ret)
- return ret;
+ mptcp_nl_addr_backup(net, &entry->addr, bkup);
if (bkup)
entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index dbcebf56798f..d073b2111382 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -528,7 +528,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
sk->sk_shutdown |= RCV_SHUTDOWN;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
@@ -742,10 +741,9 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
- if (move_skbs_to_msk(msk, ssk)) {
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ if (move_skbs_to_msk(msk, ssk))
sk->sk_data_ready(sk);
- }
+
mptcp_data_unlock(sk);
}
@@ -847,7 +845,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
sk->sk_shutdown |= RCV_SHUTDOWN;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
sk->sk_data_ready(sk);
}
@@ -1759,21 +1756,6 @@ out:
return copied ? : ret;
}
-static void mptcp_wait_data(struct sock *sk, long *timeo)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
- sk_wait_event(sk, timeo,
- test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
-
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
-}
-
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
size_t len, int flags,
@@ -2077,19 +2059,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld", timeo);
- mptcp_wait_data(sk, &timeo);
- }
-
- if (skb_queue_empty_lockless(&sk->sk_receive_queue) &&
- skb_queue_empty(&msk->receive_queue)) {
- /* entire backlog drained, clear DATA_READY. */
- clear_bit(MPTCP_DATA_READY, &msk->flags);
-
- /* .. race-breaker: ssk might have gotten new data
- * after last __mptcp_move_skbs() returned false.
- */
- if (unlikely(__mptcp_move_skbs(msk)))
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk_wait_data(sk, &timeo, NULL);
}
out_err:
@@ -2098,9 +2068,9 @@ out_err:
tcp_recv_timestamp(msg, sk, &tss);
}
- pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
- msk, test_bit(MPTCP_DATA_READY, &msk->flags),
- skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
+ pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
+ msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
+ skb_queue_empty(&msk->receive_queue), copied);
if (!(flags & MSG_PEEK))
mptcp_rcv_space_adjust(msk, copied);
@@ -2368,7 +2338,6 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
inet_sk_state_store(sk, TCP_CLOSE);
sk->sk_shutdown = SHUTDOWN_MASK;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
mptcp_close_wake_up(sk);
@@ -2735,7 +2704,7 @@ cleanup:
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow = lock_sock_fast(ssk);
+ bool slow = lock_sock_fast_nested(ssk);
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
@@ -3385,8 +3354,14 @@ unlock_fail:
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
- return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM :
- 0;
+ /* Concurrent splices from sk_receive_queue into receive_queue will
+ * always show at least one non-empty queue when checked in this order.
+ */
+ if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
+ skb_queue_empty_lockless(&msk->receive_queue))
+ return 0;
+
+ return EPOLLIN | EPOLLRDNORM;
}
static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
@@ -3421,7 +3396,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN)
- return mptcp_check_readable(msk);
+ return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0;
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3e6fd1615f1..dc984676c5eb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -709,7 +709,7 @@ int mptcp_token_new_connect(struct sock *sk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
-struct mptcp_sock *mptcp_token_get_sock(u32 token);
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
long *s_num);
void mptcp_token_destroy(struct mptcp_sock *msk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1de7ce883c37..6172f380dfb7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -86,7 +86,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
struct mptcp_sock *msk;
int local_id;
- msk = mptcp_token_get_sock(subflow_req->token);
+ msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
if (!msk) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
return NULL;
diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
index 37127781aee9..7f22526346a7 100644
--- a/net/mptcp/syncookies.c
+++ b/net/mptcp/syncookies.c
@@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
e->valid = 0;
- msk = mptcp_token_get_sock(e->token);
+ msk = mptcp_token_get_sock(net, e->token);
if (!msk) {
spin_unlock_bh(&join_entry_locks[i]);
return false;
}
- /* If this fails, the token got re-used in the mean time by another
- * mptcp socket in a different netns, i.e. entry is outdated.
- */
- if (!net_eq(sock_net((struct sock *)msk), net))
- goto err_put;
-
subflow_req->remote_nonce = e->remote_nonce;
subflow_req->local_nonce = e->local_nonce;
subflow_req->backup = e->backup;
@@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
subflow_req->msk = msk;
spin_unlock_bh(&join_entry_locks[i]);
return true;
-
-err_put:
- spin_unlock_bh(&join_entry_locks[i]);
- sock_put((struct sock *)msk);
- return false;
}
void __init mptcp_join_cookie_init(void)
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index a98e554b034f..e581b341c5be 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -231,6 +231,7 @@ found:
/**
* mptcp_token_get_sock - retrieve mptcp connection sock using its token
+ * @net: restrict to this namespace
* @token: token of the mptcp connection to retrieve
*
* This function returns the mptcp connection structure with the given token.
@@ -238,7 +239,7 @@ found:
*
* returns NULL if no connection with the given token value exists.
*/
-struct mptcp_sock *mptcp_token_get_sock(u32 token)
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token)
{
struct hlist_nulls_node *pos;
struct token_bucket *bucket;
@@ -251,11 +252,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token)
again:
sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
msk = mptcp_sk(sk);
- if (READ_ONCE(msk->token) != token)
+ if (READ_ONCE(msk->token) != token ||
+ !net_eq(sock_net(sk), net))
continue;
+
if (!refcount_inc_not_zero(&sk->sk_refcnt))
goto not_found;
- if (READ_ONCE(msk->token) != token) {
+
+ if (READ_ONCE(msk->token) != token ||
+ !net_eq(sock_net(sk), net)) {
sock_put(sk);
goto again;
}
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index e1bd6f0a0676..5d984bec1cd8 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
mptcp_token_init_request((struct request_sock *)req);
+ sock_net_set((struct sock *)req, &init_net);
return req;
}
@@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test)
KUNIT_ASSERT_EQ(test, 0,
mptcp_token_new_request((struct request_sock *)req));
KUNIT_EXPECT_NE(test, 0, (int)req->token);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token));
/* cleanup */
mptcp_token_destroy_request((struct request_sock *)req);
@@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test)
msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+ sock_net_set((struct sock *)msk, &init_net);
return msk;
}
@@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test)
mptcp_token_new_connect((struct sock *)icsk));
KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token));
KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));
mptcp_token_destroy(msk);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token));
}
static void mptcp_token_test_accept(struct kunit *test)
@@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test)
mptcp_token_new_request((struct request_sock *)req));
msk->token = req->token;
mptcp_token_accept(req, msk);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
/* this is now a no-op */
mptcp_token_destroy_request((struct request_sock *)req);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
/* cleanup */
mptcp_token_destroy(msk);
@@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test)
/* simulate race on removal */
refcount_set(&sk->sk_refcnt, 0);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token));
/* cleanup */
mptcp_token_destroy(msk);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6186358eac7c..6e391308431d 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -130,11 +130,11 @@ htable_size(u8 hbits)
{
size_t hsize;
- /* We must fit both into u32 in jhash and size_t */
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index c100c6b112c8..2c467c422dc6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void)
int idx;
/* Compute size and mask */
+ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+ pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 94e18fb9690d..770a63103c7a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
-#define MAX_CHAINLEN 64u
+#define MIN_CHAINLEN 8u
+#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
static struct conntrack_gc_work conntrack_gc_work;
@@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
const struct net *net)
{
struct {
struct nf_conntrack_man src;
union nf_inet_addr dst_addr;
+ unsigned int zone;
u32 net_mix;
u16 dport;
u16 proto;
@@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
/* The direction must be ignored, so handle usable members manually. */
combined.src = tuple->src;
combined.dst_addr = tuple->dst.u3;
+ combined.zone = zoneid;
combined.net_mix = net_hash_mix(net);
combined.dport = (__force __u16)tuple->dst.u.all;
combined.proto = tuple->dst.protonum;
@@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)
static u32 __hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
unsigned int size)
{
- return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+ return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
}
static u32 hash_conntrack(const struct net *net,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid)
{
- return scale_hash(hash_conntrack_raw(tuple, net));
+ return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
}
static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
clean_from_lists(ct);
@@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
- return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, net));
+ unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ struct nf_conntrack_tuple_hash *thash;
+
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone_id, net));
+
+ if (thash)
+ return thash;
+
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (rid != zone_id)
+ return __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+ return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
+ unsigned int max_chainlen;
unsigned int chainlen = 0;
unsigned int sequence;
int err = -EEXIST;
@@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1103,8 +1131,8 @@ drop:
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
+ unsigned int chainlen = 0, sequence, max_chainlen;
const struct nf_conntrack_zone *zone;
- unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
@@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN) {
+ if (chainlen++ > max_chainlen) {
chaintoolong:
nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
rcu_read_lock();
begin:
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = __hash_conntrack(net, tuple, hsize);
+ hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
+ u32 hash, zone_id, rid;
struct nf_conn *ct;
- u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple, state->net);
+
+ zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ hash = hash_conntrack_raw(&tuple, zone_id, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+ if (!h) {
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (zone_id != rid) {
+ u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+ }
+ }
+
if (!h) {
h = init_conntrack(state->net, tmpl, &tuple,
skb, dataoff, hash);
@@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
spinlock_t *lockp;
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+ if (hlist_nulls_empty(hslot))
+ continue;
+
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
- continue;
- /* All nf_conn objects are added to hash table twice, one
- * for original direction tuple, once for the reply tuple.
- *
- * Exception: In the IPS_NAT_CLASH case, only the reply
- * tuple is added (the original tuple already existed for
- * a different object).
- *
- * We only need to call the iterator once for each
- * conntrack, so we just use the 'reply' direction
- * tuple while iterating.
- */
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- goto found;
- }
+ hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+ continue;
+ /* All nf_conn objects are added to hash table twice, one
+ * for original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
}
spin_unlock(lockp);
local_bh_enable();
@@ -2264,26 +2308,20 @@ found:
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
- unsigned int bucket = 0, sequence;
+ unsigned int bucket = 0;
struct nf_conn *ct;
might_sleep();
- for (;;) {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
-
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
- /* Time to push up daises... */
+ mutex_lock(&nf_conntrack_mutex);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ /* Time to push up daises... */
- nf_ct_delete(ct, portid, report);
- nf_ct_put(ct);
- cond_resched();
- }
-
- if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
- break;
- bucket = 0;
+ nf_ct_delete(ct, portid, report);
+ nf_ct_put(ct);
+ cond_resched();
}
+ mutex_unlock(&nf_conntrack_mutex);
}
struct iter_data {
@@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
if (!hash)
return -ENOMEM;
+ mutex_lock(&nf_conntrack_mutex);
old_size = nf_conntrack_htable_size;
if (old_size == hashsize) {
+ mutex_unlock(&nf_conntrack_mutex);
kvfree(hash);
return 0;
}
@@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
for (i = 0; i < nf_conntrack_htable_size; i++) {
while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ unsigned int zone_id;
+
h = hlist_nulls_entry(nf_conntrack_hash[i].first,
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
bucket = __hash_conntrack(nf_ct_net(ct),
- &h->tuple, hashsize);
+ &h->tuple, zone_id, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
@@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
nf_conntrack_all_unlock();
local_bh_enable();
+ mutex_unlock(&nf_conntrack_mutex);
+
synchronize_net();
kvfree(old_hash);
return 0;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7008961f5cb0..273117683922 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
/* We keep an extra hash for each conntrack, for fast searching. */
static unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
struct {
struct nf_conntrack_man src;
u32 net_mix;
u32 protonum;
+ u32 zone;
} __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
@@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
/* Original src, to ensure we map it consistently if poss. */
combined.src = tuple->src;
- combined.net_mix = net_hash_mix(n);
+ combined.net_mix = net_hash_mix(net);
combined.protonum = tuple->dst.protonum;
+ /* Zone ID can be used provided its valid for both directions */
+ if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
+ combined.zone = zone->id;
+
hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
return reciprocal_scale(hash, nf_nat_htable_size);
@@ -272,7 +279,7 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range2 *range)
{
- unsigned int h = hash_by_src(net, tuple);
+ unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn *ct;
hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
@@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
unsigned int srchash;
spinlock_t *lock;
- srchash = hash_by_src(net,
+ srchash = hash_by_src(net, nf_ct_zone(ct),
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
spin_lock_bh(lock);
@@ -788,7 +795,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
unsigned int h;
- h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..acd73f717a08 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -9,8 +9,19 @@
#include <net/netfilter/nf_nat_masquerade.h>
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ union nf_inet_addr addr;
+ int ifindex;
+ int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT 16
+
static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
-static int device_cmp(struct nf_conn *i, void *ifindex)
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&masq_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+ int ifindex,
+ int (*iter)(struct nf_conn *i, void *data),
+ gfp_t gfp_flags)
+{
+ struct masq_dev_work *w;
+
+ if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+ return;
+
+ net = maybe_get_net(net);
+ if (!net)
+ return;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kzalloc(sizeof(*w), gfp_flags);
+ if (w) {
+ /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+ atomic_inc(&masq_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = ifindex;
+ w->net = net;
+ w->iter = iter;
+ if (addr)
+ w->addr = *addr;
+ schedule_work(&w->work);
+ return;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
{
const struct nf_conn_nat *nat = nfct_nat(i);
+ const struct masq_dev_work *w = arg;
if (!nat)
return 0;
- return nat->masq_index == (int)(long)ifindex;
+ return nat->masq_index == w->ifindex;
}
static int masq_device_event(struct notifier_block *this,
@@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this,
* and forget them.
*/
- nf_ct_iterate_cleanup_net(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
+ nf_nat_masq_schedule(net, NULL, dev->ifindex,
+ device_cmp, GFP_KERNEL);
}
return NOTIFY_DONE;
@@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this,
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
struct nf_conntrack_tuple *tuple;
+ struct masq_dev_work *w = ptr;
- if (!device_cmp(ct, (void *)(long)dev->ifindex))
+ if (!device_cmp(ct, ptr))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- return ifa->ifa_address == tuple->dst.u3.ip;
+ return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}
static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
- struct net *net = dev_net(idev->dev);
+ const struct in_ifaddr *ifa = ptr;
+ const struct in_device *idev;
+ const struct net_device *dev;
+ union nf_inet_addr addr;
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
/* The masq_dev_notifier will catch the case of the device going
* down. So if the inetdev is dead and being destroyed we have
* no work to do. Otherwise this is an individual address removal
* and we have to perform the flush.
*/
+ idev = ifa->ifa_dev;
if (idev->dead)
return NOTIFY_DONE;
- if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+ memset(&addr, 0, sizeof(addr));
+
+ addr.ip = ifa->ifa_address;
+
+ dev = idev->dev;
+ nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+ inet_cmp, GFP_KERNEL);
return NOTIFY_DONE;
}
@@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = {
};
#if IS_ENABLED(CONFIG_IPV6)
-static atomic_t v6_worker_count __read_mostly;
-
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
@@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
-struct masq_dev_work {
- struct work_struct work;
- struct net *net;
- struct in6_addr addr;
- int ifindex;
-};
-
-static int inet6_cmp(struct nf_conn *ct, void *work)
-{
- struct masq_dev_work *w = (struct masq_dev_work *)work;
- struct nf_conntrack_tuple *tuple;
-
- if (!device_cmp(ct, (void *)(long)w->ifindex))
- return 0;
-
- tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
- return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
- struct masq_dev_work *w;
-
- w = container_of(work, struct masq_dev_work, work);
-
- nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
-
- put_net(w->net);
- kfree(w);
- atomic_dec(&v6_worker_count);
- module_put(THIS_MODULE);
-}
-
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
*
* Defer it to the system workqueue.
@@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this,
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
- struct masq_dev_work *w;
- struct net *net;
+ union nf_inet_addr addr;
- if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+ if (event != NETDEV_DOWN)
return NOTIFY_DONE;
dev = ifa->idev->dev;
- net = maybe_get_net(dev_net(dev));
- if (!net)
- return NOTIFY_DONE;
- if (!try_module_get(THIS_MODULE))
- goto err_module;
+ memset(&addr, 0, sizeof(addr));
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- atomic_inc(&v6_worker_count);
-
- INIT_WORK(&w->work, iterate_cleanup_work);
- w->ifindex = dev->ifindex;
- w->net = net;
- w->addr = ifa->addr;
- schedule_work(&w->work);
+ addr.in6 = ifa->addr;
- return NOTIFY_DONE;
- }
-
- module_put(THIS_MODULE);
- err_module:
- put_net(net);
+ nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+ GFP_ATOMIC);
return NOTIFY_DONE;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 081437dd75b7..c0851fec11d4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -780,6 +780,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -790,8 +791,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table);
+ event, flags, ctx->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -1563,6 +1567,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -1573,8 +1578,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table,
+ event, flags, ctx->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -2866,8 +2874,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
- const struct nft_rule *rule,
- const struct nft_rule *prule)
+ const struct nft_rule *rule, u64 handle)
{
struct nlmsghdr *nlh;
const struct nft_expr *expr, *next;
@@ -2887,9 +2894,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
NFTA_RULE_PAD))
goto nla_put_failure;
- if (event != NFT_MSG_DELRULE && prule) {
- if (nla_put_be64(skb, NFTA_RULE_POSITION,
- cpu_to_be64(prule->handle),
+ if (event != NFT_MSG_DELRULE && handle) {
+ if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle),
NFTA_RULE_PAD))
goto nla_put_failure;
}
@@ -2925,7 +2931,10 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule, int event)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nft_rule *prule;
struct sk_buff *skb;
+ u64 handle = 0;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -2936,9 +2945,20 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (event == NFT_MSG_NEWRULE &&
+ !list_is_first(&rule->list, &ctx->chain->rules) &&
+ !list_is_last(&rule->list, &ctx->chain->rules)) {
+ prule = list_prev_entry(rule, list);
+ handle = prule->handle;
+ }
+ if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE))
+ flags |= NLM_F_APPEND;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table,
- ctx->chain, rule, NULL);
+ event, flags, ctx->family, ctx->table,
+ ctx->chain, rule, handle);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -2964,6 +2984,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
unsigned int s_idx = cb->args[0];
+ u64 handle;
prule = NULL;
list_for_each_entry_rcu(rule, &chain->rules, list) {
@@ -2975,12 +2996,17 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
}
+ if (prule)
+ handle = prule->handle;
+ else
+ handle = 0;
+
if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, prule) < 0)
+ table, chain, rule, handle) < 0)
return 1;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -3143,7 +3169,7 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
- family, table, chain, rule, NULL);
+ family, table, chain, rule, 0);
if (err < 0)
goto err_fill_rule_info;
@@ -3403,17 +3429,15 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0)
+ goto err_destroy_flow_rule;
+
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err_destroy_flow_rule;
}
- err = nft_delrule(&ctx, old_rule);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_destroy_flow_rule;
- }
-
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
@@ -3943,8 +3967,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
gfp_t gfp_flags)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
- struct sk_buff *skb;
u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -3955,7 +3980,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
- err = nf_tables_fill_set(skb, ctx, set, event, 0);
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_set(skb, ctx, set, event, flags);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -4336,7 +4364,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
alloc_size = sizeof(*set) + size + udlen;
- if (alloc_size < size)
+ if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
@@ -5231,12 +5259,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_set_elem *elem,
- int event, u16 flags)
+ int event)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
u32 portid = ctx->portid;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -5246,6 +5275,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
set, elem);
if (err < 0) {
@@ -6921,7 +6953,7 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
- int family, int report, gfp_t gfp)
+ u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
@@ -6946,8 +6978,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
if (skb == NULL)
goto err;
- err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family,
- table, obj, false);
+ err = nf_tables_fill_obj_info(skb, net, portid, seq, event,
+ flags & (NLM_F_CREATE | NLM_F_EXCL),
+ family, table, obj, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -6964,7 +6997,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->family, ctx->report, GFP_KERNEL);
+ ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
}
/*
@@ -7745,6 +7778,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -7755,8 +7789,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
- ctx->seq, event, 0,
+ ctx->seq, event, flags,
ctx->family, flowtable, hook_list);
if (err < 0) {
kfree_skb(skb);
@@ -8634,7 +8671,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_setelem_activate(net, te->set, &te->elem);
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_NEWSETELEM, 0);
+ NFT_MSG_NEWSETELEM);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
@@ -8642,7 +8679,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_DELSETELEM, 0);
+ NFT_MSG_DELSETELEM);
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem)) {
atomic_dec(&te->set->nelems);
@@ -9599,7 +9636,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
table->use--;
nf_tables_chain_destroy(&ctx);
}
- list_del(&table->list);
nf_tables_table_destroy(&ctx);
}
@@ -9612,6 +9648,8 @@ static void __nft_release_tables(struct net *net)
if (nft_table_has_owner(table))
continue;
+ list_del(&table->list);
+
__nft_release_table(net, table);
}
}
@@ -9619,31 +9657,38 @@ static void __nft_release_tables(struct net *net)
static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
+ struct nft_table *table, *to_delete[8];
struct nftables_pernet *nft_net;
struct netlink_notify *n = ptr;
- struct nft_table *table, *nt;
struct net *net = n->net;
- bool release = false;
+ unsigned int deleted;
+ bool restart = false;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
nft_net = nft_pernet(net);
+ deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
__nft_release_hook(net, table);
- release = true;
+ list_del_rcu(&table->list);
+ to_delete[deleted++] = table;
+ if (deleted >= ARRAY_SIZE(to_delete))
+ break;
}
}
- if (release) {
+ if (deleted) {
+ restart = deleted >= ARRAY_SIZE(to_delete);
synchronize_rcu();
- list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
- if (nft_table_has_owner(table) &&
- n->portid == table->nlpid)
- __nft_release_table(net, table);
- }
+ while (deleted)
+ __nft_release_table(net, to_delete[--deleted]);
+
+ if (restart)
+ goto again;
}
mutex_unlock(&nft_net->commit_mutex);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 272bcdb1392d..f69cc73c5813 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,6 +19,7 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
/* Used for matches where *info is larger than X byte */
#define NFT_MATCH_LARGE_THRESH 192
@@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_compat_wait_for_destructors();
ret = xt_check_target(&par, size, proto, inv);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ const char *modname = NULL;
+
+ if (strcmp(target->name, "LOG") == 0)
+ modname = "nf_log_syslog";
+ else if (strcmp(target->name, "NFLOG") == 0)
+ modname = "nfnetlink_log";
+
+ if (modname &&
+ nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
+ return -EAGAIN;
+ }
+
return ret;
+ }
/* The standard target cannot be used */
if (!target->target)
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0363f533a42b..c4d1389f7185 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -60,7 +60,7 @@ static void nft_quota_obj_eval(struct nft_object *obj,
if (overquota &&
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
- NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
+ NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
}
static int nft_quota_do_init(const struct nlattr * const tb[],
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 2ff75f7637b0..f39244f9c0ed 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int log_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
+ int ret;
if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
return -EINVAL;
@@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nf_log_syslog");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ }
+
+ return ret;
}
static void log_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index fb5793208059..e660c3710a10 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int nflog_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
+ int ret;
if (info->flags & ~XT_NFLOG_MASK)
return -EINVAL;
if (info->prefix[sizeof(info->prefix) - 1] != '\0')
return -EINVAL;
- return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nfnetlink_log");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ }
+
+ return ret;
}
static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 24b7cf447bc5..ada47e59647a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -594,7 +594,10 @@ static int netlink_insert(struct sock *sk, u32 portid)
/* We need to ensure that the socket is hashed and visible. */
smp_wmb();
- nlk_sk(sk)->bound = portid;
+ /* Paired with lockless reads from netlink_bind(),
+ * netlink_connect() and netlink_sendmsg().
+ */
+ WRITE_ONCE(nlk_sk(sk)->bound, portid);
err:
release_sock(sk);
@@ -1012,7 +1015,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->ngroups < BITS_PER_LONG)
groups &= (1UL << nlk->ngroups) - 1;
- bound = nlk->bound;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ bound = READ_ONCE(nlk->bound);
if (bound) {
/* Ensure nlk->portid is up-to-date. */
smp_rmb();
@@ -1098,8 +1102,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
/* No need for barriers here as we return to user-space without
* using any of the bound attributes.
+ * Paired with WRITE_ONCE() in netlink_insert().
*/
- if (!nlk->bound)
+ if (!READ_ONCE(nlk->bound))
err = netlink_autobind(sock);
if (err == 0) {
@@ -1888,7 +1893,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dst_group = nlk->dst_group;
}
- if (!nlk->bound) {
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ if (!READ_ONCE(nlk->bound)) {
err = netlink_autobind(sock);
if (err)
goto out;
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 6024fad905ff..dda323e0a473 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -60,6 +60,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto)
proto_tab[nfc_proto->id] = nfc_proto;
write_unlock(&proto_tab_lock);
+ if (rc)
+ proto_unregister(nfc_proto->proto);
+
return rc;
}
EXPORT_SYMBOL(nfc_proto_register);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index fefc03674f4f..d63d2e5dc60c 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -277,6 +277,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param)
static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
{
struct digital_tg_mdaa_params *params;
+ int rc;
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
@@ -291,8 +292,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2);
params->sc = DIGITAL_SENSF_FELICA_SC;
- return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
- 500, digital_tg_recv_atr_req, NULL);
+ rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
+ 500, digital_tg_recv_atr_req, NULL);
+ if (rc)
+ kfree(params);
+
+ return rc;
}
static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech)
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 84d2345c75a3..3adf4589852a 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -465,8 +465,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
skb_put_u8(skb, sel_cmd);
skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR);
- return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
- target);
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
}
static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg,
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index a2e72c003805..b911ab78bed9 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -334,6 +334,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
ndev->cur_conn_id);
if (conn_info) {
list_del(&conn_info->list);
+ if (conn_info == ndev->rf_conn_info)
+ ndev->rf_conn_info = NULL;
devm_kfree(&ndev->nfc_dev->dev, conn_info);
}
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 23b21253b3c3..eb6345a027e1 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2188,18 +2188,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
arg->count = arg->skip;
+ rcu_read_lock();
idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) {
/* don't return filters that are being deleted */
if (!refcount_inc_not_zero(&f->refcnt))
continue;
+ rcu_read_unlock();
+
if (arg->fn(tp, f, arg) < 0) {
__fl_put(f);
arg->stop = 1;
+ rcu_read_lock();
break;
}
__fl_put(f);
arg->count++;
+ rcu_read_lock();
}
+ rcu_read_unlock();
arg->cookie = id;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5e90e9b160e3..12f39a2dffd4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -513,6 +513,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
return stab;
}
+ if (s->size_log > STAB_SIZE_LOG_MAX ||
+ s->cell_log > STAB_SIZE_LOG_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+ return ERR_PTR(-EINVAL);
+ }
+
stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index a579a4131d22..e1040421b797 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -233,6 +233,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
+ if (!q->ops->change)
+ return 0;
+
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 8766ab5b8788..5eb3b1b7ae5e 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -529,22 +529,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
spin_lock_bh(qdisc_lock(qdisc));
+
if (qdisc_is_percpu_stats(qdisc)) {
- cpu_bstats = qdisc->cpu_bstats;
- cpu_qstats = qdisc->cpu_qstats;
+ qlen = qdisc_qlen_sum(qdisc);
+
+ __gnet_stats_copy_basic(NULL, &bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats,
+ qlen);
+ } else {
+ qlen += qdisc->q.qlen;
+ bstats.bytes += qdisc->bstats.bytes;
+ bstats.packets += qdisc->bstats.packets;
+ qstats.backlog += qdisc->qstats.backlog;
+ qstats.drops += qdisc->qstats.drops;
+ qstats.requeues += qdisc->qstats.requeues;
+ qstats.overlimits += qdisc->qstats.overlimits;
}
-
- qlen = qdisc_qlen_sum(qdisc);
- __gnet_stats_copy_basic(NULL, &sch->bstats,
- cpu_bstats, &qdisc->bstats);
- __gnet_stats_copy_queue(&sch->qstats,
- cpu_qstats,
- &qdisc->qstats,
- qlen);
spin_unlock_bh(qdisc_lock(qdisc));
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1ab2fc933a21..b9fd18d98646 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1641,6 +1641,10 @@ static void taprio_destroy(struct Qdisc *sch)
list_del(&q->taprio_list);
spin_unlock(&taprio_list_lock);
+ /* Note that taprio_reset() might not be called if an error
+ * happens in qdisc_create(), after taprio_init() has been called.
+ */
+ hrtimer_cancel(&q->advance_timer);
taprio_disable_offload(dev, q, NULL);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5ef86fdb1176..1f1786021d9c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -702,7 +702,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
/* Break out if chunk length is less then minimal. */
- if (ntohs(ch->length) < sizeof(_ch))
+ if (!ch || ntohs(ch->length) < sizeof(_ch))
break;
ch_end = offset + SCTP_PAD4(ntohs(ch->length));
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b8fa8f1a7277..c7503fd64915 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3697,7 +3697,7 @@ struct sctp_chunk *sctp_make_strreset_req(
outlen = (sizeof(outreq) + stream_len) * out;
inlen = (sizeof(inreq) + stream_len) * in;
- retval = sctp_make_reconf(asoc, outlen + inlen);
+ retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen));
if (!retval)
return NULL;
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index f23f558054a7..99acd337ba90 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -150,9 +150,11 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
again:
link = conn->lnk;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
if (rc)
- return rc;
+ goto put_out;
spin_lock_bh(&conn->send_lock);
if (link != conn->lnk) {
@@ -160,6 +162,7 @@ again:
spin_unlock_bh(&conn->send_lock);
smc_wr_tx_put_slot(link,
(struct smc_wr_tx_pend_priv *)pend);
+ smc_wr_tx_link_put(link);
if (again)
return -ENOLINK;
again = true;
@@ -167,6 +170,8 @@ again:
}
rc = smc_cdc_msg_send(conn, wr_buf, pend);
spin_unlock_bh(&conn->send_lock);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8280c938be80..d2206743dc71 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -949,7 +949,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
to_lnk = &lgr->lnk[i];
break;
}
- if (!to_lnk) {
+ if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
smc_lgr_terminate_sched(lgr);
return NULL;
}
@@ -981,24 +981,26 @@ again:
read_unlock_bh(&lgr->conns_lock);
/* pre-fetch buffer outside of send_lock, might sleep */
rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
/* avoid race with smcr_tx_sndbuf_nonempty() */
spin_lock_bh(&conn->send_lock);
smc_switch_link_and_count(conn, to_lnk);
rc = smc_switch_cursor(smc, pend, wr_buf);
spin_unlock_bh(&conn->send_lock);
sock_put(&smc->sk);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
goto again;
}
read_unlock_bh(&lgr->conns_lock);
+ smc_wr_tx_link_put(to_lnk);
return to_lnk;
+
+err_out:
+ smcr_link_down_cond_sched(to_lnk);
+ smc_wr_tx_link_put(to_lnk);
+ return NULL;
}
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 2e7560eba981..72f4b72eb175 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -383,9 +383,11 @@ int smc_llc_send_confirm_link(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
@@ -402,6 +404,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -415,9 +419,11 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
struct smc_link *link;
int i, rc, rtok_ix;
+ if (!smc_wr_tx_link_hold(send_link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
@@ -444,6 +450,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
(u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(send_link, pend);
+put_out:
+ smc_wr_tx_link_put(send_link);
return rc;
}
@@ -456,9 +464,11 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
@@ -467,6 +477,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -480,9 +492,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
memset(addllc, 0, sizeof(*addllc));
@@ -504,6 +518,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
}
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -517,9 +533,11 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
memset(delllc, 0, sizeof(*delllc));
@@ -536,6 +554,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -547,9 +567,11 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
testllc = (struct smc_llc_msg_test_link *)wr_buf;
memset(testllc, 0, sizeof(*testllc));
testllc->hd.common.type = SMC_LLC_TEST_LINK;
@@ -557,6 +579,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -567,13 +591,16 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/* schedule an llc send on link, may wait for buffers,
@@ -586,13 +613,16 @@ static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+ rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/********************************* receive ***********************************/
@@ -672,9 +702,11 @@ static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_buf_desc *rmb;
u8 n;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
memset(addc_llc, 0, sizeof(*addc_llc));
@@ -706,7 +738,10 @@ static int smc_llc_add_link_cont(struct smc_link *link,
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
if (lgr->role == SMC_CLNT)
addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
static int smc_llc_cli_rkey_exchange(struct smc_link *link,
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c79361dfcdfb..738a4a99c827 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -505,8 +505,11 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
+ if (!link || !smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
+ smc_wr_tx_link_put(link);
if (rc == -EBUSY) {
struct smc_sock *smc =
container_of(conn, struct smc_sock, conn);
@@ -547,22 +550,7 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
out_unlock:
spin_unlock_bh(&conn->send_lock);
- return rc;
-}
-
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- struct smc_link *link = conn->lnk;
- int rc = -ENOLINK;
-
- if (!link)
- return rc;
-
- atomic_inc(&link->wr_tx_refcnt);
- if (smc_link_usable(link))
- rc = _smcr_tx_sndbuf_nonempty(conn);
- if (atomic_dec_and_test(&link->wr_tx_refcnt))
- wake_up_all(&link->wr_tx_wait);
+ smc_wr_tx_link_put(link);
return rc;
}
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 423b8709f1c9..2bc626f230a5 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -60,6 +60,20 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
atomic_long_set(wr_tx_id, val);
}
+static inline bool smc_wr_tx_link_hold(struct smc_link *link)
+{
+ if (!smc_link_usable(link))
+ return false;
+ atomic_inc(&link->wr_tx_refcnt);
+ return true;
+}
+
+static inline void smc_wr_tx_link_put(struct smc_link *link)
+{
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+}
+
static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk)
{
wake_up_all(&lnk->wr_tx_wait);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 3e776e3dff91..1f2817195549 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -645,7 +645,7 @@ static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci,
}
__set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
goto ok;
- } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
+ } else if (seq_num + GSS_SEQ_WIN <= sd->sd_max) {
goto toolow;
}
if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 92345c9bb60c..89f9e85ae970 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
static void init_peercred(struct sock *sk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(task_tgid(current));
sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
@@ -806,7 +828,7 @@ static void unix_unhash(struct sock *sk)
}
struct proto unix_dgram_proto = {
- .name = "UNIX-DGRAM",
+ .name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
@@ -828,20 +850,25 @@ struct proto unix_stream_proto = {
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
- struct sock *sk = NULL;
struct unix_sock *u;
+ struct sock *sk;
+ int err;
atomic_long_inc(&unix_nr_socks);
- if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
- goto out;
+ if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
+ err = -ENFILE;
+ goto err;
+ }
if (type == SOCK_STREAM)
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
else /*dgram and seqpacket */
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
- if (!sk)
- goto out;
+ if (!sk) {
+ err = -ENOMEM;
+ goto err;
+ }
sock_init_data(sock, sk);
@@ -861,20 +888,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
unix_insert_socket(unix_sockets_unbound(sk), sk);
-out:
- if (sk == NULL)
- atomic_long_dec(&unix_nr_socks);
- else {
- local_bh_disable();
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- local_bh_enable();
- }
+
+ local_bh_disable();
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ local_bh_enable();
+
return sk;
+
+err:
+ atomic_long_dec(&unix_nr_socks);
+ return ERR_PTR(err);
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ struct sock *sk;
+
if (protocol && protocol != PF_UNIX)
return -EPROTONOSUPPORT;
@@ -901,7 +931,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
+ sk = unix_create1(net, sock, kern, sock->type);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+
+ return 0;
}
static int unix_release(struct socket *sock)
@@ -1314,12 +1348,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
we will have to recheck all again in any case.
*/
- err = -ENOMEM;
-
/* create new sock for complete connection */
newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
- if (newsk == NULL)
+ if (IS_ERR(newsk)) {
+ err = PTR_ERR(newsk);
+ newsk = NULL;
goto out;
+ }
+
+ err = -ENOMEM;
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
@@ -2845,6 +2882,9 @@ static int unix_shutdown(struct socket *sock, int mode)
unix_state_lock(sk);
sk->sk_shutdown |= mode;
+ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+ mode == SHUTDOWN_MASK)
+ sk->sk_state = TCP_CLOSE;
other = unix_peer(sk);
if (other)
sock_hold(other);
@@ -2867,12 +2907,10 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK) {
+ if (peer_mode == SHUTDOWN_MASK)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- other->sk_state = TCP_CLOSE;
- } else if (peer_mode & RCV_SHUTDOWN) {
+ else if (peer_mode & RCV_SHUTDOWN)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
- }
}
if (other)
sock_put(other);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 03b66d154b2b..3a3cb09eec12 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1961,24 +1961,65 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
return skb;
}
+static int xfrm_notify_userpolicy(struct net *net)
+{
+ struct xfrm_userpolicy_default *up;
+ int len = NLMSG_ALIGN(sizeof(*up));
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err;
+
+ skb = nlmsg_new(len, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_GETDEFAULT, sizeof(*up), 0);
+ if (nlh == NULL) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ up = nlmsg_data(nlh);
+ up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+
+ nlmsg_end(skb, nlh);
+
+ rcu_read_lock();
+ err = xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
+ rcu_read_unlock();
+
+ return err;
+}
+
static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
struct net *net = sock_net(skb->sk);
struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
- u8 dirmask;
- u8 old_default = net->xfrm.policy_default;
- if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX)
- return -EINVAL;
+ if (up->in == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN;
+ else if (up->in == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN;
- dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK;
+ if (up->fwd == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD;
+ else if (up->fwd == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD;
- net->xfrm.policy_default = (old_default & (0xff ^ dirmask))
- | (up->action << up->dirmask);
+ if (up->out == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT;
+ else if (up->out == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT;
rt_genid_bump_all(net);
+ xfrm_notify_userpolicy(net);
return 0;
}
@@ -1988,13 +2029,11 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
struct sk_buff *r_skb;
struct nlmsghdr *r_nlh;
struct net *net = sock_net(skb->sk);
- struct xfrm_userpolicy_default *r_up, *up;
+ struct xfrm_userpolicy_default *r_up;
int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
u32 portid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
- up = nlmsg_data(nlh);
-
r_skb = nlmsg_new(len, GFP_ATOMIC);
if (!r_skb)
return -ENOMEM;
@@ -2007,8 +2046,12 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
r_up = nlmsg_data(r_nlh);
- r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask);
- r_up->dirmask = up->dirmask;
+ r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
nlmsg_end(r_skb, r_nlh);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);