summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-12 16:24:03 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-12 16:24:03 -1000
commitf8e6dfc64f6135d1b6c5215c14cd30b9b60a0008 (patch)
tree2c278a5e83c6066f92fa8c6ff2a1e5870aeecb30 /net
parent3a03c67de276a6abb412771311f93a73e192b615 (diff)
parenta9a507013a6f98218d1797c8808bd9ba1e79782d (diff)
Merge tag 'net-5.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Networking fixes, including fixes from netfilter, bpf, can and ieee802154. The size of this is pretty normal, but we got more fixes for 5.14 changes this week than last week. Nothing major but the trend is the opposite of what we like. We'll see how the next week goes.. Current release - regressions: - r8169: fix ASPM-related link-up regressions - bridge: fix flags interpretation for extern learn fdb entries - phy: micrel: fix link detection on ksz87xx switch - Revert "tipc: Return the correct errno code" - ptp: fix possible memory leak caused by invalid cast Current release - new code bugs: - bpf: add missing bpf_read_[un]lock_trace() for syscall program - bpf: fix potentially incorrect results with bpf_get_local_storage() - page_pool: mask the page->signature before the checking, avoid dma mapping leaks - netfilter: nfnetlink_hook: 5 fixes to information in netlink dumps - bnxt_en: fix firmware interface issues with PTP - mlx5: Bridge, fix ageing time Previous releases - regressions: - linkwatch: fix failure to restore device state across suspend/resume - bareudp: fix invalid read beyond skb's linear data Previous releases - always broken: - bpf: fix integer overflow involving bucket_size - ppp: fix issues when desired interface name is specified via netlink - wwan: mhi_wwan_ctrl: fix possible deadlock - dsa: microchip: ksz8795: fix number of VLAN related bugs - dsa: drivers: fix broken backpressure in .port_fdb_dump - dsa: qca: ar9331: make proper initial port defaults Misc: - bpf: add lockdown check for probe_write_user helper - netfilter: conntrack: remove offload_pickup sysctl before 5.14 is out - netfilter: conntrack: collect all entries in one cycle, heuristically slow down garbage collection scans on idle systems to prevent frequent wake ups" * tag 'net-5.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (87 commits) vsock/virtio: avoid potential deadlock when vsock device remove wwan: core: Avoid returning NULL from wwan_create_dev() net: dsa: sja1105: unregister the MDIO buses during teardown Revert "tipc: Return the correct errno code" net: mscc: Fix non-GPL export of regmap APIs net: igmp: increase size of mr_ifc_count MAINTAINERS: switch to my OMP email for Renesas Ethernet drivers tcp_bbr: fix u32 wrap bug in round logic if bbr_init() called after 2B packets net: pcs: xpcs: fix error handling on failed to allocate memory net: linkwatch: fix failure to restore device state across suspend/resume net: bridge: fix memleak in br_add_if() net: switchdev: zero-initialize struct switchdev_notifier_fdb_info emitted by drivers towards the bridge net: bridge: fix flags interpretation for extern learn fdb entries net: dsa: sja1105: fix broken backpressure in .port_fdb_dump net: dsa: lantiq: fix broken backpressure in .port_fdb_dump net: dsa: lan9303: fix broken backpressure in .port_fdb_dump net: dsa: hellcreek: fix broken backpressure in .port_fdb_dump bpf, core: Fix kernel-doc notation net: igmp: fix data-race in igmp_ifc_timer_expire() net: Fix memory leak in ieee802154_raw_deliver ...
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c4
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_fdb.c11
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c6
-rw-r--r--net/core/link_watch.c5
-rw-r--r--net/core/page_pool.c10
-rw-r--r--net/dccp/dccp.h6
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ieee802154/socket.c7
-rw-r--r--net/ipv4/igmp.c21
-rw-r--r--net/ipv4/tcp_bbr.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c16
-rw-r--r--net/netfilter/nf_conntrack_core.c71
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c1
-rw-r--r--net/netfilter/nf_conntrack_standalone.c16
-rw-r--r--net/netfilter/nf_flow_table_core.c11
-rw-r--r--net/netfilter/nfnetlink_hook.c24
-rw-r--r--net/openvswitch/flow.c13
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_core.c4
-rw-r--r--net/smc/smc_core.h4
-rw-r--r--net/smc/smc_llc.c10
-rw-r--r--net/smc/smc_tx.c18
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/tipc/link.c6
-rw-r--r--net/vmw_vsock/virtio_transport.c7
39 files changed, 244 insertions, 130 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1cc75c811e24..caa16bf30fb5 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -7,6 +7,7 @@
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
+#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
@@ -951,7 +952,10 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
goto out;
}
}
+
+ rcu_read_lock_trace();
retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+ rcu_read_unlock_trace();
if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
err = -EFAULT;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index bbab9984f24e..ef743f94254d 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -166,8 +166,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
- fdb_info->vid,
- fdb_info->is_local, false);
+ fdb_info->vid, false);
if (err) {
err = notifier_from_errno(err);
break;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 835cec1e5a03..5dee30966ed3 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1044,10 +1044,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
"FDB entry towards bridge must be permanent");
return -EINVAL;
}
-
- err = br_fdb_external_learn_add(br, p, addr, vid,
- ndm->ndm_state & NUD_PERMANENT,
- true);
+ err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
@@ -1275,7 +1272,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid, bool is_local,
+ const unsigned char *addr, u16 vid,
bool swdev_notify)
{
struct net_bridge_fdb_entry *fdb;
@@ -1293,7 +1290,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
flags |= BIT(BR_FDB_ADDED_BY_USER);
- if (is_local)
+ if (!p)
flags |= BIT(BR_FDB_LOCAL);
fdb = fdb_create(br, p, addr, vid, flags);
@@ -1322,7 +1319,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
- if (is_local)
+ if (!p)
set_bit(BR_FDB_LOCAL, &fdb->flags);
if (modified)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 6e4a32354a13..14cd6ef96111 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -616,6 +616,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
+ br_multicast_del_port(p);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -729,6 +730,7 @@ err4:
err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
+ br_multicast_del_port(p);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aa64d8d63ca3..2b48b204205e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -711,7 +711,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid, bool is_local,
+ const unsigned char *addr, u16 vid,
bool swdev_notify);
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 8d033a75a766..fdbed3158555 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
skb = ip_fraglist_next(&iter);
}
+
+ if (!err)
+ return 0;
+
+ kfree_skb_list(iter.frag);
+
return err;
}
slow_path:
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 75431ca9300f..1a455847da54 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -158,7 +158,7 @@ static void linkwatch_do_dev(struct net_device *dev)
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
- if (dev->flags & IFF_UP && netif_device_present(dev)) {
+ if (dev->flags & IFF_UP) {
if (netif_carrier_ok(dev))
dev_activate(dev);
else
@@ -204,7 +204,8 @@ static void __linkwatch_run_queue(int urgent_only)
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
- if (urgent_only && !linkwatch_urgent_event(dev)) {
+ if (!netif_device_present(dev) ||
+ (urgent_only && !linkwatch_urgent_event(dev))) {
list_add_tail(&dev->link_watch_list, &lweventlist);
continue;
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5e4eb45b139c..8ab7b402244c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -634,7 +634,15 @@ bool page_pool_return_skb_page(struct page *page)
struct page_pool *pp;
page = compound_head(page);
- if (unlikely(page->pp_magic != PP_SIGNATURE))
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+ * head page of compound page and bit 1 for pfmemalloc page, so
+ * mask those bits for freeing side when doing below checking,
+ * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
return false;
pp = page->pp;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 9cc9d1ee6cdb..c5c1d2b8045e 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -41,9 +41,9 @@ extern bool dccp_debug;
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
-#define dccp_pr_debug(format, a...)
-#define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
+#define dccp_pr_debug(format, a...) do {} while (0)
+#define dccp_pr_debug_cat(format, a...) do {} while (0)
+#define dccp_debug(format, a...) do {} while (0)
#endif
extern struct inet_hashinfo dccp_hashinfo;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 532085da8d8f..23be8e01026b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2291,8 +2291,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
static void
dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
{
+ struct switchdev_notifier_fdb_info info = {};
struct dsa_switch *ds = switchdev_work->ds;
- struct switchdev_notifier_fdb_info info;
struct dsa_port *dp;
if (!dsa_is_user_port(ds, switchdev_work->port))
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a45a0401adc5..c25f7617770c 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -984,6 +984,11 @@ static const struct proto_ops ieee802154_dgram_ops = {
.sendpage = sock_no_sendpage,
};
+static void ieee802154_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
@@ -1024,7 +1029,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
sock->ops = ops;
sock_init_data(sock, sk);
- /* FIXME: sk->sk_destruct */
+ sk->sk_destruct = ieee802154_sock_destruct;
sk->sk_family = PF_IEEE802154;
/* Checksums on by default */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6b3c558a4f23..00576bae183d 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t)
static void igmp_ifc_timer_expire(struct timer_list *t)
{
struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
+ u32 mr_ifc_count;
igmpv3_send_cr(in_dev);
- if (in_dev->mr_ifc_count) {
- in_dev->mr_ifc_count--;
+restart:
+ mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count);
+
+ if (mr_ifc_count) {
+ if (cmpxchg(&in_dev->mr_ifc_count,
+ mr_ifc_count,
+ mr_ifc_count - 1) != mr_ifc_count)
+ goto restart;
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
@@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
igmp_ifc_start_timer(in_dev, 1);
}
@@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qri;
}
/* cancel the interface change timer */
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
/* clear deleted report items */
@@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev)
igmp_group_dropped(pmc);
#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
in_dev->mr_gq_running = 0;
@@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(pmc->interface);
@@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* else no filters; keep old mode for reports */
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(in_dev);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6ea3dc2e4219..6274462b86b4 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1041,7 +1041,7 @@ static void bbr_init(struct sock *sk)
bbr->prior_cwnd = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
- bbr->next_rtt_delivered = 0;
+ bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index d1bef23fd4f5..dd30c03d5a23 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+ /* 64bit division is not allowed on 32bit */
+ if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried) {
ip = ntohl(h->next.ip);
e.ip = htonl(ip);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 18346d18aa16..153de3457423 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
+ if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index e1ca11196515..7303138e46be 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index ab179e064597..334fb1ad0e86 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8f075b44cf64..7df94f437f60 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c1a11f041ac6..1422739d9aa2 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
do {
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index ddd51c2e1cb3..9810f5bf63f5 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 6532f0505e66..3d09eefe998a 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+ u64 n = 0, m = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index ec1564a1cb5a..09cf72eb37f8 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+ u64 n = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 0e91d1e82f1c..19bcdb3141f6 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+ u64 n = 0, m = 0;
bool with_ports = false;
int ret;
@@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5c03e5106751..d31dbccbe7bd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
- u32 last_bucket;
+ u32 next_bucket;
bool exiting;
bool early_drop;
- long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
-/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 128u
-/* upper bound of full table scan */
-#define GC_MAX_SCAN_JIFFIES (16u * HZ)
-/* desired ratio of entries found to be expired */
-#define GC_EVICT_RATIO 50u
+#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
static struct conntrack_gc_work conntrack_gc_work;
@@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
- unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned int nf_conntrack_max95 = 0;
+ unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ unsigned long next_run = GC_SCAN_INTERVAL;
struct conntrack_gc_work *gc_work;
- unsigned int ratio, scanned = 0;
- unsigned long next_run;
-
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
- i = gc_work->last_bucket;
+ i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
@@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work)
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int hashsz;
struct nf_conn *tmp;
- i++;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hashsz);
- if (i >= hashsz)
- i = 0;
+ if (i >= hashsz) {
+ rcu_read_unlock();
+ break;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
@@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
- scanned++;
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
continue;
@@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work)
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
- expired_count++;
continue;
}
@@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched();
- } while (++buckets < goal);
+ i++;
+
+ if (time_after(jiffies, end_time) && i < hashsz) {
+ gc_work->next_bucket = i;
+ next_run = 0;
+ break;
+ }
+ } while (i < hashsz);
if (gc_work->exiting)
return;
@@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work)
*
* This worker is only here to reap expired entries when system went
* idle after a busy period.
- *
- * The heuristics below are supposed to balance conflicting goals:
- *
- * 1. Minimize time until we notice a stale entry
- * 2. Maximize scan intervals to not waste cycles
- *
- * Normally, expire ratio will be close to 0.
- *
- * As soon as a sizeable fraction of the entries have expired
- * increase scan frequency.
*/
- ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio > GC_EVICT_RATIO) {
- gc_work->next_gc_run = min_interval;
- } else {
- unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
-
- BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
-
- gc_work->next_gc_run += min_interval;
- if (gc_work->next_gc_run > max)
- gc_work->next_gc_run = max;
+ if (next_run) {
+ gc_work->early_drop = false;
+ gc_work->next_bucket = 0;
}
-
- next_run = gc_work->next_gc_run;
- gc_work->last_bucket = i;
- gc_work->early_drop = false;
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3259416f2ea4..af5115e127cf 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
tn->offload_timeout = 30 * HZ;
- tn->offload_pickup = 120 * HZ;
#endif
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 698fee49e732..f8e3c0d2602f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
un->offload_timeout = 30 * HZ;
- un->offload_pickup = 30 * HZ;
#endif
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 214d9f9e499b..e84b499b7bfa 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -575,7 +575,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
- NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
@@ -585,7 +584,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
- NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
#endif
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
@@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
- .procname = "nf_flowtable_tcp_pickup",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose",
@@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
- .procname = "nf_flowtable_udp_pickup",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
.procname = "nf_conntrack_icmp_timeout",
@@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
#endif
}
@@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
#endif
nf_conntrack_standalone_init_tcp_sysctl(net, table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 551976e4284c..8788b519255e 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -183,7 +183,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
- unsigned int timeout;
+ s32 timeout;
l4proto = nf_ct_l4proto_find(l4num);
if (!l4proto)
@@ -192,15 +192,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- timeout = tn->offload_pickup;
+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
- timeout = tn->offload_pickup;
+ timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout -= tn->offload_timeout;
} else {
return;
}
+ if (timeout < 0)
+ timeout = 0;
+
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout;
}
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 202f57d17bab..f554e2ea32ee 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest2)
goto cancel_nest;
- ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name);
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
if (ret)
goto cancel_nest;
- ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name);
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
if (ret)
goto cancel_nest;
@@ -109,18 +113,19 @@ cancel_nest:
static int nfnl_hook_dump_one(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx,
const struct nf_hook_ops *ops,
- unsigned int seq)
+ int family, unsigned int seq)
{
u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
unsigned int portid = NETLINK_CB(nlskb).portid;
struct nlmsghdr *nlh;
int ret = -EMSGSIZE;
+ u32 hooknum;
#ifdef CONFIG_KALLSYMS
char sym[KSYM_SYMBOL_LEN];
char *module_name;
#endif
nlh = nfnl_msg_put(nlskb, portid, seq, event,
- NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0);
+ NLM_F_MULTI, family, NFNETLINK_V0, 0);
if (!nlh)
goto nla_put_failure;
@@ -135,6 +140,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
if (module_name) {
char *end;
+ *module_name = '\0';
module_name += 2;
end = strchr(module_name, ']');
if (end) {
@@ -151,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
goto nla_put_failure;
#endif
- ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum));
+ if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS)
+ hooknum = NF_NETDEV_INGRESS;
+ else
+ hooknum = ops->hooknum;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum));
if (ret)
goto nla_put_failure;
@@ -259,7 +270,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb,
ops = nf_hook_entries_get_hook_ops(e);
for (; i < e->num_hook_entries; i++) {
- err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq);
+ err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family,
+ cb->nlh->nlmsg_seq);
if (err)
break;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e586424d8b04..9713035b89e3 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -293,14 +293,14 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
}
/**
- * Parse vlan tag from vlan header.
+ * parse_vlan_tag - Parse vlan tag from vlan header.
* @skb: skb containing frame to parse
* @key_vh: pointer to parsed vlan tag
* @untag_vlan: should the vlan header be removed from the frame
*
- * Returns ERROR on memory error.
- * Returns 0 if it encounters a non-vlan or incomplete packet.
- * Returns 1 after successfully parsing vlan tag.
+ * Return: ERROR on memory error.
+ * %0 if it encounters a non-vlan or incomplete packet.
+ * %1 after successfully parsing vlan tag.
*/
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
bool untag_vlan)
@@ -532,6 +532,7 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
* L3 header
* @key: output flow key
*
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
@@ -748,8 +749,6 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
- * Returns 0 if successful, otherwise a negative errno value.
- *
* Initializes @skb header fields as follows:
*
* - skb->mac_header: the L2 header.
@@ -764,6 +763,8 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* - skb->protocol: the type of the data starting at skb->network_header.
* Equals to key->eth.type.
+ *
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7153c67f641e..2ef4cd2c848b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -273,6 +273,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
+ /* All mirred/redirected skbs should clear previous ct info */
+ nf_reset_ct(skb2);
+
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
expects_nh = want_ingress || !m_mac_header_xmit;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 898389611ae8..c038efc23ce3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -795,7 +795,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
reason_code = SMC_CLC_DECL_NOSRVLINK;
goto connect_abort;
}
- smc->conn.lnk = link;
+ smc_switch_link_and_count(&smc->conn, link);
}
/* create send buffer and rmb */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index cd0d7c908b2a..c160ff50c053 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -917,8 +917,8 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
return rc;
}
-static void smc_switch_link_and_count(struct smc_connection *conn,
- struct smc_link *to_lnk)
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
conn->lnk = to_lnk;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6d6fd1397c87..c043ecdca5c4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -97,6 +97,7 @@ struct smc_link {
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
+ atomic_t wr_tx_refcnt; /* tx refs to link */
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
@@ -109,6 +110,7 @@ struct smc_link {
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ atomic_t wr_reg_refcnt; /* reg refs to link */
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
@@ -444,6 +446,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
int smcr_buf_reg_lgr(struct smc_link *lnk);
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 273eaf1bfe49..2e7560eba981 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -888,6 +888,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
if (!rc)
goto out;
out_clear_lnk:
+ lnk_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(lnk_new, false);
out_reject:
smc_llc_cli_add_link_reject(qentry);
@@ -1184,6 +1185,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
goto out_err;
return 0;
out_err:
+ link_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(link_new, false);
return rc;
}
@@ -1286,10 +1288,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
del_llc->reason = 0;
smc_llc_send_message(lnk, &qentry->msg); /* response */
- if (smc_link_downing(&lnk_del->state)) {
- if (smc_switch_conns(lgr, lnk_del, false))
- smc_wr_tx_wait_no_pending_sends(lnk_del);
- }
+ if (smc_link_downing(&lnk_del->state))
+ smc_switch_conns(lgr, lnk_del, false);
smcr_link_clear(lnk_del, true);
active_links = smc_llc_active_link_count(lgr);
@@ -1805,8 +1805,6 @@ void smc_llc_link_clear(struct smc_link *link, bool log)
link->smcibdev->ibdev->name, link->ibport);
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer (for all links) */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 289025cd545a..c79361dfcdfb 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -550,6 +550,22 @@ out_unlock:
return rc;
}
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ struct smc_link *link = conn->lnk;
+ int rc = -ENOLINK;
+
+ if (!link)
+ return rc;
+
+ atomic_inc(&link->wr_tx_refcnt);
+ if (smc_link_usable(link))
+ rc = _smcr_tx_sndbuf_nonempty(conn);
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+ return rc;
+}
+
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index cbc73a7e4d59..a419e9af36b9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -322,9 +322,12 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
if (rc)
return rc;
+ atomic_inc(&link->wr_reg_refcnt);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
+ if (atomic_dec_and_test(&link->wr_reg_refcnt))
+ wake_up_all(&link->wr_reg_wait);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
@@ -566,10 +569,15 @@ void smc_wr_free_link(struct smc_link *lnk)
return;
ibdev = lnk->smcibdev->ibdev;
+ smc_wr_wakeup_reg_wait(lnk);
+ smc_wr_wakeup_tx_wait(lnk);
+
if (smc_wr_tx_wait_no_pending_sends(lnk))
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) *
sizeof(*lnk->wr_tx_mask));
+ wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+ wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -728,7 +736,9 @@ int smc_wr_create_link(struct smc_link *lnk)
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
init_waitqueue_head(&lnk->wr_tx_wait);
+ atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);
+ atomic_set(&lnk->wr_reg_refcnt, 0);
return rc;
dma_unmap:
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf586840caeb..1b7a487c8841 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
dnode, l->addr, dport, 0, 0);
if (!skb)
- return -ENOMEM;
+ return -ENOBUFS;
msg_set_dest_droppable(buf_msg(skb), true);
TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
skb_queue_tail(&l->wakeupq, skb);
@@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l)
*
* Consumes the buffer chain.
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
*/
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
@@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (!_skb) {
kfree_skb(skb);
__skb_queue_purge(list);
- return -ENOMEM;
+ return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
tipc_link_set_skb_retransmit_time(skb, l);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e0c2c992ad9c..4f7c99dfd16c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -357,11 +357,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
- lock_sock(sk);
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk_error_report(sk);
- release_sock(sk);
}
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)