summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-02-22 12:18:07 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-02-22 12:18:07 -0800
commitdea08e604408d0303e2332896c5fdd8c1f7d79a2 (patch)
treedbe8aa3fbfba7dc0e9878de152e8efe537b2b4b5 /net/ipv6
parent5c102d0eca3c41b10ccc1525cbb1eba7fd6efd7c (diff)
parentd856626d3b051a3ad7139ba59463b692c131f844 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "Looks like a lot, but mostly driver fixes scattered all over as usual. Of note: 1) Add conditional sched in nf conntrack in cleanup to avoid NMI watchdogs. From Florian Westphal. 2) Fix deadlock in nfnetlink cttimeout, also from Floarian. 3) Fix handling of slaves in bonding ARP monitor validation, from Jay Vosburgh. 4) Callers of ip_cmsg_send() are responsible for freeing IP options, some were not doing so. Fix from Eric Dumazet. 5) Fix per-cpu bugs in mvneta driver, from Gregory CLEMENT. 6) Fix vlan handling in mv88e6xxx DSA driver, from Vivien Didelot. 7) bcm7xxx PHY driver bug fixes from Florian Fainelli. 8) Avoid unaligned accesses to protocol headers wrt. GRE, from Alexander Duyck. 9) SKB leaks and other problems in arc_emac driver, from Alexander Kochetkov. 10) tcp_v4_inbound_md5_hash() releases listener socket instead of request socket on error path, oops. Fix from Eric Dumazet. 11) Missing socket release in pppoe_rcv_core() that seems to have existed basically forever. From Guillaume Nault. 12) Missing slave_dev unregister in dsa_slave_create() error path, from Florian Fainelli. 13) crypto_alloc_hash() never returns NULL, fix return value check in __tcp_alloc_md5sig_pool. From Insu Yun. 14) Properly expire exception route entries in ipv4, from Xin Long. 15) Fix races in tcp/dccp listener socket dismantle, from Eric Dumazet. 16) Don't set IFF_TX_SKB_SHARING in vxlan, geneve, or GRE, it's not legal. These drivers modify the SKB on transmit. From Jiri Benc. 17) Fix regression in the initialziation of netdev->tx_queue_len. From Phil Sutter. 18) Missing unlock in tipc_nl_add_bc_link() error path, from Insu Yun. 19) SCTP port hash sizing does not properly ensure that table is a power of two in size. From Neil Horman. 20) Fix initializing of software copy of MAC address in fmvj18x_cs driver, from Ken Kawasaki" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (129 commits) bnx2x: Fix 84833 phy command handler bnx2x: Fix led setting for 84858 phy. bnx2x: Correct 84858 PHY fw version bnx2x: Fix 84833 RX CRC bnx2x: Fix link-forcing for KR2 net: ethernet: davicom: fix devicetree irq resource fmvj18x_cs: fix incorrect indexing of dev->dev_addr[] when copying the MAC address Driver: Vmxnet3: Update Rx ring 2 max size net: netcp: rework the code for get/set sw_data in dma desc soc: ti: knav_dma: rename pad in struct knav_dma_desc to sw_data net: ti: netcp: restore get/set_pad_info() functionality MAINTAINERS: Drop myself as xen netback maintainer sctp: Fix port hash table size computation can: ems_usb: Fix possible tx overflow Bluetooth: hci_core: Avoid mixing up req_complete and req_complete_skb net: bcmgenet: Fix internal PHY link state af_unix: Don't use continue to re-execute unix_stream_read_generic loop unix_diag: fix incorrect sign extension in unix_lookup_by_ino bnxt_en: Failure to update PHY is not fatal condition. bnxt_en: Remove unnecessary call to update PHY settings. ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/ip6_gre.c1
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c74
-rw-r--r--net/ipv6/tcp_ipv6.c14
4 files changed, 80 insertions, 11 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9efd9ffdc34c..bdd7eac4307a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -583,7 +583,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f37f18b6b40c..a69aad1e29d1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1512,6 +1512,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->destructor = ip6gre_dev_free;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 31ba7ca19757..051b6a6bfff6 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -21,6 +21,10 @@
#include <net/ipv6.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+#define MAX_WORK_COUNT 16
+
+static atomic_t v6_worker_count;
+
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
const struct net_device *out)
@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = {
.notifier_call = masq_device_event,
};
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ int ifindex;
+};
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+ long index;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ index = w->ifindex;
+ nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&v6_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
+ * schedule.
+ *
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * time if there are lots of conntracks and the system
+ * handles high softirq load, so it frequently calls cond_resched
+ * while iterating the conntrack table.
+ *
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount
+ * of ipv6 addresses being deleted), we also need to add an upper
+ * limit to the number of queued work items.
+ */
static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
- struct netdev_notifier_info info;
+ const struct net_device *dev;
+ struct masq_dev_work *w;
+ struct net *net;
+
+ if (event != NETDEV_DOWN ||
+ atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
+ return NOTIFY_DONE;
+
+ dev = ifa->idev->dev;
+ net = maybe_get_net(dev_net(dev));
+ if (!net)
+ return NOTIFY_DONE;
- netdev_notifier_info_init(&info, ifa->idev->dev);
- return masq_device_event(this, event, &info);
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kmalloc(sizeof(*w), GFP_ATOMIC);
+ if (w) {
+ atomic_inc(&v6_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = dev->ifindex;
+ w->net = net;
+ schedule_work(&w->work);
+
+ return NOTIFY_DONE;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+ return NOTIFY_DONE;
}
static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1a5a70fb8551..5c8c84273028 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1387,7 +1387,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
tcp_v6_fill_cb(skb, hdr, th);
@@ -1395,24 +1395,24 @@ process:
reqsk_put(req);
goto discard_it;
}
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
tcp_v6_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}