summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/Kconfig9
-rw-r--r--net/9p/Makefile4
-rw-r--r--net/9p/trans_xen.c545
-rw-r--r--net/bridge/netfilter/ebt_dnat.c20
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c26
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c22
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c26
-rw-r--r--net/netfilter/nf_conntrack_netlink.c89
-rw-r--r--net/netfilter/nf_tables_api.c5
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/x_tables.c4
-rw-r--r--net/netfilter/xt_CT.c11
-rw-r--r--net/netfilter/xt_socket.c2
-rw-r--r--net/openvswitch/conntrack.c30
-rw-r--r--net/sched/cls_matchall.c3
-rw-r--r--net/smc/smc_ib.c11
-rw-r--r--net/sysctl_net.c1
25 files changed, 754 insertions, 83 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index a75174a33723..e6014e0e51f7 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -22,6 +22,15 @@ config NET_9P_VIRTIO
This builds support for a transports between
guest partitions and a host partition.
+config NET_9P_XEN
+ depends on XEN
+ select XEN_XENBUS_FRONTEND
+ tristate "9P Xen Transport"
+ help
+ This builds support for a transport for 9pfs between
+ two Xen domains.
+
+
config NET_9P_RDMA
depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
tristate "9P RDMA Transport (Experimental)"
diff --git a/net/9p/Makefile b/net/9p/Makefile
index a0874cc1f718..697ea7caf466 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,4 +1,5 @@
obj-$(CONFIG_NET_9P) := 9pnet.o
+obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
@@ -14,5 +15,8 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
9pnet_virtio-objs := \
trans_virtio.o \
+9pnet_xen-objs := \
+ trans_xen.o \
+
9pnet_rdma-objs := \
trans_rdma.o \
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
new file mode 100644
index 000000000000..71e85643b3f9
--- /dev/null
+++ b/net/9p/trans_xen.c
@@ -0,0 +1,545 @@
+/*
+ * linux/fs/9p/trans_xen
+ *
+ * Xen transport layer.
+ *
+ * Copyright (C) 2017 by Stefano Stabellini <stefano@aporeto.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/9pfs.h>
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/rwlock.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <net/9p/transport.h>
+
+#define XEN_9PFS_NUM_RINGS 2
+#define XEN_9PFS_RING_ORDER 6
+#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER)
+
+struct xen_9pfs_header {
+ uint32_t size;
+ uint8_t id;
+ uint16_t tag;
+
+ /* uint8_t sdata[]; */
+} __attribute__((packed));
+
+/* One per ring, more than one per 9pfs share */
+struct xen_9pfs_dataring {
+ struct xen_9pfs_front_priv *priv;
+
+ struct xen_9pfs_data_intf *intf;
+ grant_ref_t ref;
+ int evtchn;
+ int irq;
+ /* protect a ring from concurrent accesses */
+ spinlock_t lock;
+
+ struct xen_9pfs_data data;
+ wait_queue_head_t wq;
+ struct work_struct work;
+};
+
+/* One per 9pfs share */
+struct xen_9pfs_front_priv {
+ struct list_head list;
+ struct xenbus_device *dev;
+ char *tag;
+ struct p9_client *client;
+
+ int num_rings;
+ struct xen_9pfs_dataring *rings;
+};
+
+static LIST_HEAD(xen_9pfs_devs);
+static DEFINE_RWLOCK(xen_9pfs_lock);
+
+/* We don't currently allow canceling of requests */
+static int p9_xen_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+ return 1;
+}
+
+static int p9_xen_create(struct p9_client *client, const char *addr, char *args)
+{
+ struct xen_9pfs_front_priv *priv;
+
+ read_lock(&xen_9pfs_lock);
+ list_for_each_entry(priv, &xen_9pfs_devs, list) {
+ if (!strcmp(priv->tag, addr)) {
+ priv->client = client;
+ read_unlock(&xen_9pfs_lock);
+ return 0;
+ }
+ }
+ read_unlock(&xen_9pfs_lock);
+ return -EINVAL;
+}
+
+static void p9_xen_close(struct p9_client *client)
+{
+ struct xen_9pfs_front_priv *priv;
+
+ read_lock(&xen_9pfs_lock);
+ list_for_each_entry(priv, &xen_9pfs_devs, list) {
+ if (priv->client == client) {
+ priv->client = NULL;
+ read_unlock(&xen_9pfs_lock);
+ return;
+ }
+ }
+ read_unlock(&xen_9pfs_lock);
+}
+
+static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
+{
+ RING_IDX cons, prod;
+
+ cons = ring->intf->out_cons;
+ prod = ring->intf->out_prod;
+ virt_mb();
+
+ return XEN_9PFS_RING_SIZE -
+ xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) >= size;
+}
+
+static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
+{
+ struct xen_9pfs_front_priv *priv = NULL;
+ RING_IDX cons, prod, masked_cons, masked_prod;
+ unsigned long flags;
+ u32 size = p9_req->tc->size;
+ struct xen_9pfs_dataring *ring;
+ int num;
+
+ read_lock(&xen_9pfs_lock);
+ list_for_each_entry(priv, &xen_9pfs_devs, list) {
+ if (priv->client == client)
+ break;
+ }
+ read_unlock(&xen_9pfs_lock);
+ if (!priv || priv->client != client)
+ return -EINVAL;
+
+ num = p9_req->tc->tag % priv->num_rings;
+ ring = &priv->rings[num];
+
+again:
+ while (wait_event_interruptible(ring->wq,
+ p9_xen_write_todo(ring, size)) != 0)
+ ;
+
+ spin_lock_irqsave(&ring->lock, flags);
+ cons = ring->intf->out_cons;
+ prod = ring->intf->out_prod;
+ virt_mb();
+
+ if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons,
+ XEN_9PFS_RING_SIZE) < size) {
+ spin_unlock_irqrestore(&ring->lock, flags);
+ goto again;
+ }
+
+ masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+
+ xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
+ &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
+
+ p9_req->status = REQ_STATUS_SENT;
+ virt_wmb(); /* write ring before updating pointer */
+ prod += size;
+ ring->intf->out_prod = prod;
+ spin_unlock_irqrestore(&ring->lock, flags);
+ notify_remote_via_irq(ring->irq);
+
+ return 0;
+}
+
+static void p9_xen_response(struct work_struct *work)
+{
+ struct xen_9pfs_front_priv *priv;
+ struct xen_9pfs_dataring *ring;
+ RING_IDX cons, prod, masked_cons, masked_prod;
+ struct xen_9pfs_header h;
+ struct p9_req_t *req;
+ int status;
+
+ ring = container_of(work, struct xen_9pfs_dataring, work);
+ priv = ring->priv;
+
+ while (1) {
+ cons = ring->intf->in_cons;
+ prod = ring->intf->in_prod;
+ virt_rmb();
+
+ if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) <
+ sizeof(h)) {
+ notify_remote_via_irq(ring->irq);
+ return;
+ }
+
+ masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+
+ /* First, read just the header */
+ xen_9pfs_read_packet(&h, ring->data.in, sizeof(h),
+ masked_prod, &masked_cons,
+ XEN_9PFS_RING_SIZE);
+
+ req = p9_tag_lookup(priv->client, h.tag);
+ if (!req || req->status != REQ_STATUS_SENT) {
+ dev_warn(&priv->dev->dev, "Wrong req tag=%x\n", h.tag);
+ cons += h.size;
+ virt_mb();
+ ring->intf->in_cons = cons;
+ continue;
+ }
+
+ memcpy(req->rc, &h, sizeof(h));
+ req->rc->offset = 0;
+
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ /* Then, read the whole packet (including the header) */
+ xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
+ masked_prod, &masked_cons,
+ XEN_9PFS_RING_SIZE);
+
+ virt_mb();
+ cons += h.size;
+ ring->intf->in_cons = cons;
+
+ status = (req->status != REQ_STATUS_ERROR) ?
+ REQ_STATUS_RCVD : REQ_STATUS_ERROR;
+
+ p9_client_cb(priv->client, req, status);
+ }
+}
+
+static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r)
+{
+ struct xen_9pfs_dataring *ring = r;
+
+ if (!ring || !ring->priv->client) {
+ /* ignore spurious interrupt */
+ return IRQ_HANDLED;
+ }
+
+ wake_up_interruptible(&ring->wq);
+ schedule_work(&ring->work);
+
+ return IRQ_HANDLED;
+}
+
+static struct p9_trans_module p9_xen_trans = {
+ .name = "xen",
+ .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT),
+ .def = 1,
+ .create = p9_xen_create,
+ .close = p9_xen_close,
+ .request = p9_xen_request,
+ .cancel = p9_xen_cancel,
+ .owner = THIS_MODULE,
+};
+
+static const struct xenbus_device_id xen_9pfs_front_ids[] = {
+ { "9pfs" },
+ { "" }
+};
+
+static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
+{
+ int i, j;
+
+ write_lock(&xen_9pfs_lock);
+ list_del(&priv->list);
+ write_unlock(&xen_9pfs_lock);
+
+ for (i = 0; i < priv->num_rings; i++) {
+ if (!priv->rings[i].intf)
+ break;
+ if (priv->rings[i].irq > 0)
+ unbind_from_irqhandler(priv->rings[i].irq, priv->dev);
+ if (priv->rings[i].data.in) {
+ for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) {
+ grant_ref_t ref;
+
+ ref = priv->rings[i].intf->ref[j];
+ gnttab_end_foreign_access(ref, 0, 0);
+ }
+ free_pages((unsigned long)priv->rings[i].data.in,
+ XEN_9PFS_RING_ORDER -
+ (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ }
+ gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
+ free_page((unsigned long)priv->rings[i].intf);
+ }
+ kfree(priv->rings);
+ kfree(priv->tag);
+ kfree(priv);
+}
+
+static int xen_9pfs_front_remove(struct xenbus_device *dev)
+{
+ struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
+
+ dev_set_drvdata(&dev->dev, NULL);
+ xen_9pfs_front_free(priv);
+ return 0;
+}
+
+static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
+ struct xen_9pfs_dataring *ring)
+{
+ int i = 0;
+ int ret = -ENOMEM;
+ void *bytes = NULL;
+
+ init_waitqueue_head(&ring->wq);
+ spin_lock_init(&ring->lock);
+ INIT_WORK(&ring->work, p9_xen_response);
+
+ ring->intf = (struct xen_9pfs_data_intf *)get_zeroed_page(GFP_KERNEL);
+ if (!ring->intf)
+ return ret;
+ ret = gnttab_grant_foreign_access(dev->otherend_id,
+ virt_to_gfn(ring->intf), 0);
+ if (ret < 0)
+ goto out;
+ ring->ref = ret;
+ bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ if (!bytes) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (; i < (1 << XEN_9PFS_RING_ORDER); i++) {
+ ret = gnttab_grant_foreign_access(
+ dev->otherend_id, virt_to_gfn(bytes) + i, 0);
+ if (ret < 0)
+ goto out;
+ ring->intf->ref[i] = ret;
+ }
+ ring->intf->ring_order = XEN_9PFS_RING_ORDER;
+ ring->data.in = bytes;
+ ring->data.out = bytes + XEN_9PFS_RING_SIZE;
+
+ ret = xenbus_alloc_evtchn(dev, &ring->evtchn);
+ if (ret)
+ goto out;
+ ring->irq = bind_evtchn_to_irqhandler(ring->evtchn,
+ xen_9pfs_front_event_handler,
+ 0, "xen_9pfs-frontend", ring);
+ if (ring->irq >= 0)
+ return 0;
+
+ xenbus_free_evtchn(dev, ring->evtchn);
+ ret = ring->irq;
+out:
+ if (bytes) {
+ for (i--; i >= 0; i--)
+ gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
+ free_pages((unsigned long)bytes,
+ XEN_9PFS_RING_ORDER -
+ (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ }
+ gnttab_end_foreign_access(ring->ref, 0, 0);
+ free_page((unsigned long)ring->intf);
+ return ret;
+}
+
+static int xen_9pfs_front_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int ret, i;
+ struct xenbus_transaction xbt;
+ struct xen_9pfs_front_priv *priv = NULL;
+ char *versions;
+ unsigned int max_rings, max_ring_order, len = 0;
+
+ versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
+ if (!len)
+ return -EINVAL;
+ if (strcmp(versions, "1")) {
+ kfree(versions);
+ return -EINVAL;
+ }
+ kfree(versions);
+ max_rings = xenbus_read_unsigned(dev->otherend, "max-rings", 0);
+ if (max_rings < XEN_9PFS_NUM_RINGS)
+ return -EINVAL;
+ max_ring_order = xenbus_read_unsigned(dev->otherend,
+ "max-ring-page-order", 0);
+ if (max_ring_order < XEN_9PFS_RING_ORDER)
+ return -EINVAL;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ priv->num_rings = XEN_9PFS_NUM_RINGS;
+ priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings),
+ GFP_KERNEL);
+ if (!priv->rings) {
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < priv->num_rings; i++) {
+ priv->rings[i].priv = priv;
+ ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]);
+ if (ret < 0)
+ goto error;
+ }
+
+ again:
+ ret = xenbus_transaction_start(&xbt);
+ if (ret) {
+ xenbus_dev_fatal(dev, ret, "starting transaction");
+ goto error;
+ }
+ ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
+ if (ret)
+ goto error_xenbus;
+ ret = xenbus_printf(xbt, dev->nodename, "num-rings", "%u",
+ priv->num_rings);
+ if (ret)
+ goto error_xenbus;
+ for (i = 0; i < priv->num_rings; i++) {
+ char str[16];
+
+ BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9);
+ sprintf(str, "ring-ref%u", i);
+ ret = xenbus_printf(xbt, dev->nodename, str, "%d",
+ priv->rings[i].ref);
+ if (ret)
+ goto error_xenbus;
+
+ sprintf(str, "event-channel-%u", i);
+ ret = xenbus_printf(xbt, dev->nodename, str, "%u",
+ priv->rings[i].evtchn);
+ if (ret)
+ goto error_xenbus;
+ }
+ priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL);
+ if (!priv->tag) {
+ ret = -EINVAL;
+ goto error_xenbus;
+ }
+ ret = xenbus_transaction_end(xbt, 0);
+ if (ret) {
+ if (ret == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, ret, "completing transaction");
+ goto error;
+ }
+
+ write_lock(&xen_9pfs_lock);
+ list_add_tail(&priv->list, &xen_9pfs_devs);
+ write_unlock(&xen_9pfs_lock);
+ dev_set_drvdata(&dev->dev, priv);
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+ error_xenbus:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, ret, "writing xenstore");
+ error:
+ dev_set_drvdata(&dev->dev, NULL);
+ xen_9pfs_front_free(priv);
+ return ret;
+}
+
+static int xen_9pfs_front_resume(struct xenbus_device *dev)
+{
+ dev_warn(&dev->dev, "suspsend/resume unsupported\n");
+ return 0;
+}
+
+static void xen_9pfs_front_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ switch (backend_state) {
+ case XenbusStateReconfiguring:
+ case XenbusStateReconfigured:
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateUnknown:
+ break;
+
+ case XenbusStateInitWait:
+ break;
+
+ case XenbusStateConnected:
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosed:
+ if (dev->state == XenbusStateClosed)
+ break;
+ /* Missed the backend's CLOSING state -- fallthrough */
+ case XenbusStateClosing:
+ xenbus_frontend_closed(dev);
+ break;
+ }
+}
+
+static struct xenbus_driver xen_9pfs_front_driver = {
+ .ids = xen_9pfs_front_ids,
+ .probe = xen_9pfs_front_probe,
+ .remove = xen_9pfs_front_remove,
+ .resume = xen_9pfs_front_resume,
+ .otherend_changed = xen_9pfs_front_changed,
+};
+
+int p9_trans_xen_init(void)
+{
+ if (!xen_domain())
+ return -ENODEV;
+
+ pr_info("Initialising Xen transport for 9pfs\n");
+
+ v9fs_register_trans(&p9_xen_trans);
+ return xenbus_register_frontend(&xen_9pfs_front_driver);
+}
+module_init(p9_trans_xen_init);
+
+void p9_trans_xen_exit(void)
+{
+ v9fs_unregister_trans(&p9_xen_trans);
+ return xenbus_unregister_driver(&xen_9pfs_front_driver);
+}
+module_exit(p9_trans_xen_exit);
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4e0b0c359325..e0bb624c3845 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -9,6 +9,7 @@
*/
#include <linux/module.h>
#include <net/sock.h>
+#include "../br_private.h"
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
@@ -18,11 +19,30 @@ static unsigned int
ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nat_info *info = par->targinfo;
+ struct net_device *dev;
if (!skb_make_writable(skb, 0))
return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
+
+ if (is_multicast_ether_addr(info->mac)) {
+ if (is_broadcast_ether_addr(info->mac))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ if (xt_hooknum(par) != NF_BR_BROUTING)
+ dev = br_port_get_rcu(xt_in(par))->br->dev;
+ else
+ dev = xt_in(par);
+
+ if (ether_addr_equal(info->mac, dev->dev_addr))
+ skb->pkt_type = PACKET_HOST;
+ else
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+
return info->target;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6e67315ec368..bcb0f610ee42 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1054,7 +1054,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
return err;
}
- if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+ if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
return -EMSGSIZE;
return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9d943974de2b..bdffad875691 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -358,6 +358,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct iphdr))
+ return -EINVAL;
+
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8f6373b0cd77..717be4de5324 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -523,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
newtp->rack.mstamp.v64 = 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b09ac38d8dc4..77a4bd526d6e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3328,7 +3328,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
idev->dev, 0, 0);
}
- addrconf_dad_start(ifp);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD)
+ addrconf_dad_start(ifp);
return 0;
}
@@ -3683,7 +3684,7 @@ restart:
if (keep) {
/* set state to skip the notifier below */
state = INET6_IFADDR_STATE_DEAD;
- ifa->state = 0;
+ ifa->state = INET6_IFADDR_STATE_PREDAD;
if (!(ifa->flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
@@ -6572,6 +6573,8 @@ int __init addrconf_init(void)
goto errlo;
}
+ ip6_route_init_special_entries();
+
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index bf3ad3e7b647..b2b4f031b3a1 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -235,7 +235,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
inside->icmp6.icmp6_cksum =
csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
skb->len - hdrlen, IPPROTO_ICMPV6,
- csum_partial(&inside->icmp6,
+ skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0da6a12b5472..1f992d9e261d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct ipv6hdr))
+ return -EINVAL;
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a1bf426c959b..2f1136627dcb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4027,6 +4027,21 @@ static struct notifier_block ip6_route_dev_notifier = {
.priority = 0,
};
+void __init ip6_route_init_special_entries(void)
+{
+ /* Registering of the loopback is done before this portion of code,
+ * the loopback reference in rt6_info will not be taken, do it
+ * manually for init_net */
+ init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #endif
+}
+
int __init ip6_route_init(void)
{
int ret;
@@ -4053,17 +4068,6 @@ int __init ip6_route_init(void)
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
- /* Registering of the loopback is done before this portion of code,
- * the loopback reference in rt6_info will not be taken, do it
- * manually for init_net */
- init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #ifdef CONFIG_IPV6_MULTIPLE_TABLES
- init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #endif
ret = fib6_init();
if (ret)
goto out_register_subsys;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 668d9643f0cc..1fa3c2307b6e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3078,6 +3078,17 @@ nla_put_failure:
return skb->len;
}
+static bool ip_vs_is_af_valid(int af)
+{
+ if (af == AF_INET)
+ return true;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6 && ipv6_mod_enabled())
+ return true;
+#endif
+ return false;
+}
+
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
@@ -3105,11 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
memset(usvc, 0, sizeof(*usvc));
usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
- if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
- if (usvc->af != AF_INET)
-#endif
+ if (!ip_vs_is_af_valid(usvc->af))
return -EAFNOSUPPORT;
if (nla_fwmark) {
@@ -3612,6 +3619,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (udest.af == 0)
udest.af = svc->af;
+ if (!ip_vs_is_af_valid(udest.af)) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
/* The synchronization protocol is incompatible
* with mixed family services
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f9245dbfe435..3c8f1ed2f555 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1853,7 +1853,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
-static unsigned int total_extension_size(void)
+static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */
BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4b9dfe3eef62..3a60efa7799b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -385,7 +385,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
unsigned int h = helper_hash(&me->tuple);
struct nf_conntrack_helper *cur;
- int ret = 0;
+ int ret = 0, i;
BUG_ON(me->expect_policy == NULL);
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
@@ -395,10 +395,26 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
return -EINVAL;
mutex_lock(&nf_ct_helper_mutex);
- hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
- if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
- ret = -EEXIST;
- goto out;
+ for (i = 0; i < nf_ct_helper_hsize; i++) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+ if (!strcmp(cur->name, me->name) &&
+ (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+ cur->tuple.src.l3num == me->tuple.src.l3num) &&
+ cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ }
+
+ /* avoid unpredictable behaviour for auto_assign_helper */
+ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+ if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+ &mask)) {
+ ret = -EEXIST;
+ goto out;
+ }
}
}
hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5f6f2f388928..dcf561b5c97a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -417,8 +417,7 @@ nla_put_failure:
return -1;
}
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
- const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *seq;
@@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
- return -1;
+ goto err;
seq = &seqadj->seq[IP_CT_DIR_REPLY];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
- return -1;
+ goto err;
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return -1;
}
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -1417,6 +1421,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+ unsigned long off)
+{
+ unsigned int bit;
+
+ /* Ignore these unchangable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+
static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -1436,10 +1458,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* ASSURED bit can only be set */
return -EBUSY;
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * nf_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+ __ctnetlink_change_status(ct, status, 0);
return 0;
}
@@ -1508,23 +1527,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
return 0;
}
+ rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
-#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_expect_lock);
- return -EOPNOTSUPP;
- }
-
- spin_lock_bh(&nf_conntrack_expect_lock);
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper)
- return -EAGAIN;
-#endif
+ rcu_read_unlock();
return -EOPNOTSUPP;
}
@@ -1533,13 +1540,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
/* update private helper data if allowed. */
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- return 0;
+ err = 0;
} else
- return -EBUSY;
+ err = -EBUSY;
+ } else {
+ /* we cannot set a helper for an existing conntrack */
+ err = -EOPNOTSUPP;
}
- /* we cannot set a helper for an existing conntrack */
- return -EOPNOTSUPP;
+ rcu_read_unlock();
+ return err;
}
static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1630,25 +1640,30 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
if (!seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
if (cda[CTA_SEQ_ADJ_ORIG]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
if (cda[CTA_SEQ_ADJ_REPLY]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return ret;
}
static int
@@ -1959,9 +1974,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
@@ -2294,10 +2307,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* This check is less strict than ctnetlink_change_status()
* because callers often flip IPS_EXPECTED bits when sending
* an NFQA_CT attribute to the kernel. So ignore the
- * unchangeable bits but do not error out.
+ * unchangeable bits but do not error out. Also user programs
+ * are allowed to clear the bits that they are allowed to change.
*/
- ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
- (ct->status & IPS_UNCHANGEABLE_MASK);
+ __ctnetlink_change_status(ct, status, ~status);
return 0;
}
@@ -2351,11 +2364,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_expect_lock);
- ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- return ret;
+ return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
}
static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1c6482d2c4dc..559225029740 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3778,6 +3778,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+ nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+ return -EBUSY;
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 3948da380259..66221ad891a9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = jiffies + timeout;
- } else if (sexpr == NULL)
- goto out;
+ }
if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
@@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
return;
}
-out:
+
if (!priv->invert)
regs->verdict.code = NFT_BREAK;
}
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 8ebbc2940f4c..b988162b5b15 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -257,6 +257,11 @@ static int nft_bitmap_init(const struct nft_set *set,
static void nft_bitmap_destroy(const struct nft_set *set)
{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be, *n;
+
+ list_for_each_entry_safe(be, n, &priv->list, head)
+ nft_set_elem_destroy(set, be, true);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 14857afc9937..f134d384852f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1051,8 +1051,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
list_for_each_entry(t, &init_net.xt.tables[af], list) {
if (strcmp(t->name, name))
continue;
- if (!try_module_get(t->me))
+ if (!try_module_get(t->me)) {
+ mutex_unlock(&xt[af].mutex);
return NULL;
+ }
mutex_unlock(&xt[af].mutex);
if (t->table_init(net) != 0) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 3cbe1bcf6a74..bb7ad82dcd56 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -168,8 +168,10 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto err_put_timeout;
}
timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
- if (timeout_ext == NULL)
+ if (!timeout_ext) {
ret = -ENOMEM;
+ goto err_put_timeout;
+ }
rcu_read_unlock();
return ret;
@@ -201,6 +203,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
struct nf_conntrack_zone zone;
+ struct nf_conn_help *help;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
@@ -249,7 +252,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (info->timeout[0]) {
ret = xt_ct_set_timeout(ct, par, info->timeout);
if (ret < 0)
- goto err3;
+ goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
@@ -257,6 +260,10 @@ out:
info->ct = ct;
return 0;
+err4:
+ help = nfct_help(ct);
+ if (help)
+ module_put(help->helper->me);
err3:
nf_ct_tmpl_free(ct);
err2:
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 770bbec878f1..e75ef39669c5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -152,7 +152,7 @@ static int socket_mt_enable_defrag(struct net *net, int family)
switch (family) {
case NFPROTO_IPV4:
return nf_defrag_ipv4_enable(net);
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
return nf_defrag_ipv6_enable(net);
#endif
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 42a95919df09..bf602e33c40a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -516,10 +516,38 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
u16 proto, const struct sk_buff *skb)
{
struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
- return __nf_ct_expect_find(net, zone, &tuple);
+
+ exp = __nf_ct_expect_find(net, zone, &tuple);
+ if (exp) {
+ struct nf_conntrack_tuple_hash *h;
+
+ /* Delete existing conntrack entry, if it clashes with the
+ * expectation. This can happen since conntrack ALGs do not
+ * check for clashes between (new) expectations and existing
+ * conntrack entries. nf_conntrack_in() will check the
+ * expectations only if a conntrack entry can not be found,
+ * which can lead to OVS finding the expectation (here) in the
+ * init direction, but which will not be removed by the
+ * nf_conntrack_in() call, if a matching conntrack entry is
+ * found instead. In this case all init direction packets
+ * would be reported as new related packets, while reply
+ * direction packets would be reported as un-related
+ * established packets.
+ */
+ h = nf_conntrack_find_get(net, zone, &tuple);
+ if (h) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ nf_ct_delete(ct, 0, 0);
+ nf_conntrack_put(&ct->ct_general);
+ }
+ }
+
+ return exp;
}
/* This replicates logic from nf_conntrack_core.c that is not exported. */
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 2efb36c08f2a..dee469fed967 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -203,8 +203,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
*arg = (unsigned long) head;
rcu_assign_pointer(tp->root, new);
- if (head)
- call_rcu(&head->rcu, mall_destroy_rcu);
+ call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
err_replace_hw_filter:
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 16b7c801f8b6..cb69ab977cd7 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -80,12 +80,11 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IB_QPS_RTR;
qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
- qp_attr.ah_attr.port_num = lnk->ibport;
- qp_attr.ah_attr.ah_flags = IB_AH_GRH;
- qp_attr.ah_attr.grh.hop_limit = 1;
- memcpy(&qp_attr.ah_attr.grh.dgid, lnk->peer_gid,
- sizeof(lnk->peer_gid));
- memcpy(&qp_attr.ah_attr.dmac, lnk->peer_mac,
+ qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
+ rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
+ memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
sizeof(lnk->peer_mac));
qp_attr.dest_qp_num = lnk->peer_qpn;
qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 919981324171..9aed6fe1bf1a 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -106,7 +106,6 @@ __init int net_sysctl_init(void)
ret = register_pernet_subsys(&sysctl_pernet_ops);
if (ret)
goto out1;
- register_sysctl_root(&net_sysctl_root);
out:
return ret;
out1: