summaryrefslogtreecommitdiff
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
authorBjörn Töpel <bjorn.topel@intel.com>2018-05-02 13:01:23 +0200
committerAlexei Starovoitov <ast@kernel.org>2018-05-03 15:55:23 -0700
commitc0c77d8fb787cfe0c3fca689c2a30d1dad4eaba7 (patch)
tree04413efa068765eb74d50fbb1ab6a4318f3d606f /net/xdp/xsk.c
parent68e8b849b221b37a78a110a0307717d45e3593a0 (diff)
xsk: add user memory registration support sockopt
In this commit the base structure of the AF_XDP address family is set up. Further, we introduce the abilty register a window of user memory to the kernel via the XDP_UMEM_REG setsockopt syscall. The memory window is viewed by an AF_XDP socket as a set of equally large frames. After a user memory registration all frames are "owned" by the user application, and not the kernel. v2: More robust checks on umem creation and unaccount on error. Call set_page_dirty_lock on cleanup. Simplified xdp_umem_reg. Co-authored-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c215
1 files changed, 215 insertions, 0 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
new file mode 100644
index 000000000000..84e0e867febb
--- /dev/null
+++ b/net/xdp/xsk.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets
+ *
+ * AF_XDP sockets allows a channel between XDP programs and userspace
+ * applications.
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ * Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
+
+#include <linux/if_xdp.h>
+#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <net/xdp_sock.h>
+
+#include "xdp_umem.h"
+
+static struct xdp_sock *xdp_sk(struct sock *sk)
+{
+ return (struct xdp_sock *)sk;
+}
+
+static int xsk_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct net *net;
+
+ if (!sk)
+ return 0;
+
+ net = sock_net(sk);
+
+ local_bh_disable();
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+ local_bh_enable();
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+
+ sk_refcnt_debug_release(sk);
+ sock_put(sk);
+
+ return 0;
+}
+
+static int xsk_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
+ int err;
+
+ if (level != SOL_XDP)
+ return -ENOPROTOOPT;
+
+ switch (optname) {
+ case XDP_UMEM_REG:
+ {
+ struct xdp_umem_reg mr;
+ struct xdp_umem *umem;
+
+ if (xs->umem)
+ return -EBUSY;
+
+ if (copy_from_user(&mr, optval, sizeof(mr)))
+ return -EFAULT;
+
+ mutex_lock(&xs->mutex);
+ err = xdp_umem_create(&umem);
+
+ err = xdp_umem_reg(umem, &mr);
+ if (err) {
+ kfree(umem);
+ mutex_unlock(&xs->mutex);
+ return err;
+ }
+
+ /* Make sure umem is ready before it can be seen by others */
+ smp_wmb();
+
+ xs->umem = umem;
+ mutex_unlock(&xs->mutex);
+ return 0;
+ }
+ default:
+ break;
+ }
+
+ return -ENOPROTOOPT;
+}
+
+static struct proto xsk_proto = {
+ .name = "XDP",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct xdp_sock),
+};
+
+static const struct proto_ops xsk_proto_ops = {
+ .family = PF_XDP,
+ .owner = THIS_MODULE,
+ .release = xsk_release,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = sock_no_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = xsk_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = sock_no_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static void xsk_destruct(struct sock *sk)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ return;
+
+ xdp_put_umem(xs->umem);
+
+ sk_refcnt_debug_dec(sk);
+}
+
+static int xsk_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ struct sock *sk;
+ struct xdp_sock *xs;
+
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
+ return -EPERM;
+ if (sock->type != SOCK_RAW)
+ return -ESOCKTNOSUPPORT;
+
+ if (protocol)
+ return -EPROTONOSUPPORT;
+
+ sock->state = SS_UNCONNECTED;
+
+ sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
+ if (!sk)
+ return -ENOBUFS;
+
+ sock->ops = &xsk_proto_ops;
+
+ sock_init_data(sock, sk);
+
+ sk->sk_family = PF_XDP;
+
+ sk->sk_destruct = xsk_destruct;
+ sk_refcnt_debug_inc(sk);
+
+ xs = xdp_sk(sk);
+ mutex_init(&xs->mutex);
+
+ local_bh_disable();
+ sock_prot_inuse_add(net, &xsk_proto, 1);
+ local_bh_enable();
+
+ return 0;
+}
+
+static const struct net_proto_family xsk_family_ops = {
+ .family = PF_XDP,
+ .create = xsk_create,
+ .owner = THIS_MODULE,
+};
+
+static int __init xsk_init(void)
+{
+ int err;
+
+ err = proto_register(&xsk_proto, 0 /* no slab */);
+ if (err)
+ goto out;
+
+ err = sock_register(&xsk_family_ops);
+ if (err)
+ goto out_proto;
+
+ return 0;
+
+out_proto:
+ proto_unregister(&xsk_proto);
+out:
+ return err;
+}
+
+fs_initcall(xsk_init);