summaryrefslogtreecommitdiff
path: root/net/vmw_vsock/af_vsock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/vmw_vsock/af_vsock.c')
-rw-r--r--net/vmw_vsock/af_vsock.c263
1 files changed, 208 insertions, 55 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 54ba7316f808..2e7a3034e965 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -112,16 +112,19 @@
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vm_sockets.h>
+#include <uapi/asm-generic/ioctls.h>
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+static void vsock_close(struct sock *sk, long timeout);
/* Protocol family. */
struct proto vsock_proto = {
.name = "AF_VSOCK",
.owner = THIS_MODULE,
.obj_size = sizeof(struct vsock_sock),
+ .close = vsock_close,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = vsock_bpf_update_proto,
#endif
@@ -334,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
void vsock_remove_sock(struct vsock_sock *vsk)
{
- vsock_remove_bound(vsk);
+ /* Transport reassignment must not remove the binding. */
+ if (sock_flag(sk_vsock(vsk), SOCK_DEAD))
+ vsock_remove_bound(vsk);
+
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
@@ -488,6 +494,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
*/
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
+
+ /* transport's release() and destruct() can touch some socket
+ * state, since we are reassigning the socket to a new transport
+ * during vsock_connect(), let's reset these fields to have a
+ * clean state.
+ */
+ sock_reset_flag(sk, SOCK_DONE);
+ sk->sk_state = TCP_CLOSE;
+ vsk->peer_shutdown = 0;
}
/* We increase the module refcnt to prevent the transport unloading
@@ -796,45 +811,53 @@ static bool sock_type_connectible(u16 type)
static void __vsock_release(struct sock *sk, int level)
{
- if (sk) {
- struct sock *pending;
- struct vsock_sock *vsk;
+ struct vsock_sock *vsk;
+ struct sock *pending;
- vsk = vsock_sk(sk);
- pending = NULL; /* Compiler warning. */
+ vsk = vsock_sk(sk);
+ pending = NULL; /* Compiler warning. */
- /* When "level" is SINGLE_DEPTH_NESTING, use the nested
- * version to avoid the warning "possible recursive locking
- * detected". When "level" is 0, lock_sock_nested(sk, level)
- * is the same as lock_sock(sk).
- */
- lock_sock_nested(sk, level);
+ /* When "level" is SINGLE_DEPTH_NESTING, use the nested
+ * version to avoid the warning "possible recursive locking
+ * detected". When "level" is 0, lock_sock_nested(sk, level)
+ * is the same as lock_sock(sk).
+ */
+ lock_sock_nested(sk, level);
- if (vsk->transport)
- vsk->transport->release(vsk);
- else if (sock_type_connectible(sk->sk_type))
- vsock_remove_sock(vsk);
+ /* Indicate to vsock_remove_sock() that the socket is being released and
+ * can be removed from the bound_table. Unlike transport reassignment
+ * case, where the socket must remain bound despite vsock_remove_sock()
+ * being called from the transport release() callback.
+ */
+ sock_set_flag(sk, SOCK_DEAD);
- sock_orphan(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ if (vsk->transport)
+ vsk->transport->release(vsk);
+ else if (sock_type_connectible(sk->sk_type))
+ vsock_remove_sock(vsk);
- skb_queue_purge(&sk->sk_receive_queue);
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
- /* Clean up any sockets that never were accepted. */
- while ((pending = vsock_dequeue_accept(sk)) != NULL) {
- __vsock_release(pending, SINGLE_DEPTH_NESTING);
- sock_put(pending);
- }
+ skb_queue_purge(&sk->sk_receive_queue);
- release_sock(sk);
- sock_put(sk);
+ /* Clean up any sockets that never were accepted. */
+ while ((pending = vsock_dequeue_accept(sk)) != NULL) {
+ __vsock_release(pending, SINGLE_DEPTH_NESTING);
+ sock_put(pending);
}
+
+ release_sock(sk);
+ sock_put(sk);
}
static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ /* Flush MSG_ZEROCOPY leftovers. */
+ __skb_queue_purge(&sk->sk_error_queue);
+
vsock_deassign_transport(vsk);
/* When clearing these addresses, there's no need to set the family and
@@ -866,6 +889,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected);
s64 vsock_stream_has_data(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
@@ -874,6 +900,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
if (sk->sk_type == SOCK_SEQPACKET)
return vsk->transport->seqpacket_has_data(vsk);
else
@@ -883,6 +912,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data);
s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_space(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
@@ -897,9 +929,22 @@ void vsock_data_ready(struct sock *sk)
}
EXPORT_SYMBOL_GPL(vsock_data_ready);
+/* Dummy callback required by sockmap.
+ * See unconditional call of saved_close() in sock_map_close().
+ */
+static void vsock_close(struct sock *sk, long timeout)
+{
+}
+
static int vsock_release(struct socket *sock)
{
- __vsock_release(sock->sk, 0);
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 0;
+
+ sk->sk_prot->close(sk, 0);
+ __vsock_release(sk, 0);
sock->sk = NULL;
sock->state = SS_FREE;
@@ -968,6 +1013,39 @@ out:
return err;
}
+void vsock_linger(struct sock *sk)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ ssize_t (*unsent)(struct vsock_sock *vsk);
+ struct vsock_sock *vsk = vsock_sk(sk);
+ long timeout;
+
+ if (!sock_flag(sk, SOCK_LINGER))
+ return;
+
+ timeout = sk->sk_lingertime;
+ if (!timeout)
+ return;
+
+ /* Transports must implement `unsent_bytes` if they want to support
+ * SOCK_LINGER through `vsock_linger()` since we use it to check when
+ * the socket can be closed.
+ */
+ unsent = vsk->transport->unsent_bytes;
+ if (!unsent)
+ return;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+
+ do {
+ if (sk_wait_event(sk, &timeout, unsent(vsk) == 0, &wait))
+ break;
+ } while (!signal_pending(current) && timeout);
+
+ remove_wait_queue(sk_sleep(sk), &wait);
+}
+EXPORT_SYMBOL_GPL(vsock_linger);
+
static int vsock_shutdown(struct socket *sock, int mode)
{
int err;
@@ -1050,6 +1128,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
mask |= EPOLLRDHUP;
}
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
if (sock->type == SOCK_DGRAM) {
/* For datagram sockets we can read if there is something in
* the queue and write as long as the socket isn't shutdown for
@@ -1141,6 +1222,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ if (WARN_ON_ONCE(!vsk->transport))
+ return -ENODEV;
+
return vsk->transport->read_skb(vsk, read_actor);
}
@@ -1270,28 +1354,82 @@ out:
return err;
}
+int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
+}
+
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
const struct proto *prot;
-#endif
- struct vsock_sock *vsk;
- struct sock *sk;
-
- sk = sock->sk;
- vsk = vsock_sk(sk);
-#ifdef CONFIG_BPF_SYSCALL
prot = READ_ONCE(sk->sk_prot);
if (prot != &vsock_proto)
return prot->recvmsg(sk, msg, len, flags, NULL);
#endif
- return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
+ return __vsock_dgram_recvmsg(sock, msg, len, flags);
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
+static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
+ int __user *arg)
+{
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk;
+ int ret;
+
+ vsk = vsock_sk(sk);
+
+ switch (cmd) {
+ case SIOCOUTQ: {
+ ssize_t n_bytes;
+
+ if (!vsk->transport || !vsk->transport->unsent_bytes) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
+ ret = -EINVAL;
+ break;
+ }
+
+ n_bytes = vsk->transport->unsent_bytes(vsk);
+ if (n_bytes < 0) {
+ ret = n_bytes;
+ break;
+ }
+
+ ret = put_user(n_bytes, arg);
+ break;
+ }
+ default:
+ ret = -ENOIOCTLCMD;
+ }
+
+ return ret;
+}
+
+static int vsock_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+
+ lock_sock(sock->sk);
+ ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
+ release_sock(sock->sk);
+
+ return ret;
+}
+
static const struct proto_ops vsock_dgram_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -1302,7 +1440,7 @@ static const struct proto_ops vsock_dgram_ops = {
.accept = sock_no_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
.sendmsg = vsock_dgram_sendmsg,
@@ -1427,6 +1565,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
if (err < 0)
goto out;
+ /* sk_err might have been set as a result of an earlier
+ * (failed) connect attempt.
+ */
+ sk->sk_err = 0;
+
/* Mark sock as connecting and set the error code to in
* progress in case this is a non-blocking connect.
*/
@@ -1441,7 +1584,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
+ /* If the socket is already closing or it is in an error state, there
+ * is no point in waiting.
+ */
+ while (sk->sk_state != TCP_ESTABLISHED &&
+ sk->sk_state != TCP_CLOSING && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -1500,8 +1647,8 @@ out:
return err;
}
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
+static int vsock_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg)
{
struct sock *listener;
int err;
@@ -1528,7 +1675,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
/* Wait for children sockets to appear; these are the new sockets
* created upon connection establishment.
*/
- timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
+ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK);
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
while ((connected = vsock_dequeue_accept(listener)) == NULL &&
@@ -2174,15 +2321,12 @@ out:
}
int
-vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct sock *sk;
struct vsock_sock *vsk;
const struct vsock_transport *transport;
-#ifdef CONFIG_BPF_SYSCALL
- const struct proto *prot;
-#endif
int err;
sk = sock->sk;
@@ -2233,14 +2377,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
goto out;
}
-#ifdef CONFIG_BPF_SYSCALL
- prot = READ_ONCE(sk->sk_prot);
- if (prot != &vsock_proto) {
- release_sock(sk);
- return prot->recvmsg(sk, msg, len, flags, NULL);
- }
-#endif
-
if (sk->sk_type == SOCK_STREAM)
err = __vsock_stream_recvmsg(sk, msg, len, flags);
else
@@ -2250,6 +2386,22 @@ out:
release_sock(sk);
return err;
}
+
+int
+vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
+ const struct proto *prot;
+
+ prot = READ_ONCE(sk->sk_prot);
+ if (prot != &vsock_proto)
+ return prot->recvmsg(sk, msg, len, flags, NULL);
+#endif
+
+ return __vsock_connectible_recvmsg(sock, msg, len, flags);
+}
EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg);
static int vsock_set_rcvlowat(struct sock *sk, int val)
@@ -2286,7 +2438,7 @@ static const struct proto_ops vsock_stream_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
@@ -2308,7 +2460,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
@@ -2357,6 +2509,7 @@ static int vsock_create(struct net *net, struct socket *sock,
if (sock->type == SOCK_DGRAM) {
ret = vsock_assign_transport(vsk, NULL);
if (ret < 0) {
+ sock->sk = NULL;
sock_put(sk);
return ret;
}