diff options
Diffstat (limited to 'net/vmw_vsock/af_vsock.c')
| -rw-r--r-- | net/vmw_vsock/af_vsock.c | 297 |
1 files changed, 228 insertions, 69 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 25b28b1434f5..adcba1b7bf74 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -117,12 +117,14 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +static void vsock_close(struct sock *sk, long timeout); /* Protocol family. */ struct proto vsock_proto = { .name = "AF_VSOCK", .owner = THIS_MODULE, .obj_size = sizeof(struct vsock_sock), + .close = vsock_close, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = vsock_bpf_update_proto, #endif @@ -335,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { - vsock_remove_bound(vsk); + /* Transport reassignment must not remove the binding. */ + if (sock_flag(sk_vsock(vsk), SOCK_DEAD)) + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -402,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -459,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -474,13 +483,30 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } - if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport && vsk->transport == new_transport) { + ret = 0; + goto err; + } + + /* We increase the module refcnt to prevent the transport unloading + * while there are open sockets assigned to it. + */ + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); + + if (vsk->transport) { /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we * have already held the sock lock. In the other cases, this @@ -489,13 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); - } - /* We increase the module refcnt to prevent the transport unloading - * while there are open sockets assigned to it. - */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + /* transport's release() and destruct() can touch some socket + * state, since we are reassigning the socket to a new transport + * during vsock_connect(), let's reset these fields to have a + * clean state. + */ + sock_reset_flag(sk, SOCK_DONE); + sk->sk_state = TCP_CLOSE; + vsk->peer_shutdown = 0; + } if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -514,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. + */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -641,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) + if (port == VMADDR_PORT_ANY || + port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; @@ -797,39 +846,44 @@ static bool sock_type_connectible(u16 type) static void __vsock_release(struct sock *sk, int level) { - if (sk) { - struct sock *pending; - struct vsock_sock *vsk; + struct vsock_sock *vsk; + struct sock *pending; - vsk = vsock_sk(sk); - pending = NULL; /* Compiler warning. */ + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ - /* When "level" is SINGLE_DEPTH_NESTING, use the nested - * version to avoid the warning "possible recursive locking - * detected". When "level" is 0, lock_sock_nested(sk, level) - * is the same as lock_sock(sk). - */ - lock_sock_nested(sk, level); + /* When "level" is SINGLE_DEPTH_NESTING, use the nested + * version to avoid the warning "possible recursive locking + * detected". When "level" is 0, lock_sock_nested(sk, level) + * is the same as lock_sock(sk). + */ + lock_sock_nested(sk, level); - if (vsk->transport) - vsk->transport->release(vsk); - else if (sock_type_connectible(sk->sk_type)) - vsock_remove_sock(vsk); + /* Indicate to vsock_remove_sock() that the socket is being released and + * can be removed from the bound_table. Unlike transport reassignment + * case, where the socket must remain bound despite vsock_remove_sock() + * being called from the transport release() callback. + */ + sock_set_flag(sk, SOCK_DEAD); - sock_orphan(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + if (vsk->transport) + vsk->transport->release(vsk); + else if (sock_type_connectible(sk->sk_type)) + vsock_remove_sock(vsk); - skb_queue_purge(&sk->sk_receive_queue); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; - /* Clean up any sockets that never were accepted. */ - while ((pending = vsock_dequeue_accept(sk)) != NULL) { - __vsock_release(pending, SINGLE_DEPTH_NESTING); - sock_put(pending); - } + skb_queue_purge(&sk->sk_receive_queue); - release_sock(sk); - sock_put(sk); + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending, SINGLE_DEPTH_NESTING); + sock_put(pending); } + + release_sock(sk); + sock_put(sk); } static void vsock_sk_destruct(struct sock *sk) @@ -870,6 +924,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected); s64 vsock_stream_has_data(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_data); @@ -878,6 +935,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); + if (WARN_ON(!vsk->transport)) + return 0; + if (sk->sk_type == SOCK_SEQPACKET) return vsk->transport->seqpacket_has_data(vsk); else @@ -887,6 +947,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); @@ -901,9 +964,22 @@ void vsock_data_ready(struct sock *sk) } EXPORT_SYMBOL_GPL(vsock_data_ready); +/* Dummy callback required by sockmap. + * See unconditional call of saved_close() in sock_map_close(). + */ +static void vsock_close(struct sock *sk, long timeout) +{ +} + static int vsock_release(struct socket *sock) { - __vsock_release(sock->sk, 0); + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + sk->sk_prot->close(sk, 0); + __vsock_release(sk, 0); sock->sk = NULL; sock->state = SS_FREE; @@ -911,7 +987,7 @@ static int vsock_release(struct socket *sock) } static int -vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +vsock_bind(struct socket *sock, struct sockaddr_unsized *addr, int addr_len) { int err; struct sock *sk; @@ -953,17 +1029,7 @@ static int vsock_getname(struct socket *sock, vm_addr = &vsk->local_addr; } - if (!vm_addr) { - err = -EINVAL; - goto out; - } - - /* sys_getsockname() and sys_getpeername() pass us a - * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately - * that macro is defined in socket.c instead of .h, so we hardcode its - * value here. - */ - BUILD_BUG_ON(sizeof(*vm_addr) > 128); + BUILD_BUG_ON(sizeof(*vm_addr) > sizeof(struct sockaddr_storage)); memcpy(addr, vm_addr, sizeof(*vm_addr)); err = sizeof(*vm_addr); @@ -972,6 +1038,39 @@ out: return err; } +void vsock_linger(struct sock *sk) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + ssize_t (*unsent)(struct vsock_sock *vsk); + struct vsock_sock *vsk = vsock_sk(sk); + long timeout; + + if (!sock_flag(sk, SOCK_LINGER)) + return; + + timeout = sk->sk_lingertime; + if (!timeout) + return; + + /* Transports must implement `unsent_bytes` if they want to support + * SOCK_LINGER through `vsock_linger()` since we use it to check when + * the socket can be closed. + */ + unsent = vsk->transport->unsent_bytes; + if (!unsent) + return; + + add_wait_queue(sk_sleep(sk), &wait); + + do { + if (sk_wait_event(sk, &timeout, unsent(vsk) == 0, &wait)) + break; + } while (!signal_pending(current) && timeout); + + remove_wait_queue(sk_sleep(sk), &wait); +} +EXPORT_SYMBOL_GPL(vsock_linger); + static int vsock_shutdown(struct socket *sock, int mode) { int err; @@ -1054,6 +1153,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, mask |= EPOLLRDHUP; } + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; + if (sock->type == SOCK_DGRAM) { /* For datagram sockets we can read if there is something in * the queue and write as long as the socket isn't shutdown for @@ -1145,6 +1247,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) { struct vsock_sock *vsk = vsock_sk(sk); + if (WARN_ON_ONCE(!vsk->transport)) + return -ENODEV; + return vsk->transport->read_skb(vsk, read_actor); } @@ -1223,7 +1328,7 @@ out: } static int vsock_dgram_connect(struct socket *sock, - struct sockaddr *addr, int addr_len, int flags) + struct sockaddr_unsized *addr, int addr_len, int flags) { int err; struct sock *sk; @@ -1309,6 +1414,28 @@ static int vsock_do_ioctl(struct socket *sock, unsigned int cmd, vsk = vsock_sk(sk); switch (cmd) { + case SIOCINQ: { + ssize_t n_bytes; + + if (!vsk->transport) { + ret = -EOPNOTSUPP; + break; + } + + if (sock_type_connectible(sk->sk_type) && + sk->sk_state == TCP_LISTEN) { + ret = -EINVAL; + break; + } + + n_bytes = vsock_stream_has_data(vsk); + if (n_bytes < 0) { + ret = n_bytes; + break; + } + ret = put_user(n_bytes, arg); + break; + } case SIOCOUTQ: { ssize_t n_bytes; @@ -1401,7 +1528,7 @@ static void vsock_connect_timeout(struct work_struct *work) sock_put(sk); } -static int vsock_connect(struct socket *sock, struct sockaddr *addr, +static int vsock_connect(struct socket *sock, struct sockaddr_unsized *addr, int addr_len, int flags) { int err; @@ -1485,6 +1612,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, if (err < 0) goto out; + /* sk_err might have been set as a result of an earlier + * (failed) connect attempt. + */ + sk->sk_err = 0; + /* Mark sock as connecting and set the error code to in * progress in case this is a non-blocking connect. */ @@ -1499,7 +1631,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { + /* If the socket is already closing or it is in an error state, there + * is no point in waiting. + */ + while (sk->sk_state != TCP_ESTABLISHED && + sk->sk_state != TCP_CLOSING && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection @@ -1513,7 +1649,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, * reschedule it, then ungrab the socket refcount to * keep it balanced. */ - if (mod_delayed_work(system_wq, &vsk->connect_work, + if (mod_delayed_work(system_percpu_wq, &vsk->connect_work, timeout)) sock_put(sk); @@ -1525,18 +1661,40 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, timeout = schedule_timeout(timeout); lock_sock(sk); - if (signal_pending(current)) { - err = sock_intr_errno(timeout); - sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; - sock->state = SS_UNCONNECTED; - vsock_transport_cancel_pkt(vsk); - vsock_remove_connected(vsk); - goto out_wait; - } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) { - err = -ETIMEDOUT; + /* Connection established. Whatever happens to socket once we + * release it, that's not connect()'s concern. No need to go + * into signal and timeout handling. Call it a day. + * + * Note that allowing to "reset" an already established socket + * here is racy and insecure. + */ + if (sk->sk_state == TCP_ESTABLISHED) + break; + + /* If connection was _not_ established and a signal/timeout came + * to be, we want the socket's state reset. User space may want + * to retry. + * + * sk_state != TCP_ESTABLISHED implies that socket is not on + * vsock_connected_table. We keep the binding and the transport + * assigned. + */ + if (signal_pending(current) || timeout == 0) { + err = timeout == 0 ? -ETIMEDOUT : sock_intr_errno(timeout); + + /* Listener might have already responded with + * VIRTIO_VSOCK_OP_RESPONSE. Its handling expects our + * sk_state == TCP_SYN_SENT, which hereby we break. + * In such case VIRTIO_VSOCK_OP_RST will follow. + */ sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; + + /* Try to cancel VIRTIO_VSOCK_OP_REQUEST skb sent out by + * transport->connect(). + */ vsock_transport_cancel_pkt(vsk); + goto out_wait; } @@ -2447,18 +2605,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; |
