diff options
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
| -rw-r--r-- | fs/ocfs2/cluster/tcp.c | 171 |
1 files changed, 63 insertions, 108 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index e9f236af1927..79b281e32f4c 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1,33 +1,17 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * - * vim: noexpandtab sw=8 ts=8 sts=0: +// SPDX-License-Identifier: GPL-2.0-or-later +/* * * Copyright (C) 2004 Oracle. All rights reserved. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * ---- * - * Callers for this were originally written against a very simple synchronus + * Callers for this were originally written against a very simple synchronous * API. This implementation reflects those simple callers. Some day I'm sure * we'll need to move to a more robust posting/callback mechanism. * * Transmit calls pass in kernel virtual addresses and block copying this into * the socket's tx buffers via a usual blocking sendmsg. They'll block waiting - * for a failed socket to timeout. TX callers can also pass in a poniter to an + * for a failed socket to timeout. TX callers can also pass in a pointer to an * 'int' which gets filled with an errno off the wire in response to the * message they send. * @@ -62,6 +46,7 @@ #include <linux/net.h> #include <linux/export.h> #include <net/tcp.h> +#include <trace/events/sock.h> #include <linux/uaccess.h> @@ -116,7 +101,7 @@ static struct socket *o2net_listen_sock; * o2net_wq. teardown detaches the callbacks before destroying the workqueue. * quorum work is queued as sock containers are shutdown.. stop_listening * tears down all the node's sock containers, preventing future shutdowns - * and queued quroum work, before canceling delayed quorum work and + * and queued quorum work, before canceling delayed quorum work and * destroying the work queue. */ static struct workqueue_struct *o2net_wq; @@ -601,6 +586,8 @@ static void o2net_data_ready(struct sock *sk) void (*ready)(struct sock *sk); struct o2net_sock_container *sc; + trace_sk_data_ready(sk); + read_lock_bh(&sk->sk_callback_lock); sc = sk->sk_user_data; if (sc) { @@ -737,7 +724,7 @@ static void o2net_shutdown_sc(struct work_struct *work) if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) { /* we shouldn't flush as we're in the thread, the * races with pending sc work structs are harmless */ - del_timer_sync(&sc->sc_idle_timeout); + timer_delete_sync(&sc->sc_idle_timeout); o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); sc_put(sc); kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR); @@ -916,7 +903,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) { struct kvec vec = { .iov_len = len, .iov_base = data, }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; - iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, len); return sock_recvmsg(sock, &msg, MSG_DONTWAIT); } @@ -943,19 +930,22 @@ out: } static void o2net_sendpage(struct o2net_sock_container *sc, - void *kmalloced_virt, - size_t size) + void *virt, size_t size) { struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); + struct msghdr msg = {}; + struct bio_vec bv; ssize_t ret; + bvec_set_virt(&bv, virt, size); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size); + while (1) { + msg.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES; mutex_lock(&sc->sc_send_lock); - ret = sc->sc_sock->ops->sendpage(sc->sc_sock, - virt_to_page(kmalloced_virt), - offset_in_page(kmalloced_virt), - size, MSG_DONTWAIT); + ret = sock_sendmsg(sc->sc_sock, &msg); mutex_unlock(&sc->sc_send_lock); + if (ret == size) break; if (ret == (ssize_t)-EAGAIN) { @@ -1006,14 +996,12 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, } /* Get a map of all nodes to which this node is currently connected to */ -void o2net_fill_node_map(unsigned long *map, unsigned bytes) +void o2net_fill_node_map(unsigned long *map, unsigned int bits) { struct o2net_sock_container *sc; int node, ret; - BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); - - memset(map, 0, bytes); + bitmap_zero(map, bits); for (node = 0; node < O2NM_MAX_NODES; ++node) { if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret)) continue; @@ -1212,7 +1200,6 @@ static int o2net_process_message(struct o2net_sock_container *sc, msglog(hdr, "bad magic\n"); ret = -EINVAL; goto out; - break; } /* find a handler for it */ @@ -1432,7 +1419,7 @@ out: return ret; } -/* this work func is triggerd by data ready. it reads until it can read no +/* this work func is triggered by data ready. it reads until it can read no * more. it interprets 0, eof, as fatal. if data_ready hits while we're doing * our work the work struct will be marked and we'll be called again. */ static void o2net_rx_until_empty(struct work_struct *work) @@ -1455,22 +1442,6 @@ static void o2net_rx_until_empty(struct work_struct *work) sc_put(sc); } -static int o2net_set_nodelay(struct socket *sock) -{ - int val = 1; - - return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (void *)&val, sizeof(val)); -} - -static int o2net_set_usertimeout(struct socket *sock) -{ - int user_timeout = O2NET_TCP_USER_TIMEOUT; - - return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (void *)&user_timeout, sizeof(user_timeout)); -} - static void o2net_initialize_handshake(void) { o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( @@ -1512,12 +1483,13 @@ static void o2net_sc_send_keep_req(struct work_struct *work) sc_put(sc); } -/* socket shutdown does a del_timer_sync against this as it tears down. +/* socket shutdown does a timer_delete_sync against this as it tears down. * we can't start this timer until we've got to the point in sc buildup * where shutdown is going to be involved */ static void o2net_idle_timer(struct timer_list *t) { - struct o2net_sock_container *sc = from_timer(sc, t, sc_idle_timeout); + struct o2net_sock_container *sc = timer_container_of(sc, t, + sc_idle_timeout); struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); #ifdef CONFIG_DEBUG_FS unsigned long msecs = ktime_to_ms(ktime_get()) - @@ -1584,15 +1556,13 @@ static void o2net_start_connect(struct work_struct *work) struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; int ret = 0, stop; unsigned int timeout; - unsigned int noio_flag; + unsigned int nofs_flag; /* - * sock_create allocates the sock with GFP_KERNEL. We must set - * per-process flag PF_MEMALLOC_NOIO so that all allocations done - * by this process are done as if GFP_NOIO was specified. So we - * are not reentering filesystem while doing memory reclaim. + * sock_create allocates the sock with GFP_KERNEL. We must + * prevent the filesystem from being reentered by memory reclaim. */ - noio_flag = memalloc_noio_save(); + nofs_flag = memalloc_nofs_save(); /* if we're greater we initiate tx, otherwise we accept */ if (o2nm_this_node() <= o2net_num_from_nn(nn)) goto out; @@ -1639,12 +1609,13 @@ static void o2net_start_connect(struct work_struct *work) sc->sc_sock = sock; /* freed by sc_kref_release */ sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_use_task_frag = false; myaddr.sin_family = AF_INET; myaddr.sin_addr.s_addr = mynode->nd_ipv4_address; myaddr.sin_port = htons(0); /* any port */ - ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, + ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&myaddr, sizeof(myaddr)); if (ret) { mlog(ML_ERROR, "bind failed with %d at address %pI4\n", @@ -1652,17 +1623,8 @@ static void o2net_start_connect(struct work_struct *work) goto out; } - ret = o2net_set_nodelay(sc->sc_sock); - if (ret) { - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); - goto out; - } - - ret = o2net_set_usertimeout(sock); - if (ret) { - mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); - goto out; - } + tcp_sock_set_nodelay(sc->sc_sock->sk); + tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT); o2net_register_callbacks(sc->sc_sock->sk, sc); @@ -1676,7 +1638,7 @@ static void o2net_start_connect(struct work_struct *work) remoteaddr.sin_port = node->nd_ipv4_port; ret = sc->sc_sock->ops->connect(sc->sc_sock, - (struct sockaddr *)&remoteaddr, + (struct sockaddr_unsized *)&remoteaddr, sizeof(remoteaddr), O_NONBLOCK); if (ret == -EINPROGRESS) @@ -1697,7 +1659,7 @@ out: if (mynode) o2nm_node_put(mynode); - memalloc_noio_restore(noio_flag); + memalloc_nofs_restore(nofs_flag); return; } @@ -1776,7 +1738,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, (msecs_to_jiffies(o2net_reconnect_delay()) + 1); if (node_num != o2nm_this_node()) { - /* believe it or not, accept and node hearbeating testing + /* believe it or not, accept and node heartbeating testing * can succeed for this node before we got here.. so * only use set_nn_state to clear the persistent error * if that hasn't already happened */ @@ -1823,16 +1785,17 @@ static int o2net_accept_one(struct socket *sock, int *more) struct o2nm_node *node = NULL; struct o2nm_node *local_node = NULL; struct o2net_sock_container *sc = NULL; + struct proto_accept_arg arg = { + .flags = O_NONBLOCK, + }; struct o2net_node *nn; - unsigned int noio_flag; + unsigned int nofs_flag; /* - * sock_create_lite allocates the sock with GFP_KERNEL. We must set - * per-process flag PF_MEMALLOC_NOIO so that all allocations done - * by this process are done as if GFP_NOIO was specified. So we - * are not reentering filesystem while doing memory reclaim. + * sock_create_lite allocates the sock with GFP_KERNEL. We must + * prevent the filesystem from being reentered by memory reclaim. */ - noio_flag = memalloc_noio_save(); + nofs_flag = memalloc_nofs_save(); BUG_ON(sock == NULL); *more = 0; @@ -1843,24 +1806,15 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); + ret = sock->ops->accept(sock, new_sock, &arg); if (ret < 0) goto out; *more = 1; new_sock->sk->sk_allocation = GFP_ATOMIC; - ret = o2net_set_nodelay(new_sock); - if (ret) { - mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret); - goto out; - } - - ret = o2net_set_usertimeout(new_sock); - if (ret) { - mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); - goto out; - } + tcp_sock_set_nodelay(new_sock->sk); + tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT); ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1); if (ret < 0) @@ -1948,7 +1902,7 @@ out: if (sc) sc_put(sc); - memalloc_noio_restore(noio_flag); + memalloc_nofs_restore(nofs_flag); return ret; } @@ -1962,7 +1916,6 @@ static void o2net_accept_many(struct work_struct *work) { struct socket *sock = o2net_listen_sock; int more; - int err; /* * It is critical to note that due to interrupt moderation @@ -1977,7 +1930,7 @@ static void o2net_accept_many(struct work_struct *work) */ for (;;) { - err = o2net_accept_one(sock, &more); + o2net_accept_one(sock, &more); if (!more) break; cond_resched(); @@ -1988,6 +1941,8 @@ static void o2net_listen_data_ready(struct sock *sk) { void (*ready)(struct sock *sk); + trace_sk_data_ready(sk); + read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (ready == NULL) { /* check for teardown race */ @@ -2047,7 +2002,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) INIT_WORK(&o2net_listen_work, o2net_accept_many); sock->sk->sk_reuse = SK_CAN_REUSE; - ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&sin, sizeof(sin)); if (ret < 0) { printk(KERN_ERR "o2net: Error %d while binding socket at " "%pI4:%u\n", ret, &addr, ntohs(port)); @@ -2139,18 +2094,23 @@ void o2net_stop_listening(struct o2nm_node *node) int o2net_init(void) { + struct folio *folio; + void *p; unsigned long i; o2quo_init(); + o2net_debugfs_init(); - if (o2net_debugfs_init()) + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0); + if (!folio) goto out; - o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); - o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); - o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); - if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) - goto out; + p = folio_address(folio); + o2net_hand = p; + p += sizeof(struct o2net_handshake); + o2net_keep_req = p; + p += sizeof(struct o2net_msg); + o2net_keep_resp = p; o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); o2net_hand->connector_id = cpu_to_be64(1); @@ -2177,9 +2137,6 @@ int o2net_init(void) return 0; out: - kfree(o2net_hand); - kfree(o2net_keep_req); - kfree(o2net_keep_resp); o2net_debugfs_exit(); o2quo_exit(); return -ENOMEM; @@ -2188,8 +2145,6 @@ out: void o2net_exit(void) { o2quo_exit(); - kfree(o2net_hand); - kfree(o2net_keep_req); - kfree(o2net_keep_resp); o2net_debugfs_exit(); + folio_put(virt_to_folio(o2net_hand)); } |
