summary | refs | log | tree | commit | diff
path: root/fs/ocfs2/cluster/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
-rw-r--r-- fs/ocfs2/cluster/tcp.c 171
1 file changed, 63 insertions, 108 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index e9f236af1927..79b281e32f4c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1,33 +1,17 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- *
- * vim: noexpandtab sw=8 ts=8 sts=0:
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
*
* Copyright (C) 2004 Oracle. All rights reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
* ----
*
- * Callers for this were originally written against a very simple synchronus
+ * Callers for this were originally written against a very simple synchronous
* API. This implementation reflects those simple callers. Some day I'm sure
* we'll need to move to a more robust posting/callback mechanism.
*
* Transmit calls pass in kernel virtual addresses and block copying this into
* the socket's tx buffers via a usual blocking sendmsg. They'll block waiting
- * for a failed socket to timeout. TX callers can also pass in a poniter to an
+ * for a failed socket to timeout. TX callers can also pass in a pointer to an
* 'int' which gets filled with an errno off the wire in response to the
* message they send.
*
@@ -62,6 +46,7 @@
#include <linux/net.h>
#include <linux/export.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include <linux/uaccess.h>
@@ -116,7 +101,7 @@ static struct socket *o2net_listen_sock;
* o2net_wq. teardown detaches the callbacks before destroying the workqueue.
* quorum work is queued as sock containers are shutdown.. stop_listening
* tears down all the node's sock containers, preventing future shutdowns
- * and queued quroum work, before canceling delayed quorum work and
+ * and queued quorum work, before canceling delayed quorum work and
* destroying the work queue.
*/
static struct workqueue_struct *o2net_wq;
@@ -601,6 +586,8 @@ static void o2net_data_ready(struct sock *sk)
void (*ready)(struct sock *sk);
struct o2net_sock_container *sc;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
sc = sk->sk_user_data;
if (sc) {
@@ -737,7 +724,7 @@ static void o2net_shutdown_sc(struct work_struct *work)
if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) {
/* we shouldn't flush as we're in the thread, the
* races with pending sc work structs are harmless */
- del_timer_sync(&sc->sc_idle_timeout);
+ timer_delete_sync(&sc->sc_idle_timeout);
o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
sc_put(sc);
kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR);
@@ -916,7 +903,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
{
struct kvec vec = { .iov_len = len, .iov_base = data, };
struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
- iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, len);
return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
}
@@ -943,19 +930,22 @@ out:
}
static void o2net_sendpage(struct o2net_sock_container *sc,
- void *kmalloced_virt,
- size_t size)
+ void *virt, size_t size)
{
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
+ struct msghdr msg = {};
+ struct bio_vec bv;
ssize_t ret;
+ bvec_set_virt(&bv, virt, size);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size);
+
while (1) {
+ msg.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES;
mutex_lock(&sc->sc_send_lock);
- ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
- virt_to_page(kmalloced_virt),
- offset_in_page(kmalloced_virt),
- size, MSG_DONTWAIT);
+ ret = sock_sendmsg(sc->sc_sock, &msg);
mutex_unlock(&sc->sc_send_lock);
+
if (ret == size)
break;
if (ret == (ssize_t)-EAGAIN) {
@@ -1006,14 +996,12 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
}
/* Get a map of all nodes to which this node is currently connected to */
-void o2net_fill_node_map(unsigned long *map, unsigned bytes)
+void o2net_fill_node_map(unsigned long *map, unsigned int bits)
{
struct o2net_sock_container *sc;
int node, ret;
- BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
-
- memset(map, 0, bytes);
+ bitmap_zero(map, bits);
for (node = 0; node < O2NM_MAX_NODES; ++node) {
if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret))
continue;
@@ -1212,7 +1200,6 @@ static int o2net_process_message(struct o2net_sock_container *sc,
msglog(hdr, "bad magic\n");
ret = -EINVAL;
goto out;
- break;
}
/* find a handler for it */
@@ -1432,7 +1419,7 @@ out:
return ret;
}
-/* this work func is triggerd by data ready. it reads until it can read no
+/* this work func is triggered by data ready. it reads until it can read no
* more. it interprets 0, eof, as fatal. if data_ready hits while we're doing
* our work the work struct will be marked and we'll be called again. */
static void o2net_rx_until_empty(struct work_struct *work)
@@ -1455,22 +1442,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
sc_put(sc);
}
-static int o2net_set_nodelay(struct socket *sock)
-{
- int val = 1;
-
- return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (void *)&val, sizeof(val));
-}
-
-static int o2net_set_usertimeout(struct socket *sock)
-{
- int user_timeout = O2NET_TCP_USER_TIMEOUT;
-
- return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (void *)&user_timeout, sizeof(user_timeout));
-}
-
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1512,12 +1483,13 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
sc_put(sc);
}
-/* socket shutdown does a del_timer_sync against this as it tears down.
+/* socket shutdown does a timer_delete_sync against this as it tears down.
* we can't start this timer until we've got to the point in sc buildup
* where shutdown is going to be involved */
static void o2net_idle_timer(struct timer_list *t)
{
- struct o2net_sock_container *sc = from_timer(sc, t, sc_idle_timeout);
+ struct o2net_sock_container *sc = timer_container_of(sc, t,
+ sc_idle_timeout);
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
#ifdef CONFIG_DEBUG_FS
unsigned long msecs = ktime_to_ms(ktime_get()) -
@@ -1584,15 +1556,13 @@ static void o2net_start_connect(struct work_struct *work)
struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
int ret = 0, stop;
unsigned int timeout;
- unsigned int noio_flag;
+ unsigned int nofs_flag;
/*
- * sock_create allocates the sock with GFP_KERNEL. We must set
- * per-process flag PF_MEMALLOC_NOIO so that all allocations done
- * by this process are done as if GFP_NOIO was specified. So we
- * are not reentering filesystem while doing memory reclaim.
+ * sock_create allocates the sock with GFP_KERNEL. We must
+ * prevent the filesystem from being reentered by memory reclaim.
*/
- noio_flag = memalloc_noio_save();
+ nofs_flag = memalloc_nofs_save();
/* if we're greater we initiate tx, otherwise we accept */
if (o2nm_this_node() <= o2net_num_from_nn(nn))
goto out;
@@ -1639,12 +1609,13 @@ static void o2net_start_connect(struct work_struct *work)
sc->sc_sock = sock; /* freed by sc_kref_release */
sock->sk->sk_allocation = GFP_ATOMIC;
+ sock->sk->sk_use_task_frag = false;
myaddr.sin_family = AF_INET;
myaddr.sin_addr.s_addr = mynode->nd_ipv4_address;
myaddr.sin_port = htons(0); /* any port */
- ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr,
+ ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&myaddr,
sizeof(myaddr));
if (ret) {
mlog(ML_ERROR, "bind failed with %d at address %pI4\n",
@@ -1652,17 +1623,8 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
- ret = o2net_set_nodelay(sc->sc_sock);
- if (ret) {
- mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
- goto out;
- }
-
- ret = o2net_set_usertimeout(sock);
- if (ret) {
- mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_nodelay(sc->sc_sock->sk);
+ tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT);
o2net_register_callbacks(sc->sc_sock->sk, sc);
@@ -1676,7 +1638,7 @@ static void o2net_start_connect(struct work_struct *work)
remoteaddr.sin_port = node->nd_ipv4_port;
ret = sc->sc_sock->ops->connect(sc->sc_sock,
- (struct sockaddr *)&remoteaddr,
+ (struct sockaddr_unsized *)&remoteaddr,
sizeof(remoteaddr),
O_NONBLOCK);
if (ret == -EINPROGRESS)
@@ -1697,7 +1659,7 @@ out:
if (mynode)
o2nm_node_put(mynode);
- memalloc_noio_restore(noio_flag);
+ memalloc_nofs_restore(nofs_flag);
return;
}
@@ -1776,7 +1738,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
(msecs_to_jiffies(o2net_reconnect_delay()) + 1);
if (node_num != o2nm_this_node()) {
- /* believe it or not, accept and node hearbeating testing
+ /* believe it or not, accept and node heartbeating testing
* can succeed for this node before we got here.. so
* only use set_nn_state to clear the persistent error
* if that hasn't already happened */
@@ -1823,16 +1785,17 @@ static int o2net_accept_one(struct socket *sock, int *more)
struct o2nm_node *node = NULL;
struct o2nm_node *local_node = NULL;
struct o2net_sock_container *sc = NULL;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ };
struct o2net_node *nn;
- unsigned int noio_flag;
+ unsigned int nofs_flag;
/*
- * sock_create_lite allocates the sock with GFP_KERNEL. We must set
- * per-process flag PF_MEMALLOC_NOIO so that all allocations done
- * by this process are done as if GFP_NOIO was specified. So we
- * are not reentering filesystem while doing memory reclaim.
+ * sock_create_lite allocates the sock with GFP_KERNEL. We must
+ * prevent the filesystem from being reentered by memory reclaim.
*/
- noio_flag = memalloc_noio_save();
+ nofs_flag = memalloc_nofs_save();
BUG_ON(sock == NULL);
*more = 0;
@@ -1843,24 +1806,15 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
*more = 1;
new_sock->sk->sk_allocation = GFP_ATOMIC;
- ret = o2net_set_nodelay(new_sock);
- if (ret) {
- mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
- goto out;
- }
-
- ret = o2net_set_usertimeout(new_sock);
- if (ret) {
- mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_nodelay(new_sock->sk);
+ tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
@@ -1948,7 +1902,7 @@ out:
if (sc)
sc_put(sc);
- memalloc_noio_restore(noio_flag);
+ memalloc_nofs_restore(nofs_flag);
return ret;
}
@@ -1962,7 +1916,6 @@ static void o2net_accept_many(struct work_struct *work)
{
struct socket *sock = o2net_listen_sock;
int more;
- int err;
/*
* It is critical to note that due to interrupt moderation
@@ -1977,7 +1930,7 @@ static void o2net_accept_many(struct work_struct *work)
*/
for (;;) {
- err = o2net_accept_one(sock, &more);
+ o2net_accept_one(sock, &more);
if (!more)
break;
cond_resched();
@@ -1988,6 +1941,8 @@ static void o2net_listen_data_ready(struct sock *sk)
{
void (*ready)(struct sock *sk);
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
ready = sk->sk_user_data;
if (ready == NULL) { /* check for teardown race */
@@ -2047,7 +2002,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
INIT_WORK(&o2net_listen_work, o2net_accept_many);
sock->sk->sk_reuse = SK_CAN_REUSE;
- ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+ ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&sin, sizeof(sin));
if (ret < 0) {
printk(KERN_ERR "o2net: Error %d while binding socket at "
"%pI4:%u\n", ret, &addr, ntohs(port));
@@ -2139,18 +2094,23 @@ void o2net_stop_listening(struct o2nm_node *node)
int o2net_init(void)
{
+ struct folio *folio;
+ void *p;
unsigned long i;
o2quo_init();
+ o2net_debugfs_init();
- if (o2net_debugfs_init())
+ folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0);
+ if (!folio)
goto out;
- o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
- o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
- o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
- if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp)
- goto out;
+ p = folio_address(folio);
+ o2net_hand = p;
+ p += sizeof(struct o2net_handshake);
+ o2net_keep_req = p;
+ p += sizeof(struct o2net_msg);
+ o2net_keep_resp = p;
o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION);
o2net_hand->connector_id = cpu_to_be64(1);
@@ -2177,9 +2137,6 @@ int o2net_init(void)
return 0;
out:
- kfree(o2net_hand);
- kfree(o2net_keep_req);
- kfree(o2net_keep_resp);
o2net_debugfs_exit();
o2quo_exit();
return -ENOMEM;
@@ -2188,8 +2145,6 @@ out:
void o2net_exit(void)
{
o2quo_exit();
- kfree(o2net_hand);
- kfree(o2net_keep_req);
- kfree(o2net_keep_resp);
o2net_debugfs_exit();
+ folio_put(virt_to_folio(o2net_hand));
}