From a5b7ab6352bfaab6eec4df6618a135341d72c247 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 26 Jun 2020 13:26:49 -0400 Subject: fs: dlm: set skb mark for listen socket This patch adds support to set the skb mark value for the DLM listen tcp and sctp sockets. The mark value will be offered as cluster configuration. At creation time of the listen socket it will be set as socket option. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/dlm/lowcomms.c') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 3543a8fec907..eaedad7d069a 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1111,6 +1111,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, goto create_out; } + sock_set_mark(sock->sk, dlm_config.ci_mark); + /* Turn off Nagle's algorithm */ tcp_sock_set_nodelay(sock->sk); @@ -1185,6 +1187,7 @@ static int sctp_listen_for_all(void) } sock_set_rcvbuf(sock->sk, NEEDED_RMEM); + sock_set_mark(sock->sk, dlm_config.ci_mark); sctp_sock_set_nodelay(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); -- cgit From 9c9f168f5b145986535f727d259ef75f6ea26990 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 26 Jun 2020 13:26:50 -0400 Subject: fs: dlm: set skb mark per peer socket This patch adds support to set the skb mark value for the DLM tcp and sctp socket per peer. The mark value will be offered as per comm value of configfs. At creation time of the peer socket it will be set as socket option. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/dlm/lowcomms.c') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index eaedad7d069a..3fa1b93dbbc7 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -914,6 +914,7 @@ static void sctp_connect_to_sock(struct connection *con) int result; int addr_len; struct socket *sock; + unsigned int mark; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -944,6 +945,13 @@ static void sctp_connect_to_sock(struct connection *con) if (result < 0) goto socket_err; + /* set skb mark */ + result = dlm_comm_mark(con->nodeid, &mark); + if (result < 0) + goto bind_err; + + sock_set_mark(sock->sk, mark); + con->rx_action = receive_from_sock; con->connect_action = sctp_connect_to_sock; add_sock(sock, con); @@ -1006,6 +1014,7 @@ static void tcp_connect_to_sock(struct connection *con) struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; + unsigned int mark; int result; if (con->nodeid == 0) { @@ -1027,6 +1036,13 @@ static void tcp_connect_to_sock(struct connection *con) if (result < 0) goto out_err; + /* set skb mark */ + result = dlm_comm_mark(con->nodeid, &mark); + if (result < 0) + goto out_err; + + sock_set_mark(sock->sk, mark); + memset(&saddr, 0, sizeof(saddr)); result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); if (result < 0) { -- cgit From 0ea47e4d2109efd61e9949d014b37ea835f20861 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 27 Jul 2020 09:13:36 -0400 Subject: fs: dlm: don't close socket on invalid message This patch doesn't close sockets when there is an invalid dlm message received. The connection will probably reconnect anyway so. To not close the connection will reduce the number of possible failtures. As we don't have a different strategy to react on such scenario just keep going the connection and ignore the message. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/dlm/lowcomms.c') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 3fa1b93dbbc7..9e6acbb47bb9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -685,14 +685,14 @@ static int receive_from_sock(struct connection *con) page_address(con->rx_page), con->cb.base, con->cb.len, PAGE_SIZE); - if (ret == -EBADMSG) { - log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d", - page_address(con->rx_page), con->cb.base, + if (ret < 0) { + log_print("lowcomms err %d: addr=%p, base=%u, len=%u, read=%d", + ret, page_address(con->rx_page), con->cb.base, con->cb.len, r); + cbuf_eat(&con->cb, r); + } else { + cbuf_eat(&con->cb, ret); } - if (ret < 0) - goto out_close; - cbuf_eat(&con->cb, ret); if (cbuf_empty(&con->cb) && !call_again_soon) { __free_page(con->rx_page); -- cgit From ba3ab3ca68caafb7700c4abae357b7fb7538df11 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 27 Jul 2020 09:13:37 -0400 Subject: fs: dlm: change handling of reconnects This patch changes the handling of reconnects. At first we only close the connection related to the communication failure. If we get a new connection for an already existing connection we close the existing connection and take the new one. This patch improves significantly the stability of tcp connections while running "tcpkill -9 -i $IFACE port 21064" while generating a lot of dlm messages e.g. on a gfs2 mount with many files. My test setup shows that a deadlock is "more" unlikely. Before this patch I wasn't able to get not a deadlock after 5 seconds. After this patch my observation is that it's more likely to survive after 5 seconds and more, but still a deadlock occurs after certain time. My guess is that there are still "segments" inside the tcp writequeue or retransmit queue which get dropped when receiving a tcp reset [1]. Hard to reproduce because the right message need to be inside these queues, which might even be in the 5 first seconds with this patch. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/ipv4/tcp_input.c?h=v5.8-rc6#n4122 Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs/dlm/lowcomms.c') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9e6acbb47bb9..289439fdca99 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -713,7 +713,7 @@ out_resched: out_close: mutex_unlock(&con->sock_mutex); if (ret != -EAGAIN) { - close_connection(con, true, true, false); + close_connection(con, false, true, false); /* Reconnect when there is something to send */ } /* Don't return success if we really got EOF */ @@ -804,21 +804,16 @@ static int accept_from_sock(struct connection *con) INIT_WORK(&othercon->swork, process_send_sockets); INIT_WORK(&othercon->rwork, process_recv_sockets); set_bit(CF_IS_OTHERCON, &othercon->flags); + } else { + /* close other sock con if we have something new */ + close_connection(othercon, false, true, false); } + mutex_lock_nested(&othercon->sock_mutex, 2); - if (!othercon->sock) { - newcon->othercon = othercon; - add_sock(newsock, othercon); - addcon = othercon; - mutex_unlock(&othercon->sock_mutex); - } - else { - printk("Extra connection from node %d attempted\n", nodeid); - result = -EAGAIN; - mutex_unlock(&othercon->sock_mutex); - mutex_unlock(&newcon->sock_mutex); - goto accept_err; - } + newcon->othercon = othercon; + add_sock(newsock, othercon); + addcon = othercon; + mutex_unlock(&othercon->sock_mutex); } else { newcon->rx_action = receive_from_sock; @@ -1415,7 +1410,7 @@ out: send_error: mutex_unlock(&con->sock_mutex); - close_connection(con, true, false, true); + close_connection(con, false, false, true); /* Requeue the send work. When the work daemon runs again, it will try a new connection, then call this function again. */ queue_work(send_workqueue, &con->swork); -- cgit From 055923bf6b48659755b5f0169e34107ee2cb9b68 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 27 Jul 2020 09:13:38 -0400 Subject: fs: dlm: implement tcp graceful shutdown During my code inspection I saw there is no implementation of a graceful shutdown for tcp. This patch will introduce a graceful shutdown for tcp connections. The shutdown is implemented synchronized as dlm_lowcomms_stop() is called to end all dlm communication. After shutdown is done, a lot of flush and closing functionality will be called. However I don't see a problem with that. The waitqueue for synchronize the shutdown has a timeout of 10 seconds, if timeout a force close will be exectued. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 5 deletions(-) (limited to 'fs/dlm/lowcomms.c') diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 289439fdca99..5050fe05769b 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -63,6 +63,7 @@ /* Number of messages to send before rescheduling */ #define MAX_SEND_MSG_COUNT 25 +#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000) struct cbuf { unsigned int base; @@ -110,10 +111,12 @@ struct connection { #define CF_CLOSE 6 #define CF_APP_LIMITED 7 #define CF_CLOSING 8 +#define CF_SHUTDOWN 9 struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; int (*rx_action) (struct connection *); /* What to do when active */ void (*connect_action) (struct connection *); /* What to do to connect */ + void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ struct page *rx_page; struct cbuf cb; int retries; @@ -122,6 +125,7 @@ struct connection { struct connection *othercon; struct work_struct rwork; /* Receive workqueue */ struct work_struct swork; /* Send workqueue */ + wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */ }; #define sock2con(x) ((struct connection *)(x)->sk_user_data) @@ -218,6 +222,7 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) spin_lock_init(&con->writequeue_lock); INIT_WORK(&con->swork, process_send_sockets); INIT_WORK(&con->rwork, process_recv_sockets); + init_waitqueue_head(&con->shutdown_wait); /* Setup action pointers for child sockets */ if (con->nodeid) { @@ -619,6 +624,54 @@ static void close_connection(struct connection *con, bool and_other, clear_bit(CF_CLOSING, &con->flags); } +static void shutdown_connection(struct connection *con) +{ + int ret; + + if (cancel_work_sync(&con->swork)) { + log_print("canceled swork for node %d", con->nodeid); + clear_bit(CF_WRITE_PENDING, &con->flags); + } + + mutex_lock(&con->sock_mutex); + /* nothing to shutdown */ + if (!con->sock) { + mutex_unlock(&con->sock_mutex); + return; + } + + set_bit(CF_SHUTDOWN, &con->flags); + ret = kernel_sock_shutdown(con->sock, SHUT_WR); + mutex_unlock(&con->sock_mutex); + if (ret) { + log_print("Connection %p failed to shutdown: %d will force close", + con, ret); + goto force_close; + } else { + ret = wait_event_timeout(con->shutdown_wait, + !test_bit(CF_SHUTDOWN, &con->flags), + DLM_SHUTDOWN_WAIT_TIMEOUT); + if (ret == 0) { + log_print("Connection %p shutdown timed out, will force close", + con); + goto force_close; + } + } + + return; + +force_close: + clear_bit(CF_SHUTDOWN, &con->flags); + close_connection(con, false, true, true); +} + +static void dlm_tcp_shutdown(struct connection *con) +{ + if (con->othercon) + shutdown_connection(con->othercon); + shutdown_connection(con); +} + /* Data received from remote end */ static int receive_from_sock(struct connection *con) { @@ -713,13 +766,18 @@ out_resched: out_close: mutex_unlock(&con->sock_mutex); if (ret != -EAGAIN) { - close_connection(con, false, true, false); /* Reconnect when there is something to send */ + close_connection(con, false, true, false); + if (ret == 0) { + log_print("connection %p got EOF from %d", + con, con->nodeid); + /* handling for tcp shutdown */ + clear_bit(CF_SHUTDOWN, &con->flags); + wake_up(&con->shutdown_wait); + /* signal to breaking receive worker */ + ret = -1; + } } - /* Don't return success if we really got EOF */ - if (ret == 0) - ret = -EAGAIN; - return ret; } @@ -803,6 +861,7 @@ static int accept_from_sock(struct connection *con) spin_lock_init(&othercon->writequeue_lock); INIT_WORK(&othercon->swork, process_send_sockets); INIT_WORK(&othercon->rwork, process_recv_sockets); + init_waitqueue_head(&othercon->shutdown_wait); set_bit(CF_IS_OTHERCON, &othercon->flags); } else { /* close other sock con if we have something new */ @@ -1047,6 +1106,7 @@ static void tcp_connect_to_sock(struct connection *con) con->rx_action = receive_from_sock; con->connect_action = tcp_connect_to_sock; + con->shutdown_action = dlm_tcp_shutdown; add_sock(sock, con); /* Bind to our cluster-known address connecting to avoid @@ -1542,6 +1602,12 @@ static void stop_conn(struct connection *con) _stop_conn(con, true); } +static void shutdown_conn(struct connection *con) +{ + if (con->shutdown_action) + con->shutdown_action(con); +} + static void free_conn(struct connection *con) { close_connection(con, true, true, true); @@ -1593,6 +1659,7 @@ void dlm_lowcomms_stop(void) mutex_lock(&connections_lock); dlm_allow_conn = 0; mutex_unlock(&connections_lock); + foreach_conn(shutdown_conn); work_flush(); clean_writequeues(); foreach_conn(free_conn); -- cgit