summary | refs | log | tree | commit | diff
path: root/fs/dlm
diff options
context:
space:
mode:
author: Bob Peterson <rpeterso@redhat.com> 2017-09-12 08:55:04 +0000
committer: David Teigland <teigland@redhat.com> 2017-09-25 12:45:21 -0500
commit: 61d9102b62129e13a2258c1e0566962f9a1732f0 (patch)
tree: 94f017d688925ebecfd3da5e96735c2f55af1eaf /fs/dlm
parent: e19b205be43d11bff638cad4487008c48d21c103 (diff)
DLM: Eliminate CF_CONNECT_PENDING flag
Before this patch, there was a flag in the con structure that was used to determine whether or not a connect was needed. The bit was set here and there, and cleared here and there, so it left some race conditions: the bit was set, work was queued, then the worker cleared the bit, allowing someone else to set it while the worker ran. For the most part, this worked okay, but we got into trouble if connections were lost and it needed to reconnect.

This patch eliminates the flag in favor of simply checking if we actually have a sock pointer while protected by the mutex.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Tadashi Miyauchi <miyauchi@toshiba-tops.co.jp>
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm')
-rw-r--r-- fs/dlm/lowcomms.c 10
1 files changed, 3 insertions, 7 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4813d0e0cd9b..6a7a49b93374 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -107,7 +107,6 @@ struct connection {
unsigned long flags;
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
-#define CF_CONNECT_PENDING 3
#define CF_INIT_PENDING 4
#define CF_IS_OTHERCON 5
#define CF_CLOSE 6
@@ -435,8 +434,8 @@ static inline void lowcomms_connect_sock(struct connection *con)
{
if (test_bit(CF_CLOSE, &con->flags))
return;
- if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
- queue_work(send_workqueue, &con->swork);
+ queue_work(send_workqueue, &con->swork);
+ cond_resched();
}
static void lowcomms_state_change(struct sock *sk)
@@ -579,7 +578,6 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
static void close_connection(struct connection *con, bool and_other,
bool tx, bool rx)
{
- clear_bit(CF_CONNECT_PENDING, &con->flags);
clear_bit(CF_WRITE_PENDING, &con->flags);
if (tx && cancel_work_sync(&con->swork))
log_print("canceled swork for node %d", con->nodeid);
@@ -1098,7 +1096,6 @@ socket_err:
con->retries, result);
mutex_unlock(&con->sock_mutex);
msleep(1000);
- clear_bit(CF_CONNECT_PENDING, &con->flags);
lowcomms_connect_sock(con);
return;
}
@@ -1194,7 +1191,6 @@ out_err:
con->retries, result);
mutex_unlock(&con->sock_mutex);
msleep(1000);
- clear_bit(CF_CONNECT_PENDING, &con->flags);
lowcomms_connect_sock(con);
return;
}
@@ -1593,7 +1589,7 @@ static void process_send_sockets(struct work_struct *work)
{
struct connection *con = container_of(work, struct connection, swork);
- if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags))
+ if (con->sock == NULL) /* not mutex protected so check it inside too */
con->connect_action(con);
if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
send_to_sock(con);