From 8113aa91360a013ebe00763bb0823b5a41b11c4d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 3 Jan 2023 19:37:49 +0800
Subject: fs: dlm: fix return value check in dlm_memory_init()

It should check 'cb_cache', after calling kmem_cache_create("dlm_cb").

Fixes: 61bed0baa4db ("fs: dlm: use a non-static queue for callbacks")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index eb7a08641fcf..cdbaa452fc05 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -51,7 +51,7 @@ int __init dlm_memory_init(void)
 	cb_cache = kmem_cache_create("dlm_cb", sizeof(struct dlm_callback),
 				     __alignof__(struct dlm_callback), 0,
 				     NULL);
-	if (!rsb_cache)
+	if (!cb_cache)
 		goto cb;
 
 	return 0;
-- 
cgit 


From aa7f4a21f6e5b0fa256b4de0924fea506ad801ef Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 4 Jan 2023 16:38:05 -0800
Subject: fs/dlm: Remove "select SRCU"

Now that the SRCU Kconfig option is unconditionally selected, there is
no longer any point in selecting it.  Therefore, remove the "select SRCU"
Kconfig statements.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/Kconfig | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 1105ce3c80cb..b3b86dbdc187 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -4,7 +4,6 @@ menuconfig DLM
 	depends on INET
 	depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
 	select IP_SCTP
-	select SRCU
 	help
 	A general purpose distributed lock manager for kernel or userspace
 	applications.
-- 
cgit 


From aad633dc0cf90093998b1ae0ba9f19b5f1dab644 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:31 -0500
Subject: fs: dlm: start midcomms before scand

The scand kthread can send dlm messages out, especially dlm remove
messages to free memory for unused rsb on other nodes. To send out dlm
messages, midcomms must be initialized. This patch moves the midcomms
start before scand is started.

Cc: stable@vger.kernel.org
Fixes: e7fd41792fc0 ("[DLM] The core of the DLM for GFS2/CLVM")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lockspace.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d0b4e2181a5f..99bc96f90779 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -381,23 +381,23 @@ static int threads_start(void)
 {
 	int error;
 
-	error = dlm_scand_start();
+	/* Thread for sending/receiving messages for all lockspace's */
+	error = dlm_midcomms_start();
 	if (error) {
-		log_print("cannot start dlm_scand thread %d", error);
+		log_print("cannot start dlm midcomms %d", error);
 		goto fail;
 	}
 
-	/* Thread for sending/receiving messages for all lockspace's */
-	error = dlm_midcomms_start();
+	error = dlm_scand_start();
 	if (error) {
-		log_print("cannot start dlm midcomms %d", error);
-		goto scand_fail;
+		log_print("cannot start dlm_scand thread %d", error);
+		goto midcomms_fail;
 	}
 
 	return 0;
 
- scand_fail:
-	dlm_scand_stop();
+ midcomms_fail:
+	dlm_midcomms_stop();
  fail:
 	return error;
 }
-- 
cgit 


From 724b6bab0d75f1dc01fdfbf7fe8d4217a5cb90ba Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:32 -0500
Subject: fs: dlm: fix use after free in midcomms commit

While working on processing dlm message in softirq context I experienced
the following KASAN use-after-free warning:

[  151.760477] ==================================================================
[  151.761803] BUG: KASAN: use-after-free in dlm_midcomms_commit_mhandle+0x19d/0x4b0
[  151.763414] Read of size 4 at addr ffff88811a980c60 by task lock_torture/1347

[  151.765284] CPU: 7 PID: 1347 Comm: lock_torture Not tainted 6.1.0-rc4+ #2828
[  151.766778] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.16.0-3.module+el8.7.0+16134+e5908aa2 04/01/2014
[  151.768726] Call Trace:
[  151.769277]  <TASK>
[  151.769748]  dump_stack_lvl+0x5b/0x86
[  151.770556]  print_report+0x180/0x4c8
[  151.771378]  ? kasan_complete_mode_report_info+0x7c/0x1e0
[  151.772241]  ? dlm_midcomms_commit_mhandle+0x19d/0x4b0
[  151.773069]  kasan_report+0x93/0x1a0
[  151.773668]  ? dlm_midcomms_commit_mhandle+0x19d/0x4b0
[  151.774514]  __asan_load4+0x7e/0xa0
[  151.775089]  dlm_midcomms_commit_mhandle+0x19d/0x4b0
[  151.775890]  ? create_message.isra.29.constprop.64+0x57/0xc0
[  151.776770]  send_common+0x19f/0x1b0
[  151.777342]  ? remove_from_waiters+0x60/0x60
[  151.778017]  ? lock_downgrade+0x410/0x410
[  151.778648]  ? __this_cpu_preempt_check+0x13/0x20
[  151.779421]  ? rcu_lockdep_current_cpu_online+0x88/0xc0
[  151.780292]  _convert_lock+0x46/0x150
[  151.780893]  convert_lock+0x7b/0xc0
[  151.781459]  dlm_lock+0x3ac/0x580
[  151.781993]  ? 0xffffffffc0540000
[  151.782522]  ? torture_stop+0x120/0x120 [dlm_locktorture]
[  151.783379]  ? dlm_scan_rsbs+0xa70/0xa70
[  151.784003]  ? preempt_count_sub+0xd6/0x130
[  151.784661]  ? is_module_address+0x47/0x70
[  151.785309]  ? torture_stop+0x120/0x120 [dlm_locktorture]
[  151.786166]  ? 0xffffffffc0540000
[  151.786693]  ? lockdep_init_map_type+0xc3/0x360
[  151.787414]  ? 0xffffffffc0540000
[  151.787947]  torture_dlm_lock_sync.isra.3+0xe9/0x150 [dlm_locktorture]
[  151.789004]  ? torture_stop+0x120/0x120 [dlm_locktorture]
[  151.789858]  ? 0xffffffffc0540000
[  151.790392]  ? lock_torture_cleanup+0x20/0x20 [dlm_locktorture]
[  151.791347]  ? delay_tsc+0x94/0xc0
[  151.791898]  torture_ex_iter+0xc3/0xea [dlm_locktorture]
[  151.792735]  ? torture_start+0x30/0x30 [dlm_locktorture]
[  151.793606]  lock_torture+0x177/0x270 [dlm_locktorture]
[  151.794448]  ? torture_dlm_lock_sync.isra.3+0x150/0x150 [dlm_locktorture]
[  151.795539]  ? lock_torture_stats+0x80/0x80 [dlm_locktorture]
[  151.796476]  ? do_raw_spin_lock+0x11e/0x1e0
[  151.797152]  ? mark_held_locks+0x34/0xb0
[  151.797784]  ? _raw_spin_unlock_irqrestore+0x30/0x70
[  151.798581]  ? __kthread_parkme+0x79/0x110
[  151.799246]  ? trace_preempt_on+0x2a/0xf0
[  151.799902]  ? __kthread_parkme+0x79/0x110
[  151.800579]  ? preempt_count_sub+0xd6/0x130
[  151.801271]  ? __kasan_check_read+0x11/0x20
[  151.801963]  ? __kthread_parkme+0xec/0x110
[  151.802630]  ? lock_torture_stats+0x80/0x80 [dlm_locktorture]
[  151.803569]  kthread+0x192/0x1d0
[  151.804104]  ? kthread_complete_and_exit+0x30/0x30
[  151.804881]  ret_from_fork+0x1f/0x30
[  151.805480]  </TASK>

[  151.806111] Allocated by task 1347:
[  151.806681]  kasan_save_stack+0x26/0x50
[  151.807308]  kasan_set_track+0x25/0x30
[  151.807920]  kasan_save_alloc_info+0x1e/0x30
[  151.808609]  __kasan_slab_alloc+0x63/0x80
[  151.809263]  kmem_cache_alloc+0x1ad/0x830
[  151.809916]  dlm_allocate_mhandle+0x17/0x20
[  151.810590]  dlm_midcomms_get_mhandle+0x96/0x260
[  151.811344]  _create_message+0x95/0x180
[  151.811994]  create_message.isra.29.constprop.64+0x57/0xc0
[  151.812880]  send_common+0x129/0x1b0
[  151.813467]  _convert_lock+0x46/0x150
[  151.814074]  convert_lock+0x7b/0xc0
[  151.814648]  dlm_lock+0x3ac/0x580
[  151.815199]  torture_dlm_lock_sync.isra.3+0xe9/0x150 [dlm_locktorture]
[  151.816258]  torture_ex_iter+0xc3/0xea [dlm_locktorture]
[  151.817129]  lock_torture+0x177/0x270 [dlm_locktorture]
[  151.817986]  kthread+0x192/0x1d0
[  151.818518]  ret_from_fork+0x1f/0x30

[  151.819369] Freed by task 1336:
[  151.819890]  kasan_save_stack+0x26/0x50
[  151.820514]  kasan_set_track+0x25/0x30
[  151.821128]  kasan_save_free_info+0x2e/0x50
[  151.821812]  __kasan_slab_free+0x107/0x1a0
[  151.822483]  kmem_cache_free+0x204/0x5e0
[  151.823152]  dlm_free_mhandle+0x18/0x20
[  151.823781]  dlm_mhandle_release+0x2e/0x40
[  151.824454]  rcu_core+0x583/0x1330
[  151.825047]  rcu_core_si+0xe/0x20
[  151.825594]  __do_softirq+0xf4/0x5c2

[  151.826450] Last potentially related work creation:
[  151.827238]  kasan_save_stack+0x26/0x50
[  151.827870]  __kasan_record_aux_stack+0xa2/0xc0
[  151.828609]  kasan_record_aux_stack_noalloc+0xb/0x20
[  151.829415]  call_rcu+0x4c/0x760
[  151.829954]  dlm_mhandle_delete+0x97/0xb0
[  151.830718]  dlm_process_incoming_buffer+0x2fc/0xb30
[  151.831524]  process_dlm_messages+0x16e/0x470
[  151.832245]  process_one_work+0x505/0xa10
[  151.832905]  worker_thread+0x67/0x650
[  151.833507]  kthread+0x192/0x1d0
[  151.834046]  ret_from_fork+0x1f/0x30

[  151.834900] The buggy address belongs to the object at ffff88811a980c30
                which belongs to the cache dlm_mhandle of size 88
[  151.836894] The buggy address is located 48 bytes inside of
                88-byte region [ffff88811a980c30, ffff88811a980c88)

[  151.839007] The buggy address belongs to the physical page:
[  151.839904] page:0000000076cf5d62 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11a980
[  151.841378] flags: 0x8000000000000200(slab|zone=2)
[  151.842141] raw: 8000000000000200 0000000000000000 dead000000000122 ffff8881089b43c0
[  151.843401] raw: 0000000000000000 0000000000220022 00000001ffffffff 0000000000000000
[  151.844640] page dumped because: kasan: bad access detected

[  151.845822] Memory state around the buggy address:
[  151.846602]  ffff88811a980b00: fb fb fb fb fc fc fc fc fa fb fb fb fb fb fb fb
[  151.847761]  ffff88811a980b80: fb fb fb fc fc fc fc fa fb fb fb fb fb fb fb fb
[  151.848921] >ffff88811a980c00: fb fb fc fc fc fc fa fb fb fb fb fb fb fb fb fb
[  151.850076]                                                        ^
[  151.851085]  ffff88811a980c80: fb fc fc fc fc fa fb fb fb fb fb fb fb fb fb fb
[  151.852269]  ffff88811a980d00: fc fc fc fc fa fb fb fb fb fb fb fb fb fb fb fc
[  151.853428] ==================================================================
[  151.855618] Disabling lock debugging due to kernel taint

It is accessing a mhandle in dlm_midcomms_commit_mhandle() and the mhandle
was freed by a call_rcu() call in dlm_process_incoming_buffer(),
dlm_mhandle_delete(). It looks like it was freed because an ack of
this message was received. There is a short race between committing the
dlm message to be transmitted and getting an ack back. If the ack is
faster than returning from dlm_midcomms_commit_msg_3_2(), then we run
into a use-after free because we still need to reference the mhandle when
calling srcu_read_unlock().

To avoid that, we don't allow that mhandle to be freed between
dlm_midcomms_commit_msg_3_2() and srcu_read_unlock() by using rcu read
lock. We can do that because mhandle is protected by rcu handling.

Cc: stable@vger.kernel.org
Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index fc015a6abe17..2e60d9a2c883 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1214,8 +1214,15 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh,
 		dlm_free_mhandle(mh);
 		break;
 	case DLM_VERSION_3_2:
+		/* held rcu read lock here, because we sending the
+		 * dlm message out, when we do that we could receive
+		 * an ack back which releases the mhandle and we
+		 * get a use after free.
+		 */
+		rcu_read_lock();
 		dlm_midcomms_commit_msg_3_2(mh, name, namelen);
 		srcu_read_unlock(&nodes_srcu, mh->idx);
+		rcu_read_unlock();
 		break;
 	default:
 		srcu_read_unlock(&nodes_srcu, mh->idx);
-- 
cgit 


From 7354fa4ef697191effedc2ae9a8293427708bbf5 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:33 -0500
Subject: fs: dlm: be sure to call dlm_send_queue_flush()

If we release a midcomms node structure, there should be nothing left
inside the dlm midcomms send queue. However, sometimes this is not true
because I believe some DLM_FIN message was not acked... if we run
into a shutdown timeout, then we should be sure there is no pending send
dlm message inside this queue when releasing midcomms node structure.

Cc: stable@vger.kernel.org
Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 2e60d9a2c883..a3eb19c8cec5 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1402,6 +1402,7 @@ static void midcomms_node_release(struct rcu_head *rcu)
 	struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
 
 	WARN_ON_ONCE(atomic_read(&node->send_queue_cnt));
+	dlm_send_queue_flush(node);
 	kfree(node);
 }
 
-- 
cgit 


From 164272113b685927126c938b4a9cbd2075eb15ee Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:34 -0500
Subject: fs: dlm: fix race setting stop tx flag

This patch sets the stop tx flag before we commit the dlm message.
This flag will report about unexpected transmissions after we
send the DLM_FIN message out, which should be the last message sent.
When we commit the dlm fin message, it could be that we already
got an ack back and the CLOSED state change already happened.
We should not set this flag when we are in CLOSED state. To avoid this
race we simply set the tx flag before the state change can be in
progress by moving it before dlm_midcomms_commit_mhandle().

Cc: stable@vger.kernel.org
Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index a3eb19c8cec5..9d459d5bf800 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -406,6 +406,7 @@ static int dlm_send_fin(struct midcomms_node *node,
 	if (!mh)
 		return -ENOMEM;
 
+	set_bit(DLM_NODE_FLAG_STOP_TX, &node->flags);
 	mh->ack_rcv = ack_rcv;
 
 	m_header = (struct dlm_header *)ppc;
@@ -417,7 +418,6 @@ static int dlm_send_fin(struct midcomms_node *node,
 
 	pr_debug("sending fin msg to node %d\n", node->nodeid);
 	dlm_midcomms_commit_mhandle(mh, NULL, 0);
-	set_bit(DLM_NODE_FLAG_STOP_TX, &node->flags);
 
 	return 0;
 }
-- 
cgit 


From 15c63db8e86a72e0d5cfb9bf0cd1870e39a3e5fe Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:35 -0500
Subject: fs: dlm: don't set stop rx flag after node reset

Similar to the stop tx flag, the rx flag should warn about a dlm message
being received at DLM_FIN state change, when we are assuming no other
dlm application messages. If we receive a FIN message and we are in the
state DLM_FIN_WAIT2 we call midcomms_node_reset() which puts the
midcomms node into DLM_CLOSED state. Afterwards we should not set the
DLM_NODE_FLAG_STOP_RX flag any more.  This patch changes the setting
DLM_NODE_FLAG_STOP_RX in those state changes when we receive a FIN
message and we assume there will be no other dlm application messages
received until we hit DLM_CLOSED state.

Cc: stable@vger.kernel.org
Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 9d459d5bf800..1fef99214204 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -524,6 +524,7 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 				break;
 			case DLM_FIN_WAIT1:
 				node->state = DLM_CLOSING;
+				set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
 				pr_debug("switch node %d to state %s\n",
 					 node->nodeid, dlm_state_str(node->state));
 				break;
@@ -544,8 +545,6 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 				return;
 			}
 			spin_unlock(&node->state_lock);
-
-			set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
 			break;
 		default:
 			WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
-- 
cgit 


From a58496361802070996f9bd76e941d109c4a85ebd Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:36 -0500
Subject: fs: dlm: move sending fin message into state change handling

This patch moves the send fin handling, which should appear in a specific
state change, into the state change handling while the per node
state_lock is held. I experienced issues with other messages because
we changed the state and a fin message was sent out in a different state.

Cc: stable@vger.kernel.org
Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 1fef99214204..fd9f413cabcd 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -402,7 +402,7 @@ static int dlm_send_fin(struct midcomms_node *node,
 	struct dlm_mhandle *mh;
 	char *ppc;
 
-	mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_NOFS, &ppc);
+	mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_ATOMIC, &ppc);
 	if (!mh)
 		return -ENOMEM;
 
@@ -518,8 +518,8 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 					node->state = DLM_LAST_ACK;
 					pr_debug("switch node %d to state %s case 1\n",
 						 node->nodeid, dlm_state_str(node->state));
-					spin_unlock(&node->state_lock);
-					goto send_fin;
+					set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+					dlm_send_fin(node, dlm_pas_fin_ack_rcv);
 				}
 				break;
 			case DLM_FIN_WAIT1:
@@ -563,12 +563,6 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 		log_print_ratelimited("ignore dlm msg because seq mismatch, seq: %u, expected: %u, nodeid: %d",
 				      seq, node->seq_next, node->nodeid);
 	}
-
-	return;
-
-send_fin:
-	set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
-	dlm_send_fin(node, dlm_pas_fin_ack_rcv);
 }
 
 static struct midcomms_node *
@@ -1368,11 +1362,11 @@ void dlm_midcomms_remove_member(int nodeid)
 		case DLM_CLOSE_WAIT:
 			/* passive shutdown DLM_LAST_ACK case 2 */
 			node->state = DLM_LAST_ACK;
-			spin_unlock(&node->state_lock);
-
 			pr_debug("switch node %d to state %s case 2\n",
 				 node->nodeid, dlm_state_str(node->state));
-			goto send_fin;
+			set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+			dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+			break;
 		case DLM_LAST_ACK:
 			/* probably receive fin caught it, do nothing */
 			break;
@@ -1388,12 +1382,6 @@ void dlm_midcomms_remove_member(int nodeid)
 	spin_unlock(&node->state_lock);
 
 	srcu_read_unlock(&nodes_srcu, idx);
-	return;
-
-send_fin:
-	set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
-	dlm_send_fin(node, dlm_pas_fin_ack_rcv);
-	srcu_read_unlock(&nodes_srcu, idx);
 }
 
 static void midcomms_node_release(struct rcu_head *rcu)
@@ -1425,6 +1413,7 @@ static void midcomms_shutdown(struct midcomms_node *node)
 		node->state = DLM_FIN_WAIT1;
 		pr_debug("switch node %d to state %s case 2\n",
 			 node->nodeid, dlm_state_str(node->state));
+		dlm_send_fin(node, dlm_act_fin_ack_rcv);
 		break;
 	case DLM_CLOSED:
 		/* we have what we want */
@@ -1438,12 +1427,8 @@ static void midcomms_shutdown(struct midcomms_node *node)
 	}
 	spin_unlock(&node->state_lock);
 
-	if (node->state == DLM_FIN_WAIT1) {
-		dlm_send_fin(node, dlm_act_fin_ack_rcv);
-
-		if (DLM_DEBUG_FENCE_TERMINATION)
-			msleep(5000);
-	}
+	if (DLM_DEBUG_FENCE_TERMINATION)
+		msleep(5000);
 
 	/* wait for other side dlm + fin */
 	ret = wait_event_timeout(node->shutdown_wait,
-- 
cgit 


From 00908b3388255fc1d3782b744d07f327712f401f Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:10:37 -0500
Subject: fs: dlm: send FIN ack back in right cases

This patch moves to send a ack back for receiving a FIN message only
when we are in valid states. In other cases and there might be a sender
waiting for a ack we just let it timeout at the senders time and
hopefully all other cleanups will remove the FIN message on their
sending queue. As an example we should never send out an ACK being in
LAST_ACK state or we cannot assume a working socket communication when
we are in CLOSED state.

Cc: stable@vger.kernel.org
Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index fd9f413cabcd..ecfb3beb0bb8 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -375,7 +375,7 @@ static int dlm_send_ack(int nodeid, uint32_t seq)
 	struct dlm_msg *msg;
 	char *ppc;
 
-	msg = dlm_lowcomms_new_msg(nodeid, mb_len, GFP_NOFS, &ppc,
+	msg = dlm_lowcomms_new_msg(nodeid, mb_len, GFP_ATOMIC, &ppc,
 				   NULL, NULL);
 	if (!msg)
 		return -ENOMEM;
@@ -498,15 +498,14 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 
 		switch (p->header.h_cmd) {
 		case DLM_FIN:
-			/* send ack before fin */
-			dlm_send_ack(node->nodeid, node->seq_next);
-
 			spin_lock(&node->state_lock);
 			pr_debug("receive fin msg from node %d with state %s\n",
 				 node->nodeid, dlm_state_str(node->state));
 
 			switch (node->state) {
 			case DLM_ESTABLISHED:
+				dlm_send_ack(node->nodeid, node->seq_next);
+
 				node->state = DLM_CLOSE_WAIT;
 				pr_debug("switch node %d to state %s\n",
 					 node->nodeid, dlm_state_str(node->state));
@@ -523,12 +522,14 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 				}
 				break;
 			case DLM_FIN_WAIT1:
+				dlm_send_ack(node->nodeid, node->seq_next);
 				node->state = DLM_CLOSING;
 				set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
 				pr_debug("switch node %d to state %s\n",
 					 node->nodeid, dlm_state_str(node->state));
 				break;
 			case DLM_FIN_WAIT2:
+				dlm_send_ack(node->nodeid, node->seq_next);
 				midcomms_node_reset(node);
 				pr_debug("switch node %d to state %s\n",
 					 node->nodeid, dlm_state_str(node->state));
-- 
cgit 


From 54fbe0c15d9a8072e2db7f6765f50c995834e2d7 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:42 -0500
Subject: fs: dlm: bring back previous shutdown handling

This patch mostly reverts commit 4f567acb0b86 ("fs: dlm: remove socket
shutdown handling"). There can be situations where the dlm midcomms nodes
hash and lowcomms connection hash are not equal, but we need to guarantee
that the lowcomms are all closed on a last release of a dlm lockspace,
when a shutdown is invoked. This patch guarantees that we always close
all sockets managed by the lowcomms connection hash, and calls shutdown
for the last message sent. This ensures we don't cut the socket, which
could cause the peer to get a connection reset.

In future we should try to merge the midcomms/lowcomms hashes into one
hash and not handle both in separate hashes.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 77 ++++++++++++++++++++++++++++++++++++++++---------------
 fs/dlm/midcomms.c | 20 +++++----------
 2 files changed, 63 insertions(+), 34 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4450721ec83c..61cd6c2628fa 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -61,6 +61,7 @@
 #include "memory.h"
 #include "config.h"
 
+#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(5000)
 #define NEEDED_RMEM (4*1024*1024)
 
 struct connection {
@@ -99,6 +100,7 @@ struct connection {
 	struct connection *othercon;
 	struct work_struct rwork; /* receive worker */
 	struct work_struct swork; /* send worker */
+	wait_queue_head_t shutdown_wait;
 	unsigned char rx_leftover_buf[DLM_MAX_SOCKET_BUFSIZE];
 	int rx_leftover;
 	int mark;
@@ -282,6 +284,7 @@ static void dlm_con_init(struct connection *con, int nodeid)
 	INIT_WORK(&con->swork, process_send_sockets);
 	INIT_WORK(&con->rwork, process_recv_sockets);
 	spin_lock_init(&con->addrs_lock);
+	init_waitqueue_head(&con->shutdown_wait);
 }
 
 /*
@@ -790,6 +793,43 @@ static void close_connection(struct connection *con, bool and_other)
 	up_write(&con->sock_lock);
 }
 
+static void shutdown_connection(struct connection *con, bool and_other)
+{
+	int ret;
+
+	if (con->othercon && and_other)
+		shutdown_connection(con->othercon, false);
+
+	flush_workqueue(io_workqueue);
+	down_read(&con->sock_lock);
+	/* nothing to shutdown */
+	if (!con->sock) {
+		up_read(&con->sock_lock);
+		return;
+	}
+
+	ret = kernel_sock_shutdown(con->sock, SHUT_WR);
+	up_read(&con->sock_lock);
+	if (ret) {
+		log_print("Connection %p failed to shutdown: %d will force close",
+			  con, ret);
+		goto force_close;
+	} else {
+		ret = wait_event_timeout(con->shutdown_wait, !con->sock,
+					 DLM_SHUTDOWN_WAIT_TIMEOUT);
+		if (ret == 0) {
+			log_print("Connection %p shutdown timed out, will force close",
+				  con);
+			goto force_close;
+		}
+	}
+
+	return;
+
+force_close:
+	close_connection(con, false);
+}
+
 static struct processqueue_entry *new_processqueue_entry(int nodeid,
 							 int buflen)
 {
@@ -1488,6 +1528,7 @@ static void process_recv_sockets(struct work_struct *work)
 		break;
 	case DLM_IO_EOF:
 		close_connection(con, false);
+		wake_up(&con->shutdown_wait);
 		/* CF_RECV_PENDING cleared */
 		break;
 	case DLM_IO_RESCHED:
@@ -1695,6 +1736,9 @@ static int work_start(void)
 
 void dlm_lowcomms_shutdown(void)
 {
+	struct connection *con;
+	int i, idx;
+
 	/* stop lowcomms_listen_data_ready calls */
 	lock_sock(listen_con.sock->sk);
 	listen_con.sock->sk->sk_data_ready = listen_sock.sk_data_ready;
@@ -1703,29 +1747,20 @@ void dlm_lowcomms_shutdown(void)
 	cancel_work_sync(&listen_con.rwork);
 	dlm_close_sock(&listen_con.sock);
 
-	flush_workqueue(process_workqueue);
-}
-
-void dlm_lowcomms_shutdown_node(int nodeid, bool force)
-{
-	struct connection *con;
-	int idx;
-
 	idx = srcu_read_lock(&connections_srcu);
-	con = nodeid2con(nodeid, 0);
-	if (WARN_ON_ONCE(!con)) {
-		srcu_read_unlock(&connections_srcu, idx);
-		return;
-	}
+	for (i = 0; i < CONN_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(con, &connection_hash[i], list) {
+			shutdown_connection(con, true);
+			stop_connection_io(con);
+			flush_workqueue(process_workqueue);
+			close_connection(con, true);
 
-	flush_work(&con->swork);
-	stop_connection_io(con);
-	WARN_ON_ONCE(!force && !list_empty(&con->writequeue));
-	close_connection(con, true);
-	clean_one_writequeue(con);
-	if (con->othercon)
-		clean_one_writequeue(con->othercon);
-	allow_connection_io(con);
+			clean_one_writequeue(con);
+			if (con->othercon)
+				clean_one_writequeue(con->othercon);
+			allow_connection_io(con);
+		}
+	}
 	srcu_read_unlock(&connections_srcu, idx);
 }
 
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index ecfb3beb0bb8..ecd81018d1cf 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1418,8 +1418,7 @@ static void midcomms_shutdown(struct midcomms_node *node)
 		break;
 	case DLM_CLOSED:
 		/* we have what we want */
-		spin_unlock(&node->state_lock);
-		return;
+		break;
 	default:
 		/* busy to enter DLM_FIN_WAIT1, wait until passive
 		 * done in shutdown_wait to enter DLM_CLOSED.
@@ -1436,17 +1435,12 @@ static void midcomms_shutdown(struct midcomms_node *node)
 				 node->state == DLM_CLOSED ||
 				 test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
 				 DLM_SHUTDOWN_TIMEOUT);
-	if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags)) {
+	if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags))
 		pr_debug("active shutdown timed out for node %d with state %s\n",
 			 node->nodeid, dlm_state_str(node->state));
-		midcomms_node_reset(node);
-		dlm_lowcomms_shutdown_node(node->nodeid, true);
-		return;
-	}
-
-	pr_debug("active shutdown done for node %d with state %s\n",
-		 node->nodeid, dlm_state_str(node->state));
-	dlm_lowcomms_shutdown_node(node->nodeid, false);
+	else
+		pr_debug("active shutdown done for node %d with state %s\n",
+			 node->nodeid, dlm_state_str(node->state));
 }
 
 void dlm_midcomms_shutdown(void)
@@ -1454,8 +1448,6 @@ void dlm_midcomms_shutdown(void)
 	struct midcomms_node *node;
 	int i, idx;
 
-	dlm_lowcomms_shutdown();
-
 	mutex_lock(&close_lock);
 	idx = srcu_read_lock(&nodes_srcu);
 	for (i = 0; i < CONN_HASH_SIZE; i++) {
@@ -1473,6 +1465,8 @@ void dlm_midcomms_shutdown(void)
 	}
 	srcu_read_unlock(&nodes_srcu, idx);
 	mutex_unlock(&close_lock);
+
+	dlm_lowcomms_shutdown();
 }
 
 int dlm_midcomms_close(int nodeid)
-- 
cgit 


From 89835b064fe7421a60e40770c27d38024190a0c8 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:43 -0500
Subject: fs: dlm: ignore unexpected non dlm opts msgs

This patch ignores unexpected RCOM_NAMES/RCOM_STATUS messages.
To be backwards compatible, those messages are not part of the new
reliable DLM OPTS encapsulation header, and have their own
retransmit handling using sequence number matching  When we get
unexpected non dlm opts messages, we should allow them and let
RCOM message handling filter them out using sequence numbers.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index ecd81018d1cf..dbc998b2748b 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -606,16 +606,8 @@ dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
 				case DLM_ESTABLISHED:
 					break;
 				default:
-					/* some invalid state passive shutdown
-					 * was failed, we try to reset and
-					 * hope it will go on.
-					 */
-					log_print("reset node %d because shutdown stuck",
-						  node->nodeid);
-
-					midcomms_node_reset(node);
-					node->state = DLM_ESTABLISHED;
-					break;
+					spin_unlock(&node->state_lock);
+					return NULL;
 				}
 				spin_unlock(&node->state_lock);
 			}
-- 
cgit 


From b8b750e0c99f39223115f2672ac4cfa96ecb9edd Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:44 -0500
Subject: fs: dlm: wait until all midcomms nodes detect version

The current dlm version detection is very complex due to backwards
compatablilty with earlier dlm protocol versions. It takes some time to
detect if a peer node has a specific DLM version. If it's not detected,
we just cut the socket connection. There could be cases where the local
node has not detected the version yet, but the peer node has.  In these
cases, we are trying to shutdown the dlm connection with a FIN/ACK message
exchange to be sure the other peer is ready to shutdown the connection on
dlm application level.  However this mechanism is only available on DLM
protocol version 3.2 and we need to be sure the DLM version is detected
before.

To make it more robust we introduce a a "best effort" wait to wait for the
version detection before shutdown the dlm connection. This need to be
done before the kthread recoverd for recovery handling is stopped,
because recovery handling will trigger enough messages to have a version
detection going on.

It is a corner case which was detected by modprobe dlm_locktroture module
and rmmod dlm_locktorture module directly afterwards (in a looping
behaviour). In practice probably nobody would leave a lockspace immediately
after joining it.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lockspace.c |  3 +++
 fs/dlm/midcomms.c  | 23 +++++++++++++++++++++++
 fs/dlm/midcomms.h  |  1 +
 3 files changed, 27 insertions(+)

(limited to 'fs/dlm')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 99bc96f90779..d9dc0b734002 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -820,6 +820,9 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 		return rv;
 	}
 
+	if (ls_count == 1)
+		dlm_midcomms_version_wait();
+
 	dlm_device_deregister(ls);
 
 	if (force < 3 && dlm_user_daemon_available())
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index dbc998b2748b..cf91a5a11b4f 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -657,6 +657,7 @@ static int dlm_midcomms_version_check_3_2(struct midcomms_node *node)
 	switch (node->version) {
 	case DLM_VERSION_NOT_SET:
 		node->version = DLM_VERSION_3_2;
+		wake_up(&node->shutdown_wait);
 		log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
 			  node->nodeid);
 		break;
@@ -826,6 +827,7 @@ static int dlm_midcomms_version_check_3_1(struct midcomms_node *node)
 	switch (node->version) {
 	case DLM_VERSION_NOT_SET:
 		node->version = DLM_VERSION_3_1;
+		wake_up(&node->shutdown_wait);
 		log_print("version 0x%08x for node %d detected", DLM_VERSION_3_1,
 			  node->nodeid);
 		break;
@@ -1386,6 +1388,27 @@ static void midcomms_node_release(struct rcu_head *rcu)
 	kfree(node);
 }
 
+void dlm_midcomms_version_wait(void)
+{
+	struct midcomms_node *node;
+	int i, idx, ret;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	for (i = 0; i < CONN_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
+			ret = wait_event_timeout(node->shutdown_wait,
+						 node->version != DLM_VERSION_NOT_SET ||
+						 node->state == DLM_CLOSED ||
+						 test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
+						 DLM_SHUTDOWN_TIMEOUT);
+			if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags))
+				pr_debug("version wait timed out for node %d with state %s\n",
+					 node->nodeid, dlm_state_str(node->state));
+		}
+	}
+	srcu_read_unlock(&nodes_srcu, idx);
+}
+
 static void midcomms_shutdown(struct midcomms_node *node)
 {
 	int ret;
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index bea1cee4279c..9f8c9605013d 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -20,6 +20,7 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 					     gfp_t allocation, char **ppc);
 void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh, const void *name,
 				 int namelen);
+void dlm_midcomms_version_wait(void);
 int dlm_midcomms_close(int nodeid);
 int dlm_midcomms_start(void);
 void dlm_midcomms_stop(void);
-- 
cgit 


From 317dd6ba6ccaa5e7c24039955c39df84072d5166 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:45 -0500
Subject: fs: dlm: make dlm sequence id more robust

When joining a new lockspace, use a random number to initialize
a sequence number used in messages. This makes it easier to detect
sequence number mismatches in message replies during tests that
repeatedly join and leave a lockspace.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lockspace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d9dc0b734002..9f344d76afa3 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -572,7 +572,7 @@ static int new_lockspace(const char *name, const char *cluster,
 	spin_lock_init(&ls->ls_rcom_spin);
 	get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
 	ls->ls_recover_status = 0;
-	ls->ls_recover_seq = 0;
+	ls->ls_recover_seq = get_random_u64();
 	ls->ls_recover_args = NULL;
 	init_rwsem(&ls->ls_in_recovery);
 	init_rwsem(&ls->ls_recv_active);
-- 
cgit 


From 11605353f27c23902b6fb993a57bd2f0baaff3b6 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:46 -0500
Subject: fs: dlm: reduce the shutdown timeout to 5 secs

When a shutdown is stuck, time out after 5 seconds instead of
3 minutes.  After this timeout we try a forced shutdown.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index cf91a5a11b4f..44e29f99de4e 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -146,8 +146,8 @@
 
 /* init value for sequence numbers for testing purpose only e.g. overflows */
 #define DLM_SEQ_INIT		0
-/* 3 minutes wait to sync ending of dlm */
-#define DLM_SHUTDOWN_TIMEOUT	msecs_to_jiffies(3 * 60 * 1000)
+/* 5 seconds wait to sync ending of dlm */
+#define DLM_SHUTDOWN_TIMEOUT	msecs_to_jiffies(5000)
 #define DLM_VERSION_NOT_SET	0
 
 struct midcomms_node {
-- 
cgit 


From 3186409711e64ed0a0fa767fad555a5e655022da Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:47 -0500
Subject: fs: dlm: remove newline in log_print

There is an API difference between log_print() and other printk()s to
put a newline or not. This one was introduced by mistake because
log_print() adds a newline.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 44e29f99de4e..7827d049378c 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -467,7 +467,7 @@ static void dlm_pas_fin_ack_rcv(struct midcomms_node *node)
 		break;
 	default:
 		spin_unlock(&node->state_lock);
-		log_print("%s: unexpected state: %d\n",
+		log_print("%s: unexpected state: %d",
 			  __func__, node->state);
 		WARN_ON_ONCE(1);
 		return;
@@ -540,7 +540,7 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 				break;
 			default:
 				spin_unlock(&node->state_lock);
-				log_print("%s: unexpected state: %d\n",
+				log_print("%s: unexpected state: %d",
 					  __func__, node->state);
 				WARN_ON_ONCE(1);
 				return;
@@ -1269,7 +1269,7 @@ static void dlm_act_fin_ack_rcv(struct midcomms_node *node)
 		break;
 	default:
 		spin_unlock(&node->state_lock);
-		log_print("%s: unexpected state: %d\n",
+		log_print("%s: unexpected state: %d",
 			  __func__, node->state);
 		WARN_ON_ONCE(1);
 		return;
@@ -1369,7 +1369,7 @@ void dlm_midcomms_remove_member(int nodeid)
 			/* already gone, do nothing */
 			break;
 		default:
-			log_print("%s: unexpected state: %d\n",
+			log_print("%s: unexpected state: %d",
 				  __func__, node->state);
 			break;
 		}
-- 
cgit 


From ef7ef015eb333c8954d7165d216ec0c774d7e4e4 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:48 -0500
Subject: fs: dlm: move state change into else branch

Currently we can switch at first into DLM_CLOSE_WAIT state and then do
another state change if a condition is true. Instead of doing two state
changes we handle the other state change inside an else branch of this
condition.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 7827d049378c..69e6f4705ebc 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -506,9 +506,6 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 			case DLM_ESTABLISHED:
 				dlm_send_ack(node->nodeid, node->seq_next);
 
-				node->state = DLM_CLOSE_WAIT;
-				pr_debug("switch node %d to state %s\n",
-					 node->nodeid, dlm_state_str(node->state));
 				/* passive shutdown DLM_LAST_ACK case 1
 				 * additional we check if the node is used by
 				 * cluster manager events at all.
@@ -519,6 +516,10 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 						 node->nodeid, dlm_state_str(node->state));
 					set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
 					dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+				} else {
+					node->state = DLM_CLOSE_WAIT;
+					pr_debug("switch node %d to state %s\n",
+						 node->nodeid, dlm_state_str(node->state));
 				}
 				break;
 			case DLM_FIN_WAIT1:
-- 
cgit 


From 723b197bbdf1e0adbab772b8e5e022c40db6a9fe Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Thu, 12 Jan 2023 17:18:49 -0500
Subject: fs: dlm: remove unnecessary waker_up() calls

The wake_up() is already handled inside of midcomms_node_reset() when
switching the state to CLOSED state. So there is not need to call it
after midcomms_node_reset() again.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs/dlm')

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 69e6f4705ebc..c02c43e4980a 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -534,7 +534,6 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p,
 				midcomms_node_reset(node);
 				pr_debug("switch node %d to state %s\n",
 					 node->nodeid, dlm_state_str(node->state));
-				wake_up(&node->shutdown_wait);
 				break;
 			case DLM_LAST_ACK:
 				/* probably remove_member caught it, do nothing */
@@ -1262,7 +1261,6 @@ static void dlm_act_fin_ack_rcv(struct midcomms_node *node)
 		midcomms_node_reset(node);
 		pr_debug("switch node %d to state %s\n",
 			 node->nodeid, dlm_state_str(node->state));
-		wake_up(&node->shutdown_wait);
 		break;
 	case DLM_CLOSED:
 		/* not valid but somehow we got what we want */
-- 
cgit