diff options
| author | Philipp Reisner <philipp.reisner@linbit.com> | 2012-08-08 21:19:09 +0200 | 
|---|---|---|
| committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-09 14:08:18 +0100 | 
| commit | b66623e33efbbf55717df7bfc49882371118b866 (patch) | |
| tree | 3f345827c8fab3b4aa8f2b7cf7bd760c704483af | |
| parent | 39a1aa7f49dc8eae5c8d3a4bf759eb7abeabe6c0 (diff) | |
drbd: Avoid NetworkFailure state during disconnect
Disconnecting is a cluster wide state change. In case the peer node agrees
to the state transition, it sends back the fact on the meta-data connection
and closes both sockets.
In case the node that initiated the state transfer sees the closing
action on the data-socket before the P_STATE_CHG_REPLY packet, it was
going into one of the network failure states.
At least with the fencing option set to something else than "dont-care",
the unclean shutdown of the connection causes a short IO freeze or
a fence operation.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
| -rw-r--r-- | drivers/block/drbd/drbd_int.h | 1 | ||||
| -rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 30 | ||||
| -rw-r--r-- | drivers/block/drbd/drbd_state.c | 3 | 
3 files changed, 33 insertions, 1 deletions
| diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b83398d64a9c..37ae87e468ae 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -816,6 +816,7 @@ enum {  				 * so shrink_page_list() would not recurse into,  				 * and potentially deadlock on, this drbd worker.  				 */ +	DISCONNECT_SENT,  };  struct drbd_tconn {			/* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0eefbeb65664..1a8f698021a2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -522,7 +522,6 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)  				conn_err(tconn, "sock_recvmsg returned %d\n", rv);  			break;  		} else if (rv == 0) { -			conn_info(tconn, "sock was shut down by peer\n");  			break;  		} else	{  			/* signal came in, or peer/link went down, @@ -535,9 +534,25 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)  	set_fs(oldfs); +	if (rv == 0) { +		if (test_bit(DISCONNECT_SENT, &tconn->flags)) { +			long t; +			rcu_read_lock(); +			t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; +			rcu_read_unlock(); + +			t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t); + +			if (t) +				goto out; +		} +		conn_info(tconn, "sock was shut down by peer\n"); +	} +  	if (rv != size)  		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); +out:  	return rv;  } @@ -894,6 +909,7 @@ static int conn_connect(struct drbd_tconn *tconn)  		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),  	}; +	clear_bit(DISCONNECT_SENT, &tconn->flags);  	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)  		return -2; @@ -5316,6 +5332,18 @@ int drbd_asender(struct drbd_thread *thi)  			received += rv;  			buf	 += rv;  		} else if (rv == 0) { +			if (test_bit(DISCONNECT_SENT, &tconn->flags)) { +				long t; +				
rcu_read_lock(); +				t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; +				rcu_read_unlock(); + +				t = wait_event_timeout(tconn->ping_wait, +						       tconn->cstate < C_WF_REPORT_PARAMS, +						       t); +				if (t) +					break; +			}  			conn_err(tconn, "meta connection shut down by peer.\n");  			goto reconnect;  		} else if (rv == -EAGAIN) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c16349aec23c..4fda4e2024ec 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1742,6 +1742,9 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v  		goto abort;  	} +	if (val.conn == C_DISCONNECTING) +		set_bit(DISCONNECT_SENT, &tconn->flags); +  	wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)));  	clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); | 
