diff options
Diffstat (limited to 'fs/dlm/midcomms.c')
| -rw-r--r-- | fs/dlm/midcomms.c | 392 |
1 files changed, 172 insertions, 220 deletions
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index e1a0df67b566..2c101bbe261a 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -226,8 +226,7 @@ static DEFINE_MUTEX(close_lock); struct kmem_cache *dlm_midcomms_cache_create(void) { - return kmem_cache_create("dlm_mhandle", sizeof(struct dlm_mhandle), - 0, 0, NULL); + return KMEM_CACHE(dlm_mhandle, 0); } static inline const char *dlm_state_str(int state) @@ -330,18 +329,31 @@ static void midcomms_node_reset(struct midcomms_node *node) wake_up(&node->shutdown_wait); } -static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc) +static struct midcomms_node *nodeid2node(int nodeid) { - struct midcomms_node *node, *tmp; - int r = nodeid_hash(nodeid); + return __find_node(nodeid, nodeid_hash(nodeid)); +} + +int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr) +{ + int ret, idx, r = nodeid_hash(nodeid); + struct midcomms_node *node; + ret = dlm_lowcomms_addr(nodeid, addr); + if (ret) + return ret; + + idx = srcu_read_lock(&nodes_srcu); node = __find_node(nodeid, r); - if (node || !alloc) - return node; + if (node) { + srcu_read_unlock(&nodes_srcu, idx); + return 0; + } + srcu_read_unlock(&nodes_srcu, idx); - node = kmalloc(sizeof(*node), alloc); + node = kmalloc(sizeof(*node), GFP_NOFS); if (!node) - return NULL; + return -ENOMEM; node->nodeid = nodeid; spin_lock_init(&node->state_lock); @@ -352,22 +364,12 @@ static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc) node->users = 0; midcomms_node_reset(node); - spin_lock(&nodes_lock); - /* check again if there was somebody else - * earlier here to add the node - */ - tmp = __find_node(nodeid, r); - if (tmp) { - spin_unlock(&nodes_lock); - kfree(node); - return tmp; - } - + spin_lock_bh(&nodes_lock); hlist_add_head_rcu(&node->hlist, &node_hash[r]); - spin_unlock(&nodes_lock); + spin_unlock_bh(&nodes_lock); node->debugfs = dlm_create_debug_comms_file(nodeid, node); - return node; + return 0; } static int dlm_send_ack(int nodeid, uint32_t seq) @@ -377,8 +379,7 @@ static int dlm_send_ack(int nodeid, uint32_t seq) struct dlm_msg *msg; char *ppc; - msg = dlm_lowcomms_new_msg(nodeid, mb_len, GFP_ATOMIC, &ppc, - NULL, NULL); + msg = dlm_lowcomms_new_msg(nodeid, mb_len, &ppc, NULL, NULL); if (!msg) return -ENOMEM; @@ -426,7 +427,7 @@ static int dlm_send_fin(struct midcomms_node *node, struct dlm_mhandle *mh; char *ppc; - mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_ATOMIC, &ppc); + mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, &ppc); if (!mh) return -ENOMEM; @@ -476,7 +477,7 @@ static void dlm_receive_ack(struct midcomms_node *node, uint32_t seq) static void dlm_pas_fin_ack_rcv(struct midcomms_node *node) { - spin_lock(&node->state_lock); + spin_lock_bh(&node->state_lock); pr_debug("receive passive fin ack from node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); @@ -490,16 +491,17 @@ static void dlm_pas_fin_ack_rcv(struct midcomms_node *node) wake_up(&node->shutdown_wait); break; default: - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); log_print("%s: unexpected state: %d", __func__, node->state); WARN_ON_ONCE(1); return; } - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); } -static void dlm_receive_buffer_3_2_trace(uint32_t seq, union dlm_packet *p) +static void dlm_receive_buffer_3_2_trace(uint32_t seq, + const union dlm_packet *p) { switch (p->header.h_cmd) { case DLM_MSG: @@ -513,7 +515,7 @@ static void dlm_receive_buffer_3_2_trace(uint32_t seq, union dlm_packet *p) } } -static void dlm_midcomms_receive_buffer(union dlm_packet *p, +static void dlm_midcomms_receive_buffer(const union dlm_packet *p, struct midcomms_node *node, uint32_t seq) { @@ -532,7 +534,7 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p, if (is_expected_seq) { switch (p->header.h_cmd) { case DLM_FIN: - spin_lock(&node->state_lock); + spin_lock_bh(&node->state_lock); pr_debug("receive fin msg from node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); @@ -573,13 +575,13 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p, /* probably remove_member caught it, do nothing */ break; default: - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); log_print("%s: unexpected state: %d", __func__, node->state); WARN_ON_ONCE(1); return; } - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); break; default: WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags)); @@ -602,113 +604,8 @@ static void dlm_midcomms_receive_buffer(union dlm_packet *p, } } -static struct midcomms_node * -dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p, - uint16_t msglen, int (*cb)(struct midcomms_node *node)) -{ - struct midcomms_node *node = NULL; - gfp_t allocation = 0; - int ret; - - switch (p->header.h_cmd) { - case DLM_RCOM: - if (msglen < sizeof(struct dlm_rcom)) { - log_print("rcom msg too small: %u, will skip this message from node %d", - msglen, nodeid); - return NULL; - } - - switch (p->rcom.rc_type) { - case cpu_to_le32(DLM_RCOM_NAMES): - fallthrough; - case cpu_to_le32(DLM_RCOM_NAMES_REPLY): - fallthrough; - case cpu_to_le32(DLM_RCOM_STATUS): - fallthrough; - case cpu_to_le32(DLM_RCOM_STATUS_REPLY): - node = nodeid2node(nodeid, 0); - if (node) { - spin_lock(&node->state_lock); - if (node->state != DLM_ESTABLISHED) - pr_debug("receive begin RCOM msg from node %d with state %s\n", - node->nodeid, dlm_state_str(node->state)); - - switch (node->state) { - case DLM_CLOSED: - node->state = DLM_ESTABLISHED; - pr_debug("switch node %d to state %s\n", - node->nodeid, dlm_state_str(node->state)); - break; - case DLM_ESTABLISHED: - break; - default: - spin_unlock(&node->state_lock); - return NULL; - } - spin_unlock(&node->state_lock); - } - - allocation = GFP_NOFS; - break; - default: - break; - } - - break; - default: - break; - } - - node = nodeid2node(nodeid, allocation); - if (!node) { - switch (p->header.h_cmd) { - case DLM_OPTS: - if (msglen < sizeof(struct dlm_opts)) { - log_print("opts msg too small: %u, will skip this message from node %d", - msglen, nodeid); - return NULL; - } - - log_print_ratelimited("received dlm opts message nextcmd %d from node %d in an invalid sequence", - p->opts.o_nextcmd, nodeid); - break; - default: - log_print_ratelimited("received dlm message cmd %d from node %d in an invalid sequence", - p->header.h_cmd, nodeid); - break; - } - - return NULL; - } - - ret = cb(node); - if (ret < 0) - return NULL; - - return node; -} - -static int dlm_midcomms_version_check_3_2(struct midcomms_node *node) -{ - switch (node->version) { - case DLM_VERSION_NOT_SET: - node->version = DLM_VERSION_3_2; - wake_up(&node->shutdown_wait); - log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2, - node->nodeid); - break; - case DLM_VERSION_3_2: - break; - default: - log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x", - DLM_VERSION_3_2, node->nodeid, node->version); - return -1; - } - - return 0; -} - -static int dlm_opts_check_msglen(union dlm_packet *p, uint16_t msglen, int nodeid) +static int dlm_opts_check_msglen(const union dlm_packet *p, uint16_t msglen, + int nodeid) { int len = msglen; @@ -757,7 +654,7 @@ static int dlm_opts_check_msglen(union dlm_packet *p, uint16_t msglen, int nodei return 0; } -static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid) +static void dlm_midcomms_receive_buffer_3_2(const union dlm_packet *p, int nodeid) { uint16_t msglen = le16_to_cpu(p->header.h_length); struct midcomms_node *node; @@ -765,10 +662,37 @@ static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid) int ret, idx; idx = srcu_read_lock(&nodes_srcu); - node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen, - dlm_midcomms_version_check_3_2); - if (!node) + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) + goto out; + + switch (node->version) { + case DLM_VERSION_NOT_SET: + node->version = DLM_VERSION_3_2; + wake_up(&node->shutdown_wait); + log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2, + node->nodeid); + + spin_lock(&node->state_lock); + switch (node->state) { + case DLM_CLOSED: + node->state = DLM_ESTABLISHED; + pr_debug("switch node %d to state %s\n", + node->nodeid, dlm_state_str(node->state)); + break; + default: + break; + } + spin_unlock(&node->state_lock); + + break; + case DLM_VERSION_3_2: + break; + default: + log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x", + DLM_VERSION_3_2, node->nodeid, node->version); goto out; + } switch (p->header.h_cmd) { case DLM_RCOM: @@ -858,8 +782,19 @@ out: srcu_read_unlock(&nodes_srcu, idx); } -static int dlm_midcomms_version_check_3_1(struct midcomms_node *node) +static void dlm_midcomms_receive_buffer_3_1(const union dlm_packet *p, int nodeid) { + uint16_t msglen = le16_to_cpu(p->header.h_length); + struct midcomms_node *node; + int idx; + + idx = srcu_read_lock(&nodes_srcu); + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { + srcu_read_unlock(&nodes_srcu, idx); + return; + } + switch (node->version) { case DLM_VERSION_NOT_SET: node->version = DLM_VERSION_3_1; @@ -872,22 +807,6 @@ static int dlm_midcomms_version_check_3_1(struct midcomms_node *node) default: log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x", DLM_VERSION_3_1, node->nodeid, node->version); - return -1; - } - - return 0; -} - -static void dlm_midcomms_receive_buffer_3_1(union dlm_packet *p, int nodeid) -{ - uint16_t msglen = le16_to_cpu(p->header.h_length); - struct midcomms_node *node; - int idx; - - idx = srcu_read_lock(&nodes_srcu); - node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen, - dlm_midcomms_version_check_3_1); - if (!node) { srcu_read_unlock(&nodes_srcu, idx); return; } @@ -977,10 +896,10 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len) switch (hd->h_version) { case cpu_to_le32(DLM_VERSION_3_1): - dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid); + dlm_midcomms_receive_buffer_3_1((const union dlm_packet *)ptr, nodeid); break; case cpu_to_le32(DLM_VERSION_3_2): - dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid); + dlm_midcomms_receive_buffer_3_2((const union dlm_packet *)ptr, nodeid); break; default: log_print("received invalid version header: %u from node %d, will skip this message", @@ -1003,8 +922,8 @@ void dlm_midcomms_unack_msg_resend(int nodeid) int idx, ret; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, 0); - if (!node) { + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { srcu_read_unlock(&nodes_srcu, idx); return; } @@ -1056,13 +975,13 @@ static void midcomms_new_msg_cb(void *data) } static struct dlm_msg *dlm_midcomms_get_msg_3_2(struct dlm_mhandle *mh, int nodeid, - int len, gfp_t allocation, char **ppc) + int len, char **ppc) { struct dlm_opts *opts; struct dlm_msg *msg; msg = dlm_lowcomms_new_msg(nodeid, len + DLM_MIDCOMMS_OPT_LEN, - allocation, ppc, midcomms_new_msg_cb, mh); + ppc, midcomms_new_msg_cb, mh); if (!msg) return NULL; @@ -1081,8 +1000,7 @@ static struct dlm_msg *dlm_midcomms_get_msg_3_2(struct dlm_mhandle *mh, int node * dlm_midcomms_commit_mhandle which is a must call if success */ #ifndef __CHECKER__ -struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, - gfp_t allocation, char **ppc) +struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, char **ppc) { struct midcomms_node *node; struct dlm_mhandle *mh; @@ -1090,16 +1008,14 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, int idx; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, 0); - if (!node) { - WARN_ON_ONCE(1); + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) goto err; - } /* this is a bug, however we going on and hope it will be resolved */ WARN_ON_ONCE(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags)); - mh = dlm_allocate_mhandle(allocation); + mh = dlm_allocate_mhandle(); if (!mh) goto err; @@ -1110,8 +1026,7 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, switch (node->version) { case DLM_VERSION_3_1: - msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc, - NULL, NULL); + msg = dlm_lowcomms_new_msg(nodeid, len, ppc, NULL, NULL); if (!msg) { dlm_free_mhandle(mh); goto err; @@ -1119,15 +1034,14 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, break; case DLM_VERSION_3_2: - msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation, - ppc); + /* send ack back if necessary */ + dlm_send_ack_threshold(node, DLM_SEND_ACK_BACK_MSG_THRESHOLD); + + msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, ppc); if (!msg) { dlm_free_mhandle(mh); goto err; } - - /* send ack back if necessary */ - dlm_send_ack_threshold(node, DLM_SEND_ACK_BACK_MSG_THRESHOLD); break; default: dlm_free_mhandle(mh); @@ -1235,14 +1149,40 @@ void dlm_midcomms_init(void) dlm_lowcomms_init(); } +static void midcomms_node_release(struct rcu_head *rcu) +{ + struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu); + + WARN_ON_ONCE(atomic_read(&node->send_queue_cnt)); + dlm_send_queue_flush(node); + kfree(node); +} + void dlm_midcomms_exit(void) { + struct midcomms_node *node; + int i, idx; + + idx = srcu_read_lock(&nodes_srcu); + for (i = 0; i < CONN_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(node, &node_hash[i], hlist) { + dlm_delete_debug_comms_file(node->debugfs); + + spin_lock(&nodes_lock); + hlist_del_rcu(&node->hlist); + spin_unlock(&nodes_lock); + + call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release); + } + } + srcu_read_unlock(&nodes_srcu, idx); + dlm_lowcomms_exit(); } static void dlm_act_fin_ack_rcv(struct midcomms_node *node) { - spin_lock(&node->state_lock); + spin_lock_bh(&node->state_lock); pr_debug("receive active fin ack from node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); @@ -1262,13 +1202,13 @@ static void dlm_act_fin_ack_rcv(struct midcomms_node *node) wake_up(&node->shutdown_wait); break; default: - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); log_print("%s: unexpected state: %d", __func__, node->state); WARN_ON_ONCE(1); return; } - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); } void dlm_midcomms_add_member(int nodeid) @@ -1277,13 +1217,13 @@ void dlm_midcomms_add_member(int nodeid) int idx; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, GFP_NOFS); - if (!node) { + node = nodeid2node(nodeid); + if (WARN_ON_ONCE(!node)) { srcu_read_unlock(&nodes_srcu, idx); return; } - spin_lock(&node->state_lock); + spin_lock_bh(&node->state_lock); if (!node->users) { pr_debug("receive add member from node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); @@ -1311,7 +1251,7 @@ void dlm_midcomms_add_member(int nodeid) node->users++; pr_debug("node %d users inc count %d\n", nodeid, node->users); - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); srcu_read_unlock(&nodes_srcu, idx); } @@ -1322,13 +1262,24 @@ void dlm_midcomms_remove_member(int nodeid) int idx; idx = srcu_read_lock(&nodes_srcu); - node = nodeid2node(nodeid, 0); + node = nodeid2node(nodeid); + /* in case of dlm_midcomms_close() removes node */ if (!node) { srcu_read_unlock(&nodes_srcu, idx); return; } - spin_lock(&node->state_lock); + spin_lock_bh(&node->state_lock); + /* case of dlm_midcomms_addr() created node but + * was not added before because dlm_midcomms_close() + * removed the node + */ + if (!node->users) { + spin_unlock_bh(&node->state_lock); + srcu_read_unlock(&nodes_srcu, idx); + return; + } + node->users--; pr_debug("node %d users dec count %d\n", nodeid, node->users); @@ -1362,20 +1313,11 @@ void dlm_midcomms_remove_member(int nodeid) break; } } - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); srcu_read_unlock(&nodes_srcu, idx); } -static void midcomms_node_release(struct rcu_head *rcu) -{ - struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu); - - WARN_ON_ONCE(atomic_read(&node->send_queue_cnt)); - dlm_send_queue_flush(node); - kfree(node); -} - void dlm_midcomms_version_wait(void) { struct midcomms_node *node; @@ -1409,7 +1351,7 @@ static void midcomms_shutdown(struct midcomms_node *node) return; } - spin_lock(&node->state_lock); + spin_lock_bh(&node->state_lock); pr_debug("receive active shutdown for node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); switch (node->state) { @@ -1428,7 +1370,7 @@ static void midcomms_shutdown(struct midcomms_node *node) */ break; } - spin_unlock(&node->state_lock); + spin_unlock_bh(&node->state_lock); if (DLM_DEBUG_FENCE_TERMINATION) msleep(5000); @@ -1438,7 +1380,7 @@ static void midcomms_shutdown(struct midcomms_node *node) node->state == DLM_CLOSED || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags), DLM_SHUTDOWN_TIMEOUT); - if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags)) + if (!ret) pr_debug("active shutdown timed out for node %d with state %s\n", node->nodeid, dlm_state_str(node->state)); else @@ -1456,20 +1398,18 @@ void dlm_midcomms_shutdown(void) for (i = 0; i < CONN_HASH_SIZE; i++) { hlist_for_each_entry_rcu(node, &node_hash[i], hlist) { midcomms_shutdown(node); + } + } - dlm_delete_debug_comms_file(node->debugfs); - - spin_lock(&nodes_lock); - hlist_del_rcu(&node->hlist); - spin_unlock(&nodes_lock); + dlm_lowcomms_shutdown(); - call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release); + for (i = 0; i < CONN_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(node, &node_hash[i], hlist) { + midcomms_node_reset(node); } } srcu_read_unlock(&nodes_srcu, idx); mutex_unlock(&close_lock); - - dlm_lowcomms_shutdown(); } int dlm_midcomms_close(int nodeid) @@ -1479,7 +1419,7 @@ int dlm_midcomms_close(int nodeid) idx = srcu_read_lock(&nodes_srcu); /* Abort pending close/remove operation */ - node = nodeid2node(nodeid, 0); + node = nodeid2node(nodeid); if (node) { /* let shutdown waiters leave */ set_bit(DLM_NODE_FLAG_CLOSE, &node->flags); @@ -1489,20 +1429,32 @@ int dlm_midcomms_close(int nodeid) synchronize_srcu(&nodes_srcu); - idx = srcu_read_lock(&nodes_srcu); mutex_lock(&close_lock); - node = nodeid2node(nodeid, 0); + idx = srcu_read_lock(&nodes_srcu); + node = nodeid2node(nodeid); if (!node) { - mutex_unlock(&close_lock); srcu_read_unlock(&nodes_srcu, idx); + mutex_unlock(&close_lock); return dlm_lowcomms_close(nodeid); } ret = dlm_lowcomms_close(nodeid); - spin_lock(&node->state_lock); - midcomms_node_reset(node); - spin_unlock(&node->state_lock); + dlm_delete_debug_comms_file(node->debugfs); + + spin_lock_bh(&nodes_lock); + hlist_del_rcu(&node->hlist); + spin_unlock_bh(&nodes_lock); srcu_read_unlock(&nodes_srcu, idx); + + /* wait that all readers left until flush send queue */ + synchronize_srcu(&nodes_srcu); + + /* drop all pending dlm messages, this is fine as + * this function get called when the node is fenced + */ + dlm_send_queue_flush(node); + + call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release); mutex_unlock(&close_lock); return ret; @@ -1545,8 +1497,8 @@ int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf, rd.node = node; rd.buf = buf; - msg = dlm_lowcomms_new_msg(node->nodeid, buflen, GFP_NOFS, - &msgbuf, midcomms_new_rawmsg_cb, &rd); + msg = dlm_lowcomms_new_msg(node->nodeid, buflen, &msgbuf, + midcomms_new_rawmsg_cb, &rd); if (!msg) return -ENOMEM; |
