From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: net/tipc: initialize security state for new connection socket Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed534ef2..3a7567f690f3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); -- cgit From 9d13ec65ede775f896c3da1cfa35283afe2f796c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:19 -0400 Subject: tipc: introduce link entry structure to struct tipc_node struct 'tipc_node' currently contains two arrays for link attributes, one for the link pointers, and one for the usable link MTUs. We now group those into a new struct 'tipc_link_entry', and introduce one single array consisting of such entries. 
Apart from being a cosmetic improvement, this is a starting point for the strict master-slave relation between node and link that we will introduce in the following commits. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 2 +- net/tipc/discover.c | 2 +- net/tipc/link.c | 60 ++++++++++--------- net/tipc/name_distr.c | 2 +- net/tipc/node.c | 163 ++++++++++++++++++++++++-------------------------- net/tipc/node.h | 50 +++++++++------- 6 files changed, 143 insertions(+), 136 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a816382fc8af..59b2f2a538e1 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -413,7 +413,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * all nodes in the cluster don't ACK at the same time */ if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { - tipc_link_proto_xmit(node->active_links[node->addr & 1], + tipc_link_proto_xmit(node_active_link(node, node->addr), STATE_MSG, 0, 0, 0, 0); tn->bcl->stats.sent_acks++; } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 967e292f53c8..933445337fb4 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -170,7 +170,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, return; tipc_node_lock(node); node->capabilities = caps; - link = node->links[bearer->identity]; + link = node->links[bearer->identity].link; /* Prepare to validate requesting node's signature and media address */ sign_match = (signature == node->signature); diff --git a/net/tipc/link.c b/net/tipc/link.c index eaa9fe54b4ae..03372a7e98df 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -132,9 +132,11 @@ static void tipc_link_put(struct tipc_link *l_ptr) static struct tipc_link *tipc_parallel_link(struct tipc_link *l) { - if (l->owner->active_links[0] != l) - return l->owner->active_links[0]; - return l->owner->active_links[1]; + struct tipc_node *n = l->owner; + + if (node_active_link(n, 0) 
!= l) + return node_active_link(n, 0); + return node_active_link(n, 1); } /* @@ -147,10 +149,11 @@ int tipc_link_is_up(struct tipc_link *l_ptr) return link_working_working(l_ptr) || link_working_unknown(l_ptr); } -int tipc_link_is_active(struct tipc_link *l_ptr) +int tipc_link_is_active(struct tipc_link *l) { - return (l_ptr->owner->active_links[0] == l_ptr) || - (l_ptr->owner->active_links[1] == l_ptr); + struct tipc_node *n = l->owner; + + return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l); } /** @@ -240,7 +243,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, return NULL; } - if (n_ptr->links[b_ptr->identity]) { + if (n_ptr->links[b_ptr->identity].link) { tipc_addr_string_fill(addr_string, n_ptr->addr); pr_err("Attempt to establish second link on <%s> to %s\n", b_ptr->name, addr_string); @@ -321,7 +324,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id) rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); - link = node->links[bearer_id]; + link = node->links[bearer_id].link; if (link) tipc_link_delete(link); tipc_node_unlock(node); @@ -446,7 +449,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET)) return; - tipc_node_link_down(l_ptr->owner, l_ptr); + tipc_node_link_down(l_ptr->owner, l_ptr->bearer_id); tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { @@ -482,7 +485,7 @@ static void link_activate(struct tipc_link *link) link->rcv_nxt = 1; link->stats.recv_info = 1; link->silent_intv_cnt = 0; - tipc_node_link_up(node, link); + tipc_node_link_up(node, link->bearer_id); tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } @@ -577,7 +580,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) case TRAFFIC_MSG_EVT: break; case ACTIVATE_MSG: - other = l_ptr->owner->active_links[0]; + 
other = node_active_link(l_ptr->owner, 0); if (other && link_working_unknown(other)) break; l_ptr->state = WORKING_WORKING; @@ -606,7 +609,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) switch (event) { case TRAFFIC_MSG_EVT: case ACTIVATE_MSG: - other = l_ptr->owner->active_links[0]; + other = node_active_link(l_ptr->owner, 0); if (other && link_working_unknown(other)) break; l_ptr->state = WORKING_WORKING; @@ -755,7 +758,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, node = tipc_node_find(net, dnode); if (node) { tipc_node_lock(node); - link = node->active_links[selector & 1]; + link = node_active_link(node, selector & 1); if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); @@ -858,9 +861,9 @@ void tipc_link_reset_all(struct tipc_node *node) tipc_addr_string_fill(addr_string, node->addr)); for (i = 0; i < MAX_BEARERS; i++) { - if (node->links[i]) { - link_print(node->links[i], "Resetting link\n"); - tipc_link_reset(node->links[i]); + if (node->links[i].link) { + link_print(node->links[i].link, "Resetting link\n"); + tipc_link_reset(node->links[i].link); } } @@ -1029,7 +1032,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ - l_ptr = n_ptr->links[b_ptr->identity]; + l_ptr = n_ptr->links[b_ptr->identity].link; if (unlikely(!l_ptr)) goto unlock; @@ -1496,7 +1499,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, struct sk_buff *skb; u32 length = msg_size(msg); - tunnel = l_ptr->owner->active_links[selector & 1]; + tunnel = node_active_link(l_ptr->owner, selector & 1); if (!tipc_link_is_up(tunnel)) { pr_warn("%stunnel link no longer available\n", link_co_err); return; @@ -1522,7 +1525,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { int msgcount; - struct tipc_link *tunnel = 
l_ptr->owner->active_links[0]; + struct tipc_link *tunnel = node_active_link(l_ptr->owner, 0); struct tipc_msg tunnel_hdr; struct sk_buff *skb; int split_bundles; @@ -1556,8 +1559,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) return; } - split_bundles = (l_ptr->owner->active_links[0] != - l_ptr->owner->active_links[1]); + split_bundles = (node_active_link(l_ptr->owner, 0) != + node_active_link(l_ptr->owner, 1)); skb_queue_walk(&l_ptr->transmq, skb) { struct tipc_msg *msg = buf_msg(skb); @@ -1660,7 +1663,7 @@ static bool tipc_link_failover_rcv(struct tipc_link *link, if (bearer_id == link->bearer_id) goto exit; - pl = link->owner->links[bearer_id]; + pl = link->owner->links[bearer_id].link; if (pl && tipc_link_is_up(pl)) tipc_link_reset(pl); @@ -1743,7 +1746,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i]; + l_ptr = n_ptr->links[i].link; if (l_ptr && !strcmp(l_ptr->name, link_name)) { *bearer_id = i; found_node = n_ptr; @@ -1865,7 +1868,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) tipc_node_lock(node); - link = node->links[bearer_id]; + link = node->links[bearer_id].link; if (!link) { res = -EINVAL; goto out; @@ -2055,10 +2058,11 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, for (i = *prev_link; i < MAX_BEARERS; i++) { *prev_link = i; - if (!node->links[i]) + if (!node->links[i].link) continue; - err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI); + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); if (err) return err; } @@ -2172,7 +2176,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) return -EINVAL; tipc_node_lock(node); - link = node->links[bearer_id]; + link = node->links[bearer_id].link; if (!link) { tipc_node_unlock(node); nlmsg_free(msg.skb); @@ -2227,7 +2231,7 @@ int 
tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) tipc_node_lock(node); - link = node->links[bearer_id]; + link = node->links[bearer_id].link; if (!link) { tipc_node_unlock(node); return -EINVAL; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 41e7b7e4dda0..3a1539e96294 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -96,7 +96,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) dnode = node->addr; if (in_own_node(net, dnode)) continue; - if (!tipc_node_active_links(node)) + if (!tipc_node_is_up(node)) continue; oskb = pskb_copy(skb, GFP_ATOMIC); if (!oskb) diff --git a/net/tipc/node.c b/net/tipc/node.c index 0b1d61a5f853..db46e5d1d156 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -224,126 +224,119 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) * * Link becomes active (alone or shared) or standby, depending on its priority. */ -void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +void tipc_node_link_up(struct tipc_node *n, int bearer_id) { - struct tipc_link **active = &n_ptr->active_links[0]; + struct tipc_link_entry **actv = &n->active_links[0]; + struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_link *l = le->link; - n_ptr->working_links++; - n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; - n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; + /* Leave room for tunnel header when returning 'mtu' to users: */ + n->links[bearer_id].mtu = l->mtu - INT_H_SIZE; + + n->working_links++; + n->action_flags |= TIPC_NOTIFY_LINK_UP; + n->link_id = l->peer_bearer_id << 16 | l->bearer_id; pr_debug("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + l->name, l->net_plane); - if (!active[0]) { - active[0] = active[1] = l_ptr; - node_established_contact(n_ptr); - goto exit; + /* No active links ? 
=> take both active slots */ + if (!actv[0]) { + actv[0] = le; + actv[1] = le; + node_established_contact(n); + return; } - if (l_ptr->priority < active[0]->priority) { - pr_debug("New link <%s> becomes standby\n", l_ptr->name); - goto exit; + if (l->priority < actv[0]->link->priority) { + pr_debug("New link <%s> becomes standby\n", l->name); + return; } - tipc_link_dup_queue_xmit(active[0], l_ptr); - if (l_ptr->priority == active[0]->priority) { - active[0] = l_ptr; - goto exit; + tipc_link_dup_queue_xmit(actv[0]->link, l); + + /* Take one active slot if applicable */ + if (l->priority == actv[0]->link->priority) { + actv[0] = le; + return; } - pr_debug("Old link <%s> becomes standby\n", active[0]->name); - if (active[1] != active[0]) - pr_debug("Old link <%s> becomes standby\n", active[1]->name); - active[0] = active[1] = l_ptr; -exit: - /* Leave room for changeover header when returning 'mtu' to users: */ - n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; + /* Higher prio than current active? 
=> take both active slots */ + pr_debug("Old l <%s> becomes standby\n", actv[0]->link->name); + if (actv[1] != actv[0]) + pr_debug("Old link <%s> now standby\n", actv[1]->link->name); + actv[0] = le; + actv[1] = le; } /** - * node_select_active_links - select active link + * node_select_active_links - select which working links should be active */ -static void node_select_active_links(struct tipc_node *n_ptr) +static void node_select_active_links(struct tipc_node *n) { - struct tipc_link **active = &n_ptr->active_links[0]; - u32 i; - u32 highest_prio = 0; + struct tipc_link_entry **actv = &n->active_links[0]; + struct tipc_link *l; + u32 b, highest = 0; - active[0] = active[1] = NULL; - - for (i = 0; i < MAX_BEARERS; i++) { - struct tipc_link *l_ptr = n_ptr->links[i]; + actv[0] = NULL; + actv[1] = NULL; - if (!l_ptr || !tipc_link_is_up(l_ptr) || - (l_ptr->priority < highest_prio)) + for (b = 0; b < MAX_BEARERS; b++) { + l = n->links[b].link; + if (!l || !tipc_link_is_up(l) || (l->priority < highest)) + continue; + if (l->priority > highest) { + highest = l->priority; + actv[0] = &n->links[b]; + actv[1] = &n->links[b]; continue; - - if (l_ptr->priority > highest_prio) { - highest_prio = l_ptr->priority; - active[0] = active[1] = l_ptr; - } else { - active[1] = l_ptr; } + actv[1] = &n->links[b]; } } /** * tipc_node_link_down - handle loss of link */ -void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +void tipc_node_link_down(struct tipc_node *n, int bearer_id) { - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); - struct tipc_link **active; + struct tipc_link_entry **actv = &n->active_links[0]; + struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_link *l = le->link; - n_ptr->working_links--; - n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; + n->working_links--; + n->action_flags |= TIPC_NOTIFY_LINK_DOWN; + n->link_id = l->peer_bearer_id << 16 | 
l->bearer_id; - if (!tipc_link_is_active(l_ptr)) { + if (!tipc_link_is_active(l)) { pr_debug("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + l->name, l->net_plane); return; } pr_debug("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); - - active = &n_ptr->active_links[0]; - if (active[0] == l_ptr) - active[0] = active[1]; - if (active[1] == l_ptr) - active[1] = active[0]; - if (active[0] == l_ptr) - node_select_active_links(n_ptr); - if (tipc_node_is_up(n_ptr)) - tipc_link_failover_send_queue(l_ptr); - else - node_lost_contact(n_ptr); + l->name, l->net_plane); - /* Leave room for changeover header when returning 'mtu' to users: */ - if (active[0]) { - n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; - return; - } - /* Loopback link went down? No fragmentation needed from now on. */ - if (n_ptr->addr == tn->own_addr) { - n_ptr->act_mtus[0] = MAX_MSG_SIZE; - n_ptr->act_mtus[1] = MAX_MSG_SIZE; - } -} + /* Redistribute active slots if applicable */ + if (actv[0] == le) + actv[0] = actv[1]; + if (actv[1] == le) + actv[1] = actv[0]; -int tipc_node_active_links(struct tipc_node *n_ptr) -{ - return n_ptr->active_links[0] != NULL; + /* Last link of this priority? 
=> select other ones if available */ + if (actv[0] == le) + node_select_active_links(n); + + if (tipc_node_is_up(n)) + tipc_link_failover_send_queue(l); + else + node_lost_contact(n); } -int tipc_node_is_up(struct tipc_node *n_ptr) +bool tipc_node_is_up(struct tipc_node *n) { - return tipc_node_active_links(n_ptr); + return n->active_links[0]; } void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->bearer_id] = l_ptr; + n_ptr->links[l_ptr->bearer_id].link = l_ptr; n_ptr->link_cnt++; } @@ -352,9 +345,9 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) int i; for (i = 0; i < MAX_BEARERS; i++) { - if (l_ptr != n_ptr->links[i]) + if (l_ptr != n_ptr->links[i].link) continue; - n_ptr->links[i] = NULL; + n_ptr->links[i].link = NULL; n_ptr->link_cnt--; } } @@ -396,7 +389,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { - struct tipc_link *l_ptr = n_ptr->links[i]; + struct tipc_link *l_ptr = n_ptr->links[i].link; if (!l_ptr) continue; l_ptr->flags &= ~LINK_FAILINGOVER; @@ -453,7 +446,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, goto exit; tipc_node_lock(node); - link = node->links[bearer_id]; + link = node->links[bearer_id].link; if (link) { strncpy(linkname, link->name, len); err = 0; diff --git a/net/tipc/node.h b/net/tipc/node.h index 5a834cf142c8..320cea313bdc 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -89,6 +89,11 @@ struct tipc_node_bclink { bool recv_permitted; }; +struct tipc_link_entry { + struct tipc_link *link; + u32 mtu; +}; + /** * struct tipc_node - TIPC node structure * @addr: network address of node @@ -98,9 +103,8 @@ struct tipc_node_bclink { * @hash: links to adjacent nodes in unsorted hash chain * @inputq: pointer to input queue containing messages for msg event * @namedq: pointer to name table input queue with name table messages - * @curr_link: the link 
holding the node lock, if any - * @active_links: pointers to active links to node - * @links: pointers to all links to node + * @active_links: pointer into links[] array, identifying which links are active + * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions * @bclink: broadcast-related info * @list: links to adjacent nodes in sorted list of cluster's nodes @@ -120,9 +124,8 @@ struct tipc_node { struct hlist_node hash; struct sk_buff_head *inputq; struct sk_buff_head *namedq; - struct tipc_link *active_links[2]; - u32 act_mtus[2]; - struct tipc_link *links[MAX_BEARERS]; + struct tipc_link_entry *active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; int action_flags; struct tipc_node_bclink bclink; struct list_head list; @@ -142,10 +145,9 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -int tipc_node_active_links(struct tipc_node *n_ptr); -int tipc_node_is_up(struct tipc_node *n_ptr); +void tipc_node_link_down(struct tipc_node *n_ptr, int bearer_id); +void tipc_node_link_up(struct tipc_node *n_ptr, int bearer_id); +bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); @@ -165,20 +167,28 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } -static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) +static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) { - struct tipc_node *node; - u32 mtu; + 
struct tipc_link_entry *le = n->active_links[sel & 1]; - node = tipc_node_find(net, addr); + if (likely(le)) + return le->link; + return NULL; +} - if (likely(node)) { - mtu = node->act_mtus[selector & 1]; - tipc_node_put(node); - } else { - mtu = MAX_MSG_SIZE; - } +static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) +{ + struct tipc_node *n; + struct tipc_link_entry *le; + unsigned int mtu = MAX_MSG_SIZE; + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return mtu; + le = n->active_links[selector & 1]; + if (likely(le)) + mtu = le->mtu; + tipc_node_put(n); return mtu; } -- cgit From d3a43b907ae688af6cb753c53cd7de05f3c1ba85 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:20 -0400 Subject: tipc: move link creation from neighbor discoverer to node As a step towards turning links into node internal entities, we move the creation of links from the neighbor discovery logics to the node's link control logics. We also create an additional entry for the link's media address in the newly introduced struct tipc_link_entry, since this is where it is needed in the upcoming commits. The current copy in struct tipc_link is kept for now, but will be removed later. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/discover.c | 20 ++++---------------- net/tipc/node.c | 27 +++++++++++++++++++++++++++ net/tipc/node.h | 6 ++++++ 3 files changed, 37 insertions(+), 16 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 933445337fb4..164d08907d6f 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -35,7 +35,7 @@ */ #include "core.h" -#include "link.h" +#include "node.h" #include "discover.h" /* min delay during bearer start up */ @@ -125,7 +125,6 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; - struct tipc_link *link; struct tipc_media_addr maddr; struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); @@ -170,13 +169,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, return; tipc_node_lock(node); node->capabilities = caps; - link = node->links[bearer->identity].link; /* Prepare to validate requesting node's signature and media address */ sign_match = (signature == node->signature); - addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr)); - link_up = link && tipc_link_is_up(link); - + tipc_node_check_dest(node, bearer, &link_up, &addr_match, &maddr); /* These three flags give us eight permutations: */ @@ -239,16 +235,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, if (accept_sign) node->signature = signature; - if (accept_addr) { - if (!link) - link = tipc_link_create(node, bearer, &maddr); - if (link) { - memcpy(&link->media_addr, &maddr, sizeof(maddr)); - tipc_link_reset(link); - } else { - respond = false; - } - } + if (accept_addr && !tipc_node_update_dest(node, bearer, &maddr)) + respond = false; /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { diff --git a/net/tipc/node.c b/net/tipc/node.c index db46e5d1d156..06f642abdf38 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -334,6 +334,33 @@ bool tipc_node_is_up(struct tipc_node *n) return 
n->active_links[0]; } +void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *b, + bool *link_up, bool *addr_match, + struct tipc_media_addr *maddr) +{ + struct tipc_link *l = n->links[b->identity].link; + struct tipc_media_addr *curr = &n->links[b->identity].maddr; + + *link_up = l && tipc_link_is_up(l); + *addr_match = l && !memcmp(curr, maddr, sizeof(*maddr)); +} + +bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b, + struct tipc_media_addr *maddr) +{ + struct tipc_link *l = n->links[b->identity].link; + struct tipc_media_addr *curr = &n->links[b->identity].maddr; + + if (!l) + l = tipc_link_create(n, b, maddr); + if (!l) + return false; + memcpy(&l->media_addr, maddr, sizeof(*maddr)); + memcpy(curr, maddr, sizeof(*maddr)); + tipc_link_reset(l); + return true; +} + void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->bearer_id].link = l_ptr; diff --git a/net/tipc/node.h b/net/tipc/node.h index 320cea313bdc..68579c70748b 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -92,6 +92,7 @@ struct tipc_node_bclink { struct tipc_link_entry { struct tipc_link *link; u32 mtu; + struct tipc_media_addr maddr; }; /** @@ -143,6 +144,11 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr); void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); +void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *bearer, + bool *link_up, bool *addr_match, + struct tipc_media_addr *maddr); +bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *bearer, + struct tipc_media_addr *maddr); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, int bearer_id); -- cgit From d39bbd445dc44259c77bbbc8aadcce7dcdba39cc Mon Sep 17 00:00:00 2001 From: 
Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:21 -0400 Subject: tipc: move link input queue to tipc_node At present, the link input queue and the name distributor receive queues are fields aggregated in struct tipc_link. This is a hazard, because a link might be deleted while a receiving socket still keeps reference to one of the queues. This commit fixes this bug. However, rather than adding yet another reference counter to the critical data path, we move the two queues to safe ground inside struct tipc_node, which is already protected, and let the link code only handle references to the queues. This is also in line with planned later changes in this area. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 27 +++++++++++++++------------ net/tipc/link.h | 12 +++++++----- net/tipc/node.c | 4 +++- net/tipc/node.h | 3 ++- 4 files changed, 27 insertions(+), 19 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 03372a7e98df..f8e0e2ceceb4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -227,7 +227,9 @@ static void link_set_timer(struct tipc_link *link, unsigned long time) */ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr) + const struct tipc_media_addr *media_addr, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq) { struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link *l_ptr; @@ -289,8 +291,9 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, __skb_queue_head_init(&l_ptr->backlogq); __skb_queue_head_init(&l_ptr->deferdq); skb_queue_head_init(&l_ptr->wakeupq); - skb_queue_head_init(&l_ptr->inputq); - skb_queue_head_init(&l_ptr->namedq); + l_ptr->inputq = inputq; + l_ptr->namedq = namedq; + skb_queue_head_init(l_ptr->inputq); link_reset_statistics(l_ptr); tipc_node_attach_link(n_ptr, l_ptr); setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); 
@@ -391,8 +394,8 @@ void link_prepare_wakeup(struct tipc_link *l) if ((pnd[imp] + l->backlog[imp].len) >= lim) break; skb_unlink(skb, &l->wakeupq); - skb_queue_tail(&l->inputq, skb); - l->owner->inputq = &l->inputq; + skb_queue_tail(l->inputq, skb); + l->owner->inputq = l->inputq; l->owner->action_flags |= TIPC_MSG_EVT; } } @@ -465,7 +468,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) __skb_queue_purge(&l_ptr->transmq); __skb_queue_purge(&l_ptr->deferdq); if (!owner->inputq) - owner->inputq = &l_ptr->inputq; + owner->inputq = l_ptr->inputq; skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; @@ -962,7 +965,7 @@ static bool link_synch(struct tipc_link *l) /* Is it still in the input queue ? */ post_synch = mod(pl->rcv_nxt - l->synch_point) - 1; - if (skb_queue_len(&pl->inputq) > post_synch) + if (skb_queue_len(pl->inputq) > post_synch) return false; synched: l->flags &= ~LINK_SYNCHING; @@ -1141,16 +1144,16 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - if (tipc_skb_queue_tail(&link->inputq, skb, dport)) { - node->inputq = &link->inputq; + if (tipc_skb_queue_tail(link->inputq, skb, dport)) { + node->inputq = link->inputq; node->action_flags |= TIPC_MSG_EVT; } return true; case NAME_DISTRIBUTOR: node->bclink.recv_permitted = true; - node->namedq = &link->namedq; - skb_queue_tail(&link->namedq, skb); - if (skb_queue_len(&link->namedq) == 1) + node->namedq = link->namedq; + skb_queue_tail(link->namedq, skb); + if (skb_queue_len(link->namedq) == 1) node->action_flags |= TIPC_NAMED_MSG_EVT; return true; case MSG_BUNDLER: diff --git a/net/tipc/link.h b/net/tipc/link.h index ae0a0ea572f2..9c71d9e42e93 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -192,8 +192,8 @@ struct tipc_link { u16 rcv_nxt; u32 rcv_unacked; struct sk_buff_head deferdq; - struct sk_buff_head inputq; - struct 
sk_buff_head namedq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; /* Congestion handling */ struct sk_buff_head wakeupq; @@ -207,9 +207,11 @@ struct tipc_link { struct tipc_port; -struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, - struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr); +struct tipc_link *tipc_link_create(struct tipc_node *n, + struct tipc_bearer *b, + const struct tipc_media_addr *maddr, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq); void tipc_link_delete(struct tipc_link *link); void tipc_link_delete_list(struct net *net, unsigned int bearer_id); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); diff --git a/net/tipc/node.c b/net/tipc/node.c index 06f642abdf38..20ec61ceffac 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -132,6 +132,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); + skb_queue_head_init(&n_ptr->bclink.namedq); __skb_queue_head_init(&n_ptr->bclink.deferdq); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -350,9 +351,10 @@ bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b, { struct tipc_link *l = n->links[b->identity].link; struct tipc_media_addr *curr = &n->links[b->identity].maddr; + struct sk_buff_head *inputq = &n->links[b->identity].inputq; if (!l) - l = tipc_link_create(n, b, maddr); + l = tipc_link_create(n, b, maddr, inputq, &n->bclink.namedq); if (!l) return false; memcpy(&l->media_addr, maddr, sizeof(*maddr)); diff --git a/net/tipc/node.h b/net/tipc/node.h index 68579c70748b..0657cbf1f5cd 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -85,13 +85,14 @@ struct tipc_node_bclink { u32 deferred_size; struct sk_buff_head deferdq; struct sk_buff *reasm_buf; - int inputq_map; + struct sk_buff_head namedq; bool recv_permitted; }; 
struct tipc_link_entry { struct tipc_link *link; u32 mtu; + struct sk_buff_head inputq; struct tipc_media_addr maddr; }; -- cgit From 36e78a463b26c9b8017a2e11dcd6c4b8e34b4161 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:22 -0400 Subject: tipc: use bearer index when looking up active links struct tipc_node currently holds two arrays of link pointers; one, indexed by bearer identity, which contains all links irrespective of current state, and one two-slot array for the currently active link or links. The latter array contains direct pointers into the elements of the former. This has the effect that we cannot know the bearer id of a link when accessing it via the "active_links[]" array without actually dereferencing the pointer, something we want to avoid in some cases. In this commit, we do instead store the bearer identity in the "active_links" array, and use this as an index to find the right element in the overall link entry array. This change should be seen as a preparation for the later commits in this series. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/node.c | 106 +++++++++++++++++++++++--------------------------------- net/tipc/node.h | 26 ++++++++------ 2 files changed, 59 insertions(+), 73 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index 20ec61ceffac..19729645d494 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -142,6 +142,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + n_ptr->active_links[0] = INVALID_BEARER_ID; + n_ptr->active_links[1] = INVALID_BEARER_ID; tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); @@ -227,12 +229,13 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) */ void tipc_node_link_up(struct tipc_node *n, int bearer_id) { - struct tipc_link_entry **actv = &n->active_links[0]; - struct tipc_link_entry *le = &n->links[bearer_id]; - struct tipc_link *l = le->link; + int *slot0 = &n->active_links[0]; + int *slot1 = &n->active_links[1]; + struct tipc_link_entry *links = n->links; + struct tipc_link *l = n->links[bearer_id].link; /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = l->mtu - INT_H_SIZE; + links[bearer_id].mtu = l->mtu - INT_H_SIZE; n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; @@ -242,55 +245,30 @@ void tipc_node_link_up(struct tipc_node *n, int bearer_id) l->name, l->net_plane); /* No active links ? => take both active slots */ - if (!actv[0]) { - actv[0] = le; - actv[1] = le; + if (*slot0 < 0) { + *slot0 = bearer_id; + *slot1 = bearer_id; node_established_contact(n); return; } - if (l->priority < actv[0]->link->priority) { + + /* Lower prio than current active ? 
=> no slot */ + if (l->priority < links[*slot0].link->priority) { pr_debug("New link <%s> becomes standby\n", l->name); return; } - tipc_link_dup_queue_xmit(actv[0]->link, l); + tipc_link_dup_queue_xmit(links[*slot0].link, l); - /* Take one active slot if applicable */ - if (l->priority == actv[0]->link->priority) { - actv[0] = le; + /* Same prio as current active ? => take one slot */ + if (l->priority == links[*slot0].link->priority) { + *slot0 = bearer_id; return; } - /* Higher prio than current active? => take both active slots */ - pr_debug("Old l <%s> becomes standby\n", actv[0]->link->name); - if (actv[1] != actv[0]) - pr_debug("Old link <%s> now standby\n", actv[1]->link->name); - actv[0] = le; - actv[1] = le; -} - -/** - * node_select_active_links - select which working links should be active - */ -static void node_select_active_links(struct tipc_node *n) -{ - struct tipc_link_entry **actv = &n->active_links[0]; - struct tipc_link *l; - u32 b, highest = 0; - actv[0] = NULL; - actv[1] = NULL; - - for (b = 0; b < MAX_BEARERS; b++) { - l = n->links[b].link; - if (!l || !tipc_link_is_up(l) || (l->priority < highest)) - continue; - if (l->priority > highest) { - highest = l->priority; - actv[0] = &n->links[b]; - actv[1] = &n->links[b]; - continue; - } - actv[1] = &n->links[b]; - } + /* Higher prio than current active => take both active slots */ + pr_debug("Old link <%s> now standby\n", links[*slot0].link->name); + *slot0 = bearer_id; + *slot1 = bearer_id; } /** @@ -298,32 +276,36 @@ static void node_select_active_links(struct tipc_node *n) */ void tipc_node_link_down(struct tipc_node *n, int bearer_id) { - struct tipc_link_entry **actv = &n->active_links[0]; - struct tipc_link_entry *le = &n->links[bearer_id]; - struct tipc_link *l = le->link; + int *slot0 = &n->active_links[0]; + int *slot1 = &n->active_links[1]; + int i, highest = 0; + struct tipc_link *l, *_l; + l = n->links[bearer_id].link; n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; 
n->link_id = l->peer_bearer_id << 16 | l->bearer_id; - if (!tipc_link_is_active(l)) { - pr_debug("Lost standby link <%s> on network plane %c\n", - l->name, l->net_plane); - return; - } pr_debug("Lost link <%s> on network plane %c\n", l->name, l->net_plane); - /* Resdistribute active slots if applicable */ - if (actv[0] == le) - actv[0] = actv[1]; - if (actv[1] == le) - actv[1] = actv[0]; - - /* Last link of this priority? => select other ones if available */ - if (actv[0] == le) - node_select_active_links(n); - + /* Select new active link if any available */ + *slot0 = INVALID_BEARER_ID; + *slot1 = INVALID_BEARER_ID; + for (i = 0; i < MAX_BEARERS; i++) { + _l = n->links[i].link; + if (!_l || !tipc_link_is_up(_l)) + continue; + if (_l->priority < highest) + continue; + if (_l->priority > highest) { + highest = _l->priority; + *slot0 = i; + *slot1 = i; + continue; + } + *slot1 = i; + } if (tipc_node_is_up(n)) tipc_link_failover_send_queue(l); else @@ -332,7 +314,7 @@ void tipc_node_link_down(struct tipc_node *n, int bearer_id) bool tipc_node_is_up(struct tipc_node *n) { - return n->active_links[0]; + return n->active_links[0] != INVALID_BEARER_ID; } void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *b, diff --git a/net/tipc/node.h b/net/tipc/node.h index 0657cbf1f5cd..74f278adada3 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -45,6 +45,8 @@ /* Out-of-range value for node signature */ #define INVALID_NODE_SIG 0x10000 +#define INVALID_BEARER_ID -1 + /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down @@ -105,7 +107,7 @@ struct tipc_link_entry { * @hash: links to adjacent nodes in unsorted hash chain * @inputq: pointer to input queue containing messages for msg event * @namedq: pointer to name table input queue with name table messages - * @active_links: pointer into links[] array, identifying which 
links are active + * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions * @bclink: broadcast-related info @@ -126,7 +128,7 @@ struct tipc_node { struct hlist_node hash; struct sk_buff_head *inputq; struct sk_buff_head *namedq; - struct tipc_link_entry *active_links[2]; + int active_links[2]; struct tipc_link_entry links[MAX_BEARERS]; int action_flags; struct tipc_node_bclink bclink; @@ -176,25 +178,27 @@ static inline bool tipc_node_blocked(struct tipc_node *node) static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) { - struct tipc_link_entry *le = n->active_links[sel & 1]; + int bearer_id = n->active_links[sel & 1]; + + if (unlikely(bearer_id == INVALID_BEARER_ID)) + return NULL; - if (likely(le)) - return le->link; - return NULL; + return n->links[bearer_id].link; } -static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) +static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) { struct tipc_node *n; - struct tipc_link_entry *le; + int bearer_id; unsigned int mtu = MAX_MSG_SIZE; n = tipc_node_find(net, addr); if (unlikely(!n)) return mtu; - le = n->active_links[selector & 1]; - if (likely(le)) - mtu = le->mtu; + + bearer_id = n->active_links[sel & 1]; + if (likely(bearer_id != INVALID_BEARER_ID)) + mtu = n->links[bearer_id].mtu; tipc_node_put(n); return mtu; } -- cgit From 22d85c79428b8ca9a01623aa3e3a1fe29a30a119 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:23 -0400 Subject: tipc: change sk_buffer handling in tipc_link_xmit() When the function tipc_link_xmit() is given a buffer list for transmission, it currently consumes the list both when transmission is successful and when it fails, except for the special case when it encounters link congestion. 
This behavior is inconsistent, and needs to be corrected if we want to avoid problems in later commits in this series. In this commit, we change this to let the function consume the list only when transmission is successful, and leave the list with the sender in all other cases. We also modify the socket code so that it adapts to this change, i.e., purges the list when a non-congestion error code is returned. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 5 ++--- net/tipc/link.c | 23 +++++++++-------------- net/tipc/socket.c | 49 ++++++++++++++++++++++++++----------------------- 3 files changed, 37 insertions(+), 40 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 59b2f2a538e1..295bdc26f103 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -358,10 +358,9 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) /* Prepare clone of message for local node */ skb = tipc_msg_reassemble(list); - if (unlikely(!skb)) { - __skb_queue_purge(list); + if (unlikely(!skb)) return -EHOSTUNREACH; - } + /* Broadcast to all nodes */ if (likely(bclink)) { tipc_bclink_lock(net); diff --git a/net/tipc/link.c b/net/tipc/link.c index f8e0e2ceceb4..ea32679b6737 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -340,7 +340,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id) * @link: congested link * @list: message that was attempted sent * Create pseudo msg to send back to user when congestion abates - * Only consumes message if there is an error + * Does not consume buffer list */ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) { @@ -354,7 +354,7 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); tipc_link_reset(link); - goto err; + return -ENOBUFS; } /* Non-blocking sender: */ 
if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) @@ -364,15 +364,12 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, addr, addr, oport, 0, 0); if (!skb) - goto err; + return -ENOBUFS; TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); TIPC_SKB_CB(skb)->chain_imp = imp; skb_queue_tail(&link->wakeupq, skb); link->stats.link_congs++; return -ELINKCONG; -err: - __skb_queue_purge(list); - return -ENOBUFS; } /** @@ -641,8 +638,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) * @link: link to use * @list: chain of buffers containing message * - * Consumes the buffer chain, except when returning -ELINKCONG, - * since the caller then may want to make more send attempts. + * Consumes the buffer chain, except when returning an error code, * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted */ @@ -666,10 +662,9 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, if (unlikely(link->backlog[i].len >= link->backlog[i].limit)) return link_schedule_user(link, list); } - if (unlikely(msg_size(msg) > mtu)) { - __skb_queue_purge(list); + if (unlikely(msg_size(msg) > mtu)) return -EMSGSIZE; - } + /* Prepare each packet for sending, and add to relevant queue: */ while (skb_queue_len(list)) { skb = skb_peek(list); @@ -722,7 +717,7 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) /* tipc_link_xmit_skb(): send single buffer to destination * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE - * messages, which will not be rejected + * messages, which will not cause link congestion * The only exception is datagram messages rerouted after secondary * lookup, which are rare and safe to dispose of anyway. 
* TODO: Return real return value, and let callers use @@ -736,7 +731,7 @@ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, skb2list(skb, &head); rc = tipc_link_xmit(net, &head, dnode, selector); - if (rc == -ELINKCONG) + if (rc) kfree_skb(skb); return 0; } @@ -748,7 +743,7 @@ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, * @dsz: amount of user data to be sent * @dnode: address of destination node * @selector: a number used for deterministic link selection - * Consumes the buffer chain, except when returning -ELINKCONG + * Consumes the buffer chain, except when returning error * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3a7567f690f3..87fef25f6519 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -686,21 +686,22 @@ new_mtu: do { rc = tipc_bclink_xmit(net, pktchain); - if (likely(rc >= 0)) { - rc = dsz; - break; + if (likely(!rc)) + return dsz; + + if (rc == -ELINKCONG) { + tsk->link_cong = 1; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (!rc) + continue; } + __skb_queue_purge(pktchain); if (rc == -EMSGSIZE) { msg->msg_iter = save; goto new_mtu; } - if (rc != -ELINKCONG) - break; - tipc_sk(sk)->link_cong = 1; - rc = tipc_wait_for_sndmsg(sock, &timeo); - if (rc) - __skb_queue_purge(pktchain); - } while (!rc); + break; + } while (1); return rc; } @@ -925,23 +926,24 @@ new_mtu: skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); - if (likely(rc >= 0)) { + if (likely(!rc)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; - rc = dsz; - break; + return dsz; } + if (rc == -ELINKCONG) { + tsk->link_cong = 1; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (!rc) + continue; + } + __skb_queue_purge(pktchain); if (rc == -EMSGSIZE) { m->msg_iter = save; goto new_mtu; 
} - if (rc != -ELINKCONG) - break; - tsk->link_cong = 1; - rc = tipc_wait_for_sndmsg(sock, &timeo); - if (rc) - __skb_queue_purge(pktchain); - } while (!rc); + break; + } while (1); return rc; } @@ -1048,10 +1050,11 @@ next: tsk->sent_unacked++; sent += send; if (sent == dsz) - break; + return dsz; goto next; } if (rc == -EMSGSIZE) { + __skb_queue_purge(pktchain); tsk->max_pkt = tipc_node_get_mtu(net, dnode, portid); m->msg_iter = save; @@ -1059,13 +1062,13 @@ next: } if (rc != -ELINKCONG) break; + tsk->link_cong = 1; } rc = tipc_wait_for_sndpkt(sock, &timeo); - if (rc) - __skb_queue_purge(pktchain); } while (!rc); + __skb_queue_purge(pktchain); return sent ? sent : rc; } -- cgit From af9b028e270fda6fb812d70d17d902297df1ceb5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:24 -0400 Subject: tipc: make media xmit call outside node spinlock context Currently, message sending is performed through a deep call chain, where the node spinlock is grabbed and held during a significant part of the transmission time. This is clearly detrimental to overall throughput performance; it would be better if we could send the message after the spinlock has been released. In this commit, we do instead let the call revert on the stack after the buffer chain has been added to the transmission queue, whereafter clones of the buffers are transmitted to the device layer outside the spinlock scope. As a further step in our effort to separate the roles of the node and link entities we also move the function tipc_link_xmit() to node.c, and rename it to tipc_node_xmit(). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/bearer.c | 26 ++++++++++ net/tipc/bearer.h | 3 ++ net/tipc/link.c | 132 +++++++++++++++++++++++++++----------------------- net/tipc/link.h | 6 +-- net/tipc/name_distr.c | 4 +- net/tipc/node.c | 78 +++++++++++++++++++++++++++++ net/tipc/node.h | 4 ++ net/tipc/socket.c | 22 ++++----- 8 files changed, 198 insertions(+), 77 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 00bc0e620532..eae58a6b121c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -470,6 +470,32 @@ void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, rcu_read_unlock(); } +/* tipc_bearer_xmit() -send buffer to destination over bearer + */ +void tipc_bearer_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr *dst) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + + if (skb_queue_empty(xmitq)) + return; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) { + skb_queue_walk_safe(xmitq, skb, tmp) { + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, dst); + /* Until we remove cloning in tipc_l2_send_msg(): */ + kfree_skb(skb); + } + } + rcu_read_unlock(); +} + /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface * @buf: the received packet diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index dc714d977768..6426f242f626 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -217,5 +217,8 @@ void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); +void tipc_bearer_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr *dst); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/link.c b/net/tipc/link.c index ea32679b6737..c052437a7cfa 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ 
-353,7 +353,6 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) /* This really cannot happen... */ if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); - tipc_link_reset(link); return -ENOBUFS; } /* Non-blocking sender: */ @@ -701,6 +700,78 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, return 0; } +/** + * tipc_link_xmit(): enqueue buffer list according to queue situation + * @link: link to use + * @list: chain of buffers containing message + * @xmitq: returned list of packets to be sent by caller + * + * Consumes the buffer chain, except when returning -ELINKCONG, + * since the caller then may want to make more send attempts. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted + */ +int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb_peek(list)); + unsigned int maxwin = l->window; + unsigned int i, imp = msg_importance(hdr); + unsigned int mtu = l->mtu; + u16 ack = l->rcv_nxt - 1; + u16 seqno = l->snd_nxt; + u16 bc_last_in = l->owner->bclink.last_in; + struct sk_buff_head *transmq = &l->transmq; + struct sk_buff_head *backlogq = &l->backlogq; + struct sk_buff *skb, *_skb, *bskb; + + /* Match msg importance against this and all higher backlog limits: */ + for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { + if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) + return link_schedule_user(l, list); + } + if (unlikely(msg_size(hdr) > mtu)) + return -EMSGSIZE; + + /* Prepare each packet for sending, and add to relevant queue: */ + while (skb_queue_len(list)) { + skb = skb_peek(list); + hdr = buf_msg(skb); + msg_set_seqno(hdr, seqno); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_last_in); + + if (likely(skb_queue_len(transmq) < maxwin)) { + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) + 
return -ENOBUFS; + __skb_dequeue(list); + __skb_queue_tail(transmq, skb); + __skb_queue_tail(xmitq, _skb); + l->rcv_unacked = 0; + seqno++; + continue; + } + if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) { + kfree_skb(__skb_dequeue(list)); + l->stats.sent_bundled++; + continue; + } + if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) { + kfree_skb(__skb_dequeue(list)); + __skb_queue_tail(backlogq, bskb); + l->backlog[msg_importance(buf_msg(bskb))].len++; + l->stats.sent_bundled++; + l->stats.sent_bundles++; + continue; + } + l->backlog[imp].len += skb_queue_len(list); + skb_queue_splice_tail_init(list, backlogq); + } + l->snd_nxt = seqno; + return 0; +} + static void skb2list(struct sk_buff *skb, struct sk_buff_head *list) { skb_queue_head_init(list); @@ -715,65 +786,6 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link->owner->net, link, &head); } -/* tipc_link_xmit_skb(): send single buffer to destination - * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE - * messages, which will not cause link congestion - * The only exception is datagram messages rerouted after secondary - * lookup, which are rare and safe to dispose of anyway. 
- * TODO: Return real return value, and let callers use - * tipc_wait_for_sendpkt() where applicable - */ -int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, - u32 selector) -{ - struct sk_buff_head head; - int rc; - - skb2list(skb, &head); - rc = tipc_link_xmit(net, &head, dnode, selector); - if (rc) - kfree_skb(skb); - return 0; -} - -/** - * tipc_link_xmit() is the general link level function for message sending - * @net: the applicable net namespace - * @list: chain of buffers containing message - * @dsz: amount of user data to be sent - * @dnode: address of destination node - * @selector: a number used for deterministic link selection - * Consumes the buffer chain, except when returning error - * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE - */ -int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, - u32 selector) -{ - struct tipc_link *link = NULL; - struct tipc_node *node; - int rc = -EHOSTUNREACH; - - node = tipc_node_find(net, dnode); - if (node) { - tipc_node_lock(node); - link = node_active_link(node, selector & 1); - if (link) - rc = __tipc_link_xmit(net, link, list); - tipc_node_unlock(node); - tipc_node_put(node); - } - if (link) - return rc; - - if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } - - __skb_queue_purge(list); - return rc; -} - /* * tipc_link_sync_xmit - synchronize broadcast link endpoints. 
* diff --git a/net/tipc/link.h b/net/tipc/link.h index 9c71d9e42e93..7add2b90361d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -223,12 +223,10 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr); void tipc_link_purge_backlog(struct tipc_link *l); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, - u32 selector); -int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, - u32 selector); int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, + struct sk_buff_head *xmitq); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority); void tipc_link_push_packets(struct tipc_link *l_ptr); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 3a1539e96294..e6018b7eb197 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -102,7 +102,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_link_xmit_skb(net, oskb, dnode, dnode); + tipc_node_xmit_skb(net, oskb, dnode, dnode); } rcu_read_unlock(); @@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_link_xmit(net, &head, dnode, dnode); + tipc_node_xmit(net, &head, dnode, dnode); } static void tipc_publ_subscribe(struct net *net, struct publication *publ, diff --git a/net/tipc/node.c b/net/tipc/node.c index 19729645d494..ad759bb034e7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -563,6 +563,84 @@ msg_full: return -EMSGSIZE; } +static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, + int *bearer_id, + struct tipc_media_addr **maddr) +{ + int id = n->active_links[sel & 1]; + + if (unlikely(id < 0)) + return 
NULL; + + *bearer_id = id; + *maddr = &n->links[id].maddr; + return n->links[id].link; +} + +/** + * tipc_node_xmit() is the general link level function for message sending + * @net: the applicable net namespace + * @list: chain of buffers containing message + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_node_xmit(struct net *net, struct sk_buff_head *list, + u32 dnode, int selector) +{ + struct tipc_link *l = NULL; + struct tipc_node *n; + struct sk_buff_head xmitq; + struct tipc_media_addr *maddr; + int bearer_id; + int rc = -EHOSTUNREACH; + + __skb_queue_head_init(&xmitq); + n = tipc_node_find(net, dnode); + if (likely(n)) { + tipc_node_lock(n); + l = tipc_node_select_link(n, selector, &bearer_id, &maddr); + if (likely(l)) + rc = tipc_link_xmit(l, list, &xmitq); + if (unlikely(rc == -ENOBUFS)) + tipc_link_reset(l); + tipc_node_unlock(n); + tipc_node_put(n); + } + if (likely(!rc)) { + tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); + return 0; + } + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } + return rc; +} + +/* tipc_node_xmit_skb(): send single buffer to destination + * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. 
+ * TODO: Return real return value, and let callers use + * tipc_wait_for_sendpkt() where applicable + */ +int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) +{ + struct sk_buff_head head; + int rc; + + skb_queue_head_init(&head); + __skb_queue_tail(&head, skb); + rc = tipc_node_xmit(net, &head, dnode, selector); + if (rc == -ELINKCONG) + kfree_skb(skb); + return 0; +} + int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; diff --git a/net/tipc/node.h b/net/tipc/node.h index 74f278adada3..86b7c740cf84 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -160,6 +160,10 @@ bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); +int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, + int selector); +int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 87fef25f6519..5b0b08d58fcc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -261,7 +261,7 @@ static void tsk_rej_rx_queue(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -443,7 +443,7 @@ static int tipc_release(struct socket *sock) } if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(net, skb, dnode, 0); + tipc_node_xmit_skb(net, skb, dnode, 0); } } @@ -456,7 +456,7 @@ static int tipc_release(struct socket *sock) tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(net, skb, dnode, 
tsk->portid); + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_remove_conn(net, dnode, tsk->portid); } @@ -925,7 +925,7 @@ new_mtu: do { skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); + rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid); if (likely(!rc)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1045,7 +1045,7 @@ next: return rc; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(net, pktchain, dnode, portid); + rc = tipc_node_xmit(net, pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1224,7 +1224,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); + tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1703,7 +1703,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) - tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1799,7 +1799,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) continue; xmit: - tipc_link_xmit_skb(net, skb, dnode, dport); + tipc_node_xmit_skb(net, skb, dnode, dport); } return err ? 
-EHOSTUNREACH : 0; } @@ -2092,7 +2092,7 @@ restart: } if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(net, skb, dnode, + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); @@ -2102,7 +2102,7 @@ restart: 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; @@ -2164,7 +2164,7 @@ static void tipc_sk_timeout(unsigned long data) } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); + tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: sock_put(sk); } -- cgit From d3504c3449fead545e5254bfb11da916f72c4734 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:25 -0400 Subject: tipc: clean up definitions and usage of link flags The status flag LINK_STOPPED is not needed any more, since the mechanism for delayed deletion of links has been removed. Likewise, LINK_STARTED and LINK_START_EVT are unnecessary, because we can just as well start the link timer directly from inside tipc_link_create(). We eliminate these flags in this commit. Instead of the above flags, we now introduce three new link modes, TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages the link is allowed to receive in this state. TIPC_LINK_BLOCKED also blocks timer-driven protocol messages to be sent out, and any change to the link FSM. Since the modes are mutually exclusive, we convert them to state values, and rename the 'flags' field in struct tipc_link to 'exec_mode'. Finally, we move the #defines for link FSM states and events from link.h into enums inside the file link.c, which is the real usage scope of these definitions. 
Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 1 - net/tipc/link.c | 98 +++++++++++++++++++++++++++++++++----------------------- net/tipc/link.h | 44 +++++-------------------- net/tipc/node.c | 2 +- 4 files changed, 67 insertions(+), 78 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 295bdc26f103..aab4e8dd7b32 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -924,7 +924,6 @@ int tipc_bclink_init(struct net *net) tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); - bcl->state = WORKING_WORKING; bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; msg_set_prevnode(bcl->pmsg, tn->own_addr); strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); diff --git a/net/tipc/link.c b/net/tipc/link.c index c052437a7cfa..35a2da688db1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -79,19 +79,49 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { /* * Out-of-range value for link session numbers */ -#define INVALID_SESSION 0x10000 +#define WILDCARD_SESSION 0x10000 -/* - * Link state events: +/* State value stored in 'failover_pkts' */ -#define STARTING_EVT 856384768 /* link processing trigger */ -#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? 
*/ -#define SILENCE_EVT 560817u /* timer dicovered silence from peer */ +#define FIRST_FAILOVER 0xffffu -/* - * State value stored in 'failover_pkts' +/* Link FSM states and events: */ -#define FIRST_FAILOVER 0xffffu +enum { + WORKING_WORKING, + WORKING_UNKNOWN, + RESET_RESET, + RESET_UNKNOWN +}; + +enum { + PEER_RESET_EVT = RESET_MSG, + ACTIVATE_EVT = ACTIVATE_MSG, + TRAFFIC_EVT, /* Any other valid msg from peer */ + SILENCE_EVT /* Peer was silent during last timer interval*/ +}; + +/* Link FSM state checking routines + */ +static int link_working_working(struct tipc_link *l) +{ + return l->state == WORKING_WORKING; +} + +static int link_working_unknown(struct tipc_link *l) +{ + return l->state == WORKING_UNKNOWN; +} + +static int link_reset_unknown(struct tipc_link *l) +{ + return l->state == RESET_UNKNOWN; +} + +static int link_reset_reset(struct tipc_link *l) +{ + return l->state == RESET_RESET; +} static void link_handle_out_of_seq_msg(struct tipc_link *link, struct sk_buff *skb); @@ -268,7 +298,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, /* note: peer i/f name is updated by reset/activate message */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); l_ptr->owner = n_ptr; - l_ptr->peer_session = INVALID_SESSION; + l_ptr->peer_session = WILDCARD_SESSION; l_ptr->bearer_id = b_ptr->identity; link_set_supervision_props(l_ptr, b_ptr->tolerance); l_ptr->state = RESET_UNKNOWN; @@ -297,8 +327,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, link_reset_statistics(l_ptr); tipc_node_attach_link(n_ptr, l_ptr); setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); - link_state_event(l_ptr, STARTING_EVT); - + link_set_timer(l_ptr, l_ptr->keepalive_intv); return l_ptr; } @@ -311,7 +340,6 @@ void tipc_link_delete(struct tipc_link *l) tipc_link_reset(l); if (del_timer(&l->timer)) tipc_link_put(l); - l->flags |= LINK_STOPPED; /* Delete link now, or when timer is finished: */ tipc_link_reset_fragments(l); 
tipc_node_detach_link(l->owner, l); @@ -438,7 +466,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); /* Link is down, accept any session */ - l_ptr->peer_session = INVALID_SESSION; + l_ptr->peer_session = WILDCARD_SESSION; /* Prepare for renewed mtu size negotiation */ l_ptr->mtu = l_ptr->advertised_mtu; @@ -452,7 +480,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { - l_ptr->flags |= LINK_FAILINGOVER; + l_ptr->exec_mode = TIPC_LINK_BLOCKED; l_ptr->failover_checkpt = l_ptr->rcv_nxt; pl->failover_pkts = FIRST_FAILOVER; pl->failover_checkpt = l_ptr->rcv_nxt; @@ -496,21 +524,14 @@ static void link_activate(struct tipc_link *link) static void link_state_event(struct tipc_link *l_ptr, unsigned int event) { struct tipc_link *other; - unsigned long timer_intv = l_ptr->keepalive_intv; - - if (l_ptr->flags & LINK_STOPPED) - return; - - if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) - return; /* Not yet. 
*/ - if (l_ptr->flags & LINK_FAILINGOVER) + if (l_ptr->exec_mode == TIPC_LINK_BLOCKED) return; switch (l_ptr->state) { case WORKING_WORKING: switch (event) { - case TRAFFIC_MSG_EVT: + case TRAFFIC_EVT: case ACTIVATE_MSG: l_ptr->silent_intv_cnt = 0; break; @@ -538,7 +559,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case WORKING_UNKNOWN: switch (event) { - case TRAFFIC_MSG_EVT: + case TRAFFIC_EVT: case ACTIVATE_MSG: l_ptr->state = WORKING_WORKING; l_ptr->silent_intv_cnt = 0; @@ -576,7 +597,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case RESET_UNKNOWN: switch (event) { - case TRAFFIC_MSG_EVT: + case TRAFFIC_EVT: break; case ACTIVATE_MSG: other = node_active_link(l_ptr->owner, 0); @@ -593,10 +614,6 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0); break; - case STARTING_EVT: - l_ptr->flags |= LINK_STARTED; - link_set_timer(l_ptr, timer_intv); - break; case SILENCE_EVT: tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); break; @@ -606,7 +623,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case RESET_RESET: switch (event) { - case TRAFFIC_MSG_EVT: + case TRAFFIC_EVT: case ACTIVATE_MSG: other = node_active_link(l_ptr->owner, 0); if (other && link_working_unknown(other)) @@ -975,7 +992,7 @@ static bool link_synch(struct tipc_link *l) if (skb_queue_len(pl->inputq) > post_synch) return false; synched: - l->flags &= ~LINK_SYNCHING; + l->exec_mode = TIPC_LINK_OPEN; return true; } @@ -1091,7 +1108,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) } /* Traffic message. 
Conditionally activate link */ - link_state_event(l_ptr, TRAFFIC_MSG_EVT); + link_state_event(l_ptr, TRAFFIC_EVT); if (link_working_working(l_ptr)) { /* Re-insert buffer in front of queue */ @@ -1112,7 +1129,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) l_ptr->silent_intv_cnt = 0; /* Synchronize with parallel link if applicable */ - if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { + if (unlikely((l_ptr->exec_mode == TIPC_LINK_TUNNEL) && + !msg_dup(msg))) { if (!link_synch(l_ptr)) goto unlock; } @@ -1193,7 +1211,7 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) switch (msg_user(msg)) { case TUNNEL_PROTOCOL: if (msg_dup(msg)) { - link->flags |= LINK_SYNCHING; + link->exec_mode = TIPC_LINK_TUNNEL; link->synch_point = msg_seqno(msg_get_wrapped(msg)); kfree_skb(skb); break; @@ -1315,7 +1333,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, u16 last_rcv; /* Don't send protocol message during link failover */ - if (l_ptr->flags & LINK_FAILINGOVER) + if (l_ptr->exec_mode == TIPC_LINK_BLOCKED) return; /* Abort non-RESET send if communication with node is prohibited */ @@ -1390,7 +1408,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - if (l_ptr->flags & LINK_FAILINGOVER) + if (l_ptr->exec_mode == TIPC_LINK_BLOCKED) goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) @@ -1401,7 +1419,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, case RESET_MSG: if (!link_working_unknown(l_ptr) && - (l_ptr->peer_session != INVALID_SESSION)) { + (l_ptr->peer_session != WILDCARD_SESSION)) { if (less_eq(msg_session(msg), l_ptr->peer_session)) break; /* duplicate or old reset: ignore */ } @@ -1465,7 +1483,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, /* Record reception; force mismatch at next timeout: */ l_ptr->silent_intv_cnt = 0; - link_state_event(l_ptr, TRAFFIC_MSG_EVT); + 
link_state_event(l_ptr, TRAFFIC_EVT); l_ptr->stats.recv_states++; if (link_reset_unknown(l_ptr)) break; @@ -1704,7 +1722,7 @@ static bool tipc_link_failover_rcv(struct tipc_link *link, } exit: if (!link->failover_pkts && pl) - pl->flags &= ~LINK_FAILINGOVER; + pl->exec_mode = TIPC_LINK_OPEN; kfree_skb(*skb); *skb = iskb; return *skb; diff --git a/net/tipc/link.h b/net/tipc/link.h index 7add2b90361d..0509c6de03cd 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -49,19 +49,14 @@ */ #define INVALID_LINK_SEQ 0x10000 -/* Link working states - */ -#define WORKING_WORKING 560810u -#define WORKING_UNKNOWN 560811u -#define RESET_UNKNOWN 560812u -#define RESET_RESET 560813u -/* Link endpoint execution states +/* Link endpoint receive states */ -#define LINK_STARTED 0x0001 -#define LINK_STOPPED 0x0002 -#define LINK_SYNCHING 0x0004 -#define LINK_FAILINGOVER 0x0008 +enum { + TIPC_LINK_OPEN, + TIPC_LINK_BLOCKED, + TIPC_LINK_TUNNEL +}; /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -106,7 +101,6 @@ struct tipc_stats { * @timer: link timer * @owner: pointer to peer node * @refcnt: reference counter for permanent references (owner node & timer) - * @flags: execution state flags for link endpoint instance * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link @@ -119,6 +113,7 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') + * @exec_mode: transmit/receive mode for link endpoint instance * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset @@ -149,7 +144,6 @@ struct tipc_link { struct kref ref; /* Management and link 
supervision data */ - unsigned int flags; u32 peer_session; u32 peer_bearer_id; u32 bearer_id; @@ -165,6 +159,7 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; + u8 exec_mode; u16 synch_point; /* Failover */ @@ -249,27 +244,4 @@ static inline u32 link_own_addr(struct tipc_link *l) return msg_prevnode(l->pmsg); } -/* - * Link status checking routines - */ -static inline int link_working_working(struct tipc_link *l_ptr) -{ - return l_ptr->state == WORKING_WORKING; -} - -static inline int link_working_unknown(struct tipc_link *l_ptr) -{ - return l_ptr->state == WORKING_UNKNOWN; -} - -static inline int link_reset_unknown(struct tipc_link *l_ptr) -{ - return l_ptr->state == RESET_UNKNOWN; -} - -static inline int link_reset_reset(struct tipc_link *l_ptr) -{ - return l_ptr->state == RESET_RESET; -} - #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index ad759bb034e7..b7a4457f653c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -403,7 +403,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) struct tipc_link *l_ptr = n_ptr->links[i].link; if (!l_ptr) continue; - l_ptr->flags &= ~LINK_FAILINGOVER; + l_ptr->exec_mode = TIPC_LINK_OPEN; l_ptr->failover_checkpt = 0; l_ptr->failover_pkts = 0; kfree_skb(l_ptr->failover_skb); -- cgit From 426cc2b86d1813959497d608dcb52c32df2d448a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:26 -0400 Subject: tipc: introduce new link protocol msg create function As a preparation for later changes, we introduce a new function tipc_link_build_proto_msg(). Instead of actually sending the created protocol message, it only creates it and adds it to the head of an skb queue provided by the caller. Since we still need the existing function tipc_link_proto_xmit() for a while, we redesign it to make use of the new function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 144 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 77 insertions(+), 67 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 35a2da688db1..657ba91fde41 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -129,6 +129,9 @@ static void tipc_link_proto_rcv(struct tipc_link *link, struct sk_buff *skb); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, + u16 rcvgap, int tolerance, int priority, + struct sk_buff_head *xmitq); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); @@ -1323,77 +1326,21 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, /* * Send protocol message to the other endpoint. 
*/ -void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, +void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, u32 gap, u32 tolerance, u32 priority) { - struct sk_buff *buf = NULL; - struct tipc_msg *msg = l_ptr->pmsg; - u32 msg_size = sizeof(l_ptr->proto_msg); - int r_flag; - u16 last_rcv; + struct sk_buff *skb = NULL; + struct sk_buff_head xmitq; - /* Don't send protocol message during link failover */ - if (l_ptr->exec_mode == TIPC_LINK_BLOCKED) - return; - - /* Abort non-RESET send if communication with node is prohibited */ - if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG)) - return; - - /* Create protocol message with "out-of-sequence" sequence number */ - msg_set_type(msg, msg_typ); - msg_set_net_plane(msg, l_ptr->net_plane); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); - - if (msg_typ == STATE_MSG) { - u16 next_sent = l_ptr->snd_nxt; - - if (!tipc_link_is_up(l_ptr)) - return; - msg_set_next_sent(msg, next_sent); - if (!skb_queue_empty(&l_ptr->deferdq)) { - last_rcv = buf_seqno(skb_peek(&l_ptr->deferdq)); - gap = mod(last_rcv - l_ptr->rcv_nxt); - } - msg_set_seq_gap(msg, gap); - if (gap) - l_ptr->stats.sent_nacks++; - msg_set_link_tolerance(msg, tolerance); - msg_set_linkprio(msg, priority); - msg_set_max_pkt(msg, l_ptr->mtu); - msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); - msg_set_probe(msg, probe_msg != 0); - if (probe_msg) - l_ptr->stats.sent_probes++; - l_ptr->stats.sent_states++; - } else { /* RESET_MSG or ACTIVATE_MSG */ - msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); - msg_set_seq_gap(msg, 0); - msg_set_next_sent(msg, 1); - msg_set_probe(msg, 0); - msg_set_link_tolerance(msg, l_ptr->tolerance); - msg_set_linkprio(msg, l_ptr->priority); - msg_set_max_pkt(msg, l_ptr->advertised_mtu); - } - - r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); - msg_set_redundant_link(msg, r_flag); - 
msg_set_linkprio(msg, l_ptr->priority); - msg_set_size(msg, msg_size); - - msg_set_seqno(msg, mod(l_ptr->snd_nxt + (0xffff / 2))); - - buf = tipc_buf_acquire(msg_size); - if (!buf) + __skb_queue_head_init(&xmitq); + tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap, + tolerance, priority, &xmitq); + skb = __skb_dequeue(&xmitq); + if (!skb) return; - - skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); - buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, - &l_ptr->media_addr); - l_ptr->rcv_unacked = 0; - kfree_skb(buf); + tipc_bearer_send(l->owner->net, l->bearer_id, skb, &l->media_addr); + l->rcv_unacked = 0; + kfree_skb(skb); } /* @@ -1514,6 +1461,69 @@ exit: kfree_skb(buf); } +/* tipc_link_build_proto_msg: prepare link protocol message for transmission + */ +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, + u16 rcvgap, int tolerance, int priority, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = NULL; + struct tipc_msg *hdr = l->pmsg; + u16 snd_nxt = l->snd_nxt; + u16 rcv_nxt = l->rcv_nxt; + u16 rcv_last = rcv_nxt - 1; + int node_up = l->owner->bclink.recv_permitted; + + /* Don't send protocol message during reset or link failover */ + if (l->exec_mode == TIPC_LINK_BLOCKED) + return; + + /* Abort non-RESET send if communication with node is prohibited */ + if ((tipc_node_blocked(l->owner)) && (mtyp != RESET_MSG)) + return; + + msg_set_type(hdr, mtyp); + msg_set_net_plane(hdr, l->net_plane); + msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net)); + msg_set_link_tolerance(hdr, tolerance); + msg_set_linkprio(hdr, priority); + msg_set_redundant_link(hdr, node_up); + msg_set_seq_gap(hdr, 0); + + /* Compatibility: created msg must not be in sequence with pkt flow */ + msg_set_seqno(hdr, snd_nxt + U16_MAX / 2); + + if (mtyp == STATE_MSG) { + if (!tipc_link_is_up(l)) + return; + msg_set_next_sent(hdr, snd_nxt); 
+ /* Override rcvgap if there are packets in deferred queue */ + if (!skb_queue_empty(&l->deferdq)) + rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt; + if (rcvgap) { + msg_set_seq_gap(hdr, rcvgap); + l->stats.sent_nacks++; + } + msg_set_ack(hdr, rcv_last); + msg_set_probe(hdr, probe); + if (probe) + l->stats.sent_probes++; + l->stats.sent_states++; + } else { + /* RESET_MSG or ACTIVATE_MSG */ + msg_set_max_pkt(hdr, l->advertised_mtu); + msg_set_ack(hdr, l->failover_checkpt - 1); + msg_set_next_sent(hdr, 1); + } + skb = tipc_buf_acquire(msg_size(hdr)); + if (!skb) + return; + skb_copy_to_linear_data(skb, hdr, msg_size(hdr)); + skb->priority = TC_PRIO_CONTROL; + __skb_queue_head(xmitq, skb); +} /* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to * a different bearer. Owner node is locked. -- cgit From 6ab30f9cbe134d19559f48dc748587d036529aaf Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:27 -0400 Subject: tipc: improve link FSM implementation The link FSM implementation is currently unnecessarily complex. It sometimes checks for conditional state outside the FSM data before deciding next state, and often performs actions directly inside the FSM logic. In this commit, we create a second, simpler FSM implementation, that as far as possible acts only on states and events that it is strictly defined for, and postpones any actions until it is finished with its decisions. It also returns an event flag field and a buffer queue which may potentially contain a protocol message to be sent by the caller. Unfortunately, we cannot yet make the FSM "clean", in the sense that its decisions are only based on FSM state and event, and that state changes happen only here. That will have to wait until the activate/reset logic has been cleaned up in a future commit.
We also rename the link states as follows: WORKING_WORKING -> TIPC_LINK_WORKING WORKING_UNKNOWN -> TIPC_LINK_PROBING RESET_UNKNOWN -> TIPC_LINK_RESETTING RESET_RESET -> TIPC_LINK_ESTABLISHING The existing FSM function, link_state_event(), is still needed for a while, so we redesign it to make use of the new function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 344 +++++++++++++++++++++++++++++++------------------------- net/tipc/link.h | 7 ++ 2 files changed, 195 insertions(+), 156 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 657ba91fde41..5d2f9198c6bc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -88,10 +88,10 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { /* Link FSM states and events: */ enum { - WORKING_WORKING, - WORKING_UNKNOWN, - RESET_RESET, - RESET_UNKNOWN + TIPC_LINK_WORKING, + TIPC_LINK_PROBING, + TIPC_LINK_RESETTING, + TIPC_LINK_ESTABLISHING }; enum { @@ -103,24 +103,24 @@ enum { /* Link FSM state checking routines */ -static int link_working_working(struct tipc_link *l) +static int link_working(struct tipc_link *l) { - return l->state == WORKING_WORKING; + return l->state == TIPC_LINK_WORKING; } -static int link_working_unknown(struct tipc_link *l) +static int link_probing(struct tipc_link *l) { - return l->state == WORKING_UNKNOWN; + return l->state == TIPC_LINK_PROBING; } -static int link_reset_unknown(struct tipc_link *l) +static int link_resetting(struct tipc_link *l) { - return l->state == RESET_UNKNOWN; + return l->state == TIPC_LINK_RESETTING; } -static int link_reset_reset(struct tipc_link *l) +static int link_establishing(struct tipc_link *l) { - return l->state == RESET_RESET; + return l->state == TIPC_LINK_ESTABLISHING; } static void link_handle_out_of_seq_msg(struct tipc_link *link, @@ -140,6 +140,8 @@ static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool 
tipc_data_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); static void link_set_timer(struct tipc_link *link, unsigned long time); +static void link_activate(struct tipc_link *link); + /* * Simple link routines */ @@ -179,7 +181,7 @@ int tipc_link_is_up(struct tipc_link *l_ptr) { if (!l_ptr) return 0; - return link_working_working(l_ptr) || link_working_unknown(l_ptr); + return link_working(l_ptr) || link_probing(l_ptr); } int tipc_link_is_active(struct tipc_link *l) @@ -234,8 +236,11 @@ static void link_timeout(unsigned long data) } /* do all other link processing performed on a periodic basis */ - if (l_ptr->silent_intv_cnt || tipc_bclink_acks_missing(l_ptr->owner)) + if (l_ptr->silent_intv_cnt) link_state_event(l_ptr, SILENCE_EVT); + else if (link_working(l_ptr) && tipc_bclink_acks_missing(l_ptr->owner)) + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); + l_ptr->silent_intv_cnt++; if (skb_queue_len(&l_ptr->backlogq)) tipc_link_push_packets(l_ptr); @@ -304,7 +309,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->peer_session = WILDCARD_SESSION; l_ptr->bearer_id = b_ptr->identity; link_set_supervision_props(l_ptr, b_ptr->tolerance); - l_ptr->state = RESET_UNKNOWN; + l_ptr->state = TIPC_LINK_RESETTING; l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; @@ -366,6 +371,134 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id) rcu_read_unlock(); } +/** + * tipc_link_fsm_evt - link finite state machine + * @l: pointer to link + * @evt: state machine event to be processed + * @xmitq: queue to prepend created protocol message, if any + */ +static int tipc_link_fsm_evt(struct tipc_link *l, int evt, + struct sk_buff_head *xmitq) +{ + int mtyp = 0, rc = 0; + struct tipc_link *pl; + enum { + LINK_RESET = 1, + LINK_ACTIVATE = (1 << 1), + SND_PROBE = (1 << 2), + SND_STATE = (1 << 3), + SND_RESET = (1 << 4), + SND_ACTIVATE = (1 << 5) + } 
actions = 0; + + if (l->exec_mode == TIPC_LINK_BLOCKED) + return rc; + + switch (l->state) { + case TIPC_LINK_WORKING: + switch (evt) { + case TRAFFIC_EVT: + case ACTIVATE_EVT: + break; + case SILENCE_EVT: + l->state = TIPC_LINK_PROBING; + actions |= SND_PROBE; + break; + case PEER_RESET_EVT: + actions |= LINK_RESET | SND_ACTIVATE; + break; + default: + pr_debug("%s%u WORKING\n", link_unk_evt, evt); + } + break; + case TIPC_LINK_PROBING: + switch (evt) { + case TRAFFIC_EVT: + case ACTIVATE_EVT: + l->state = TIPC_LINK_WORKING; + break; + case PEER_RESET_EVT: + actions |= LINK_RESET | SND_ACTIVATE; + break; + case SILENCE_EVT: + if (l->silent_intv_cnt <= l->abort_limit) { + actions |= SND_PROBE; + break; + } + actions |= LINK_RESET | SND_RESET; + break; + default: + pr_err("%s%u PROBING\n", link_unk_evt, evt); + } + break; + case TIPC_LINK_RESETTING: + switch (evt) { + case TRAFFIC_EVT: + break; + case ACTIVATE_EVT: + pl = node_active_link(l->owner, 0); + if (pl && link_probing(pl)) + break; + actions |= LINK_ACTIVATE; + if (l->owner->working_links == 1) + tipc_link_sync_xmit(l); + break; + case PEER_RESET_EVT: + l->state = TIPC_LINK_ESTABLISHING; + actions |= SND_ACTIVATE; + break; + case SILENCE_EVT: + actions |= SND_RESET; + break; + default: + pr_err("%s%u in RESETTING\n", link_unk_evt, evt); + } + break; + case TIPC_LINK_ESTABLISHING: + switch (evt) { + case TRAFFIC_EVT: + case ACTIVATE_EVT: + pl = node_active_link(l->owner, 0); + if (pl && link_probing(pl)) + break; + actions |= LINK_ACTIVATE; + if (l->owner->working_links == 1) + tipc_link_sync_xmit(l); + break; + case PEER_RESET_EVT: + break; + case SILENCE_EVT: + actions |= SND_ACTIVATE; + break; + default: + pr_err("%s%u ESTABLISHING\n", link_unk_evt, evt); + } + break; + default: + pr_err("Unknown link state %u/%u\n", l->state, evt); + } + + /* Perform actions as decided by FSM */ + if (actions & LINK_RESET) { + l->exec_mode = TIPC_LINK_BLOCKED; + rc |= TIPC_LINK_DOWN_EVT; + } + if (actions & 
LINK_ACTIVATE) { + l->exec_mode = TIPC_LINK_OPEN; + rc |= TIPC_LINK_UP_EVT; + } + if (actions & (SND_STATE | SND_PROBE)) + mtyp = STATE_MSG; + if (actions & SND_RESET) + mtyp = RESET_MSG; + if (actions & SND_ACTIVATE) + mtyp = ACTIVATE_MSG; + if (actions & (SND_PROBE | SND_STATE | SND_RESET | SND_ACTIVATE)) + tipc_link_build_proto_msg(l, mtyp, actions & SND_PROBE, + 0, 0, 0, xmitq); + return rc; +} + /** * link_schedule_user - schedule a message sender for wakeup after congestion * @link: congested link @@ -474,9 +607,10 @@ void tipc_link_reset(struct tipc_link *l_ptr) /* Prepare for renewed mtu size negotiation */ l_ptr->mtu = l_ptr->advertised_mtu; - l_ptr->state = RESET_UNKNOWN; + l_ptr->state = TIPC_LINK_RESETTING; - if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET)) + if ((prev_state == TIPC_LINK_RESETTING) || + (prev_state == TIPC_LINK_ESTABLISHING)) return; tipc_node_link_down(l_ptr->owner, l_ptr->bearer_id); @@ -515,6 +649,8 @@ static void link_activate(struct tipc_link *link) link->rcv_nxt = 1; link->stats.recv_info = 1; link->silent_intv_cnt = 0; + link->state = TIPC_LINK_WORKING; + link->exec_mode = TIPC_LINK_OPEN; tipc_node_link_up(node, link->bearer_id); tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } @@ -524,132 +660,29 @@ static void link_activate(struct tipc_link *link) * @l_ptr: pointer to link * @event: state machine event to process */ -static void link_state_event(struct tipc_link *l_ptr, unsigned int event) +static void link_state_event(struct tipc_link *l, unsigned int evt) { - struct tipc_link *other; + int rc; + struct sk_buff_head xmitq; + struct sk_buff *skb; - if (l_ptr->exec_mode == TIPC_LINK_BLOCKED) + if (l->exec_mode == TIPC_LINK_BLOCKED) return; - switch (l_ptr->state) { - case WORKING_WORKING: - switch (event) { - case TRAFFIC_EVT: - case ACTIVATE_MSG: - l_ptr->silent_intv_cnt = 0; - break; - case SILENCE_EVT: - if (!l_ptr->silent_intv_cnt) { - if (tipc_bclink_acks_missing(l_ptr->owner)) - 
tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0); - break; - } - l_ptr->state = WORKING_UNKNOWN; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - break; - case RESET_MSG: - pr_debug("%s<%s>, requested by peer\n", - link_rst_msg, l_ptr->name); - tipc_link_reset(l_ptr); - l_ptr->state = RESET_RESET; - tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0); - break; - default: - pr_debug("%s%u in WW state\n", link_unk_evt, event); - } - break; - case WORKING_UNKNOWN: - switch (event) { - case TRAFFIC_EVT: - case ACTIVATE_MSG: - l_ptr->state = WORKING_WORKING; - l_ptr->silent_intv_cnt = 0; - break; - case RESET_MSG: - pr_debug("%s<%s>, requested by peer while probing\n", - link_rst_msg, l_ptr->name); - tipc_link_reset(l_ptr); - l_ptr->state = RESET_RESET; - tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0); - break; - case SILENCE_EVT: - if (!l_ptr->silent_intv_cnt) { - l_ptr->state = WORKING_WORKING; - if (tipc_bclink_acks_missing(l_ptr->owner)) - tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0); - } else if (l_ptr->silent_intv_cnt < - l_ptr->abort_limit) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0); - } else { /* Link has failed */ - pr_debug("%s<%s>, peer not responding\n", - link_rst_msg, l_ptr->name); - tipc_link_reset(l_ptr); - l_ptr->state = RESET_UNKNOWN; - tipc_link_proto_xmit(l_ptr, RESET_MSG, - 0, 0, 0, 0); - } - break; - default: - pr_err("%s%u in WU state\n", link_unk_evt, event); - } - break; - case RESET_UNKNOWN: - switch (event) { - case TRAFFIC_EVT: - break; - case ACTIVATE_MSG: - other = node_active_link(l_ptr->owner, 0); - if (other && link_working_unknown(other)) - break; - l_ptr->state = WORKING_WORKING; - link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - if (l_ptr->owner->working_links == 1) - tipc_link_sync_xmit(l_ptr); - break; - case RESET_MSG: - l_ptr->state = RESET_RESET; - tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 1, 0, 0, 0); - break; - case SILENCE_EVT: - 
tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); - break; - default: - pr_err("%s%u in RU state\n", link_unk_evt, event); - } - break; - case RESET_RESET: - switch (event) { - case TRAFFIC_EVT: - case ACTIVATE_MSG: - other = node_active_link(l_ptr->owner, 0); - if (other && link_working_unknown(other)) - break; - l_ptr->state = WORKING_WORKING; - link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); - if (l_ptr->owner->working_links == 1) - tipc_link_sync_xmit(l_ptr); - break; - case RESET_MSG: - break; - case SILENCE_EVT: - tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0); - break; - default: - pr_err("%s%u in RR state\n", link_unk_evt, event); - } - break; - default: - pr_err("Unknown link state %u/%u\n", l_ptr->state, event); - } + __skb_queue_head_init(&xmitq); + + rc = tipc_link_fsm_evt(l, evt, &xmitq); + + if (rc & TIPC_LINK_UP_EVT) + link_activate(l); + + if (rc & TIPC_LINK_DOWN_EVT) + tipc_link_reset(l); + + skb = __skb_dequeue(&xmitq); + if (!skb) + return; + tipc_bearer_send(l->owner->net, l->bearer_id, skb, &l->media_addr); } /** @@ -1102,7 +1135,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) link_prepare_wakeup(l_ptr); /* Process the incoming packet */ - if (unlikely(!link_working_working(l_ptr))) { + if (unlikely(!link_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { tipc_link_proto_rcv(l_ptr, skb); link_retrieve_defq(l_ptr, &head); @@ -1113,7 +1146,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Traffic message. 
Conditionally activate link */ link_state_event(l_ptr, TRAFFIC_EVT); - if (link_working_working(l_ptr)) { + if (link_working(l_ptr)) { /* Re-insert buffer in front of queue */ __skb_queue_head(&head, skb); skb = NULL; @@ -1122,7 +1155,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) goto unlock; } - /* Link is now in state WORKING_WORKING */ + /* Link is now in state TIPC_LINK_WORKING */ if (unlikely(seq_no != l_ptr->rcv_nxt)) { link_handle_out_of_seq_msg(l_ptr, skb); link_retrieve_defq(l_ptr, &head); @@ -1365,16 +1398,15 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, switch (msg_type(msg)) { case RESET_MSG: - if (!link_working_unknown(l_ptr) && + if (!link_probing(l_ptr) && (l_ptr->peer_session != WILDCARD_SESSION)) { if (less_eq(msg_session(msg), l_ptr->peer_session)) break; /* duplicate or old reset: ignore */ } - if (!msg_redundant_link(msg) && (link_working_working(l_ptr) || - link_working_unknown(l_ptr))) { - /* - * peer has lost contact -- don't allow peer's links + if (!msg_redundant_link(msg) && (link_working(l_ptr) || + link_probing(l_ptr))) { + /* peer has lost contact -- don't allow peer's links * to reactivate before we recognize loss & clean up */ l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN; @@ -1432,7 +1464,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, link_state_event(l_ptr, TRAFFIC_EVT); l_ptr->stats.recv_states++; - if (link_reset_unknown(l_ptr)) + if (link_resetting(l_ptr)) break; if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg))) @@ -1822,14 +1854,14 @@ static void link_print(struct tipc_link *l_ptr, const char *str) pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); rcu_read_unlock(); - if (link_working_unknown(l_ptr)) - pr_cont(":WU\n"); - else if (link_reset_reset(l_ptr)) - pr_cont(":RR\n"); - else if (link_reset_unknown(l_ptr)) - pr_cont(":RU\n"); - else if (link_working_working(l_ptr)) - pr_cont(":WW\n"); + if (link_probing(l_ptr)) + pr_cont(":P\n"); + else if 
(link_establishing(l_ptr)) + pr_cont(":E\n"); + else if (link_resetting(l_ptr)) + pr_cont(":R\n"); + else if (link_working(l_ptr)) + pr_cont(":W\n"); else pr_cont("\n"); } diff --git a/net/tipc/link.h b/net/tipc/link.h index 0509c6de03cd..ef68424f492d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -58,6 +58,13 @@ enum { TIPC_LINK_TUNNEL }; +/* Events occurring at packet reception or at timeout + */ +enum { + TIPC_LINK_UP_EVT = 1, + TIPC_LINK_DOWN_EVT = (1 << 1) +}; + /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) */ -- cgit From 333ef69ed2121f535e00ceb26e095d3745584c6e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:28 -0400 Subject: tipc: simplify link timer implementation We create a second, simpler, link timer function, tipc_link_timeout(). The new function makes use of the new FSM function introduced in the previous commit, and just like it, takes a buffer queue as parameter. It returns an event bit field and potentially a link protocol packet to the caller. The existing timer function, link_timeout(), is still needed for a while, so we redesign it to become a wrapper around the new function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 116 ++++++++++++++++++++++++++++++++++---------------------- net/tipc/link.h | 1 + 2 files changed, 72 insertions(+), 45 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 5d2f9198c6bc..f58bb434d1c8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -193,60 +193,30 @@ int tipc_link_is_active(struct tipc_link *l) /** * link_timeout - handle expiration of link timer - * @l_ptr: pointer to link */ static void link_timeout(unsigned long data) { - struct tipc_link *l_ptr = (struct tipc_link *)data; + struct tipc_link *l = (struct tipc_link *)data; + struct sk_buff_head xmitq; struct sk_buff *skb; + int rc; - tipc_node_lock(l_ptr->owner); + __skb_queue_head_init(&xmitq); - /* update counters used in statistical profiling of send traffic */ - l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq); - l_ptr->stats.queue_sz_counts++; + tipc_node_lock(l->owner); - skb = skb_peek(&l_ptr->transmq); - if (skb) { - struct tipc_msg *msg = buf_msg(skb); - u32 length = msg_size(msg); - - if ((msg_user(msg) == MSG_FRAGMENTER) && - (msg_type(msg) == FIRST_FRAGMENT)) { - length = msg_size(msg_get_wrapped(msg)); - } - if (length) { - l_ptr->stats.msg_lengths_total += length; - l_ptr->stats.msg_length_counts++; - if (length <= 64) - l_ptr->stats.msg_length_profile[0]++; - else if (length <= 256) - l_ptr->stats.msg_length_profile[1]++; - else if (length <= 1024) - l_ptr->stats.msg_length_profile[2]++; - else if (length <= 4096) - l_ptr->stats.msg_length_profile[3]++; - else if (length <= 16384) - l_ptr->stats.msg_length_profile[4]++; - else if (length <= 32768) - l_ptr->stats.msg_length_profile[5]++; - else - l_ptr->stats.msg_length_profile[6]++; - } - } + rc = tipc_link_timeout(l, &xmitq); - /* do all other link processing performed on a periodic basis */ - if (l_ptr->silent_intv_cnt) - link_state_event(l_ptr, SILENCE_EVT); - else if (link_working(l_ptr) && tipc_bclink_acks_missing(l_ptr->owner)) - 
tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); + if (rc & TIPC_LINK_DOWN_EVT) + tipc_link_reset(l); - l_ptr->silent_intv_cnt++; - if (skb_queue_len(&l_ptr->backlogq)) - tipc_link_push_packets(l_ptr); - link_set_timer(l_ptr, l_ptr->keepalive_intv); - tipc_node_unlock(l_ptr->owner); - tipc_link_put(l_ptr); + skb = __skb_dequeue(&xmitq); + if (skb) + tipc_bearer_send(l->owner->net, l->bearer_id, + skb, &l->media_addr); + link_set_timer(l, l->keepalive_intv); + tipc_node_unlock(l->owner); + tipc_link_put(l); } static void link_set_timer(struct tipc_link *link, unsigned long time) @@ -499,6 +469,62 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, return rc; } +/* link_profile_stats - update statistical profiling of traffic + */ +static void link_profile_stats(struct tipc_link *l) +{ + struct sk_buff *skb; + struct tipc_msg *msg; + int length; + + /* Update counters used in statistical profiling of send traffic */ + l->stats.accu_queue_sz += skb_queue_len(&l->transmq); + l->stats.queue_sz_counts++; + + skb = skb_peek(&l->transmq); + if (!skb) + return; + msg = buf_msg(skb); + length = msg_size(msg); + + if (msg_user(msg) == MSG_FRAGMENTER) { + if (msg_type(msg) != FIRST_FRAGMENT) + return; + length = msg_size(msg_get_wrapped(msg)); + } + l->stats.msg_lengths_total += length; + l->stats.msg_length_counts++; + if (length <= 64) + l->stats.msg_length_profile[0]++; + else if (length <= 256) + l->stats.msg_length_profile[1]++; + else if (length <= 1024) + l->stats.msg_length_profile[2]++; + else if (length <= 4096) + l->stats.msg_length_profile[3]++; + else if (length <= 16384) + l->stats.msg_length_profile[4]++; + else if (length <= 32768) + l->stats.msg_length_profile[5]++; + else + l->stats.msg_length_profile[6]++; +} + +/* tipc_link_timeout - perform periodic task as instructed from node timeout + */ +int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + int rc = 0; + + link_profile_stats(l); + if (l->silent_intv_cnt) + rc = 
tipc_link_fsm_evt(l, SILENCE_EVT, xmitq); + else if (link_working(l) && tipc_bclink_acks_missing(l->owner)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + l->silent_intv_cnt++; + return rc; +} + /** * link_schedule_user - schedule a message sender for wakeup after congestion * @link: congested link diff --git a/net/tipc/link.h b/net/tipc/link.h index ef68424f492d..98507b0f008d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -245,6 +245,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); void link_prepare_wakeup(struct tipc_link *l); +int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); static inline u32 link_own_addr(struct tipc_link *l) { -- cgit From 8a1577c96f122308ac9b5f195f9f9a7dd74ac541 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:29 -0400 Subject: tipc: move link supervision timer to node level In our effort to move control of the links to the link aggregation layer, we move the periodic link supervision timer to struct tipc_node. The new timer is shared between all links belonging to the node, thus saving resources, while still kicking the FSM on both its pertaining links at each expiration. The current link timer and corresponding functions are removed. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 78 +++------------------------------------------------------ net/tipc/link.h | 2 -- net/tipc/node.c | 66 +++++++++++++++++++++++++++++++++++++++++++++--- net/tipc/node.h | 2 ++ 4 files changed, 68 insertions(+), 80 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index f58bb434d1c8..5b4609bd0ddc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -127,7 +127,6 @@ static void link_handle_out_of_seq_msg(struct tipc_link *link, struct sk_buff *skb); static void tipc_link_proto_rcv(struct tipc_link *link, struct sk_buff *skb); -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, @@ -139,7 +138,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); -static void link_set_timer(struct tipc_link *link, unsigned long time); static void link_activate(struct tipc_link *link); /* @@ -150,21 +148,6 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } -static void tipc_link_release(struct kref *kref) -{ - kfree(container_of(kref, struct tipc_link, ref)); -} - -static void tipc_link_get(struct tipc_link *l_ptr) -{ - kref_get(&l_ptr->ref); -} - -static void tipc_link_put(struct tipc_link *l_ptr) -{ - kref_put(&l_ptr->ref, tipc_link_release); -} - static struct tipc_link *tipc_parallel_link(struct tipc_link *l) { struct tipc_node *n = l->owner; @@ -191,40 +174,6 @@ int tipc_link_is_active(struct tipc_link *l) return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l); } -/** - * link_timeout - handle expiration of link timer - */ -static void 
link_timeout(unsigned long data) -{ - struct tipc_link *l = (struct tipc_link *)data; - struct sk_buff_head xmitq; - struct sk_buff *skb; - int rc; - - __skb_queue_head_init(&xmitq); - - tipc_node_lock(l->owner); - - rc = tipc_link_timeout(l, &xmitq); - - if (rc & TIPC_LINK_DOWN_EVT) - tipc_link_reset(l); - - skb = __skb_dequeue(&xmitq); - if (skb) - tipc_bearer_send(l->owner->net, l->bearer_id, - skb, &l->media_addr); - link_set_timer(l, l->keepalive_intv); - tipc_node_unlock(l->owner); - tipc_link_put(l); -} - -static void link_set_timer(struct tipc_link *link, unsigned long time) -{ - if (!mod_timer(&link->timer, jiffies + time)) - tipc_link_get(link); -} - /** * tipc_link_create - create a new link * @n_ptr: pointer to associated node @@ -265,7 +214,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, pr_warn("Link creation failed, no memory\n"); return NULL; } - kref_init(&l_ptr->ref); l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", @@ -278,7 +226,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->owner = n_ptr; l_ptr->peer_session = WILDCARD_SESSION; l_ptr->bearer_id = b_ptr->identity; - link_set_supervision_props(l_ptr, b_ptr->tolerance); + l_ptr->tolerance = b_ptr->tolerance; l_ptr->state = TIPC_LINK_RESETTING; l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; @@ -304,8 +252,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, skb_queue_head_init(l_ptr->inputq); link_reset_statistics(l_ptr); tipc_node_attach_link(n_ptr, l_ptr); - setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); - link_set_timer(l_ptr, l_ptr->keepalive_intv); return l_ptr; } @@ -316,12 +262,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, void tipc_link_delete(struct tipc_link *l) { tipc_link_reset(l); - if (del_timer(&l->timer)) - tipc_link_put(l); - /* Delete link now, or when timer is finished: */ tipc_link_reset_fragments(l); 
tipc_node_detach_link(l->owner, l); - tipc_link_put(l); } void tipc_link_delete_list(struct net *net, unsigned int bearer_id) @@ -1447,7 +1389,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, msg_tol = msg_link_tolerance(msg); if (msg_tol > l_ptr->tolerance) - link_set_supervision_props(l_ptr, msg_tol); + l_ptr->tolerance = msg_tol; if (msg_linkprio(msg) > l_ptr->priority) l_ptr->priority = msg_linkprio(msg); @@ -1473,7 +1415,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, msg_tol = msg_link_tolerance(msg); if (msg_tol) - link_set_supervision_props(l_ptr, msg_tol); + l_ptr->tolerance = msg_tol; if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { @@ -1796,18 +1738,6 @@ exit: return *skb; } -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) -{ - unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; - - if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) - return; - - l_ptr->tolerance = tol; - l_ptr->keepalive_intv = msecs_to_jiffies(intv); - l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->keepalive_intv)); -} - void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); @@ -1984,7 +1914,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link_set_supervision_props(link, tol); + link->tolerance = tol; tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); } if (props[TIPC_NLA_PROP_PRIO]) { diff --git a/net/tipc/link.h b/net/tipc/link.h index 98507b0f008d..0cf7d2b11803 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -146,9 +146,7 @@ struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct tipc_media_addr media_addr; - struct timer_list timer; struct tipc_node *owner; - struct kref ref; /* Management and link supervision data */ u32 peer_session; diff --git a/net/tipc/node.c b/net/tipc/node.c index b7a4457f653c..77effb233725 100644 --- 
a/net/tipc/node.c +++ b/net/tipc/node.c @@ -44,6 +44,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); +static void tipc_node_timeout(unsigned long data); struct tipc_sock_conn { u32 port; @@ -145,11 +146,27 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; tipc_node_get(n_ptr); + setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); + n_ptr->keepalive_intv = U32_MAX; exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } +static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) +{ + unsigned long tol = l->tolerance; + unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; + unsigned long keepalive_intv = msecs_to_jiffies(intv); + + /* Link with lowest tolerance determines timer interval */ + if (keepalive_intv < n->keepalive_intv) + n->keepalive_intv = keepalive_intv; + + /* Ensure link's abort limit corresponds to current interval */ + l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv); +} + static void tipc_node_delete(struct tipc_node *node) { list_del_rcu(&node->list); @@ -163,8 +180,11 @@ void tipc_node_stop(struct net *net) struct tipc_node *node, *t_node; spin_lock_bh(&tn->node_list_lock); - list_for_each_entry_safe(node, t_node, &tn->node_list, list) + list_for_each_entry_safe(node, t_node, &tn->node_list, list) { + if (del_timer(&node->timer)) + tipc_node_put(node); tipc_node_put(node); + } spin_unlock_bh(&tn->node_list_lock); } @@ -222,6 +242,38 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) tipc_node_put(node); } +/* tipc_node_timeout - handle expiration of node timer + */ +static void tipc_node_timeout(unsigned long data) +{ + struct tipc_node *n = (struct tipc_node *)data; + struct sk_buff_head xmitq; + struct tipc_link *l; + struct 
tipc_media_addr *maddr; + int bearer_id; + int rc = 0; + + __skb_queue_head_init(&xmitq); + + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + tipc_node_lock(n); + l = n->links[bearer_id].link; + if (l) { + /* Link tolerance may change asynchronously: */ + tipc_node_calculate_timer(n, l); + rc = tipc_link_timeout(l, &xmitq); + if (rc & TIPC_LINK_DOWN_EVT) + tipc_link_reset(l); + } + tipc_node_unlock(n); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + } + if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) + tipc_node_get(n); + tipc_node_put(n); +} + /** * tipc_node_link_up - handle addition of link * @@ -335,10 +387,16 @@ bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b, struct tipc_media_addr *curr = &n->links[b->identity].maddr; struct sk_buff_head *inputq = &n->links[b->identity].inputq; - if (!l) + if (!l) { l = tipc_link_create(n, b, maddr, inputq, &n->bclink.namedq); - if (!l) - return false; + if (!l) + return false; + tipc_node_calculate_timer(n, l); + if (n->link_cnt == 1) { + if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) + tipc_node_get(n); + } + } memcpy(&l->media_addr, maddr, sizeof(*maddr)); memcpy(curr, maddr, sizeof(*maddr)); tipc_link_reset(l); diff --git a/net/tipc/node.h b/net/tipc/node.h index 86b7c740cf84..2d56344962e7 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -140,6 +140,8 @@ struct tipc_node { u32 link_id; struct list_head publ_list; struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; struct rcu_head rcu; }; -- cgit From 1a20cc254e60e79929ef7edb5cf784df86b46e42 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:30 -0400 Subject: tipc: introduce node contact FSM The logics for determining when a node is permitted to establish and maintain contact with its peer node becomes non-trivial in the presence of multiple parallel links that may come and go independently. 
A known failure scenario is that one endpoint registers both its links to the peer as lost, cleans up its binding table, and prepares for a table update once contact is re-established, while the other endpoint may see its links reset and re-established one by one, hence seeing no need to re-synchronize the binding table. To avoid this, a node must not allow re-establishing contact until it has confirmation that even the peer has lost both links. Currently, the mechanism for handling this consists of setting and resetting two state flags from different locations in the code. This solution is hard to understand and maintain. A closer analysis even reveals that it is not completely safe. In this commit we instead introduce an FSM that keeps track of the conditions for when the node can establish and maintain links. It has six states and four events, and is strictly based on explicit knowledge about the own node's and the peer node's contact states. Only events leading to state change are shown as edges in the figure below. +--------------+ | SELF_UP/ | +---------------->| PEER_COMING |-----------------+ SELF_ | +--------------+ |PEER_ ESTBL_ | | |ESTBL_ CONTACT| SELF_LOST_CONTACT | |CONTACT | v | | +--------------+ | | PEER_ | SELF_DOWN/ | SELF_ | | LOST_ +--| PEER_LEAVING |<--+ LOST_ v +-------------+ CONTACT | +--------------+ | CONTACT +-----------+ | SELF_DOWN/ |<----------+ +----------| SELF_UP/ | | PEER_DOWN |<----------+ +----------| PEER_UP | +-------------+ SELF_ | +--------------+ | PEER_ +-----------+ | LOST_ +--| SELF_LEAVING/|<--+ LOST_ A | CONTACT | PEER_DOWN | CONTACT | | +--------------+ | | A | PEER_ | PEER_LOST_CONTACT | |SELF_ ESTBL_ | | |ESTBL_ CONTACT| +--------------+ |CONTACT +---------------->| PEER_UP/ |-----------------+ | SELF_COMING | +--------------+ Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 74 ++++++++++++++------------------ net/tipc/msg.h | 7 +++ net/tipc/node.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- net/tipc/node.h | 28 +++++++++--- 4 files changed, 185 insertions(+), 54 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 5b4609bd0ddc..eaccf4552d15 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -911,9 +911,13 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, if (l_ptr->addr) { /* Handle failure on standard link */ - link_print(l_ptr, "Resetting link\n"); + link_print(l_ptr, "Resetting link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(msg), msg_type(msg), msg_size(msg), + msg_errcode(msg)); + pr_info("sqno %u, prev: %x, src: %x\n", + msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg)); tipc_link_reset(l_ptr); - } else { /* Handle failure on broadcast link */ struct tipc_node *n_ptr; @@ -1067,15 +1071,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(!l_ptr)) goto unlock; - /* Verify that communication with node is currently allowed */ - if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && - msg_user(msg) == LINK_PROTOCOL && - (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && - !msg_redundant_link(msg)) - n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; - - if (tipc_node_blocked(n_ptr)) + /* Is reception of this pkt permitted at the moment ? 
*/ + if (!tipc_node_filter_skb(n_ptr, msg)) goto unlock; /* Validate message sequence number info */ @@ -1371,15 +1368,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, if (less_eq(msg_session(msg), l_ptr->peer_session)) break; /* duplicate or old reset: ignore */ } - - if (!msg_redundant_link(msg) && (link_working(l_ptr) || - link_probing(l_ptr))) { - /* peer has lost contact -- don't allow peer's links - * to reactivate before we recognize loss & clean up - */ - l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN; - } - link_state_event(l_ptr, RESET_MSG); /* fall thru' */ @@ -1408,6 +1396,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, l_ptr->peer_session = msg_session(msg); l_ptr->peer_bearer_id = msg_bearer_id(msg); + if (!msg_peer_is_up(msg)) + tipc_node_fsm_evt(l_ptr->owner, PEER_LOST_CONTACT_EVT); if (msg_type(msg) == ACTIVATE_MSG) link_state_event(l_ptr, ACTIVATE_MSG); break; @@ -1419,11 +1409,11 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - pr_debug("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, - l_ptr->priority, msg_linkprio(msg)); + pr_info("%s<%s>, priority change %u->%u\n", + link_rst_msg, l_ptr->name, + l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); - tipc_link_reset(l_ptr); /* Enforce change to take effect */ + tipc_link_reset(l_ptr); break; } @@ -1446,15 +1436,18 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, tipc_bclink_update_link_state(l_ptr->owner, msg_last_bcast(msg)); - if (rec_gap || (msg_probe(msg))) { + if (rec_gap || (msg_probe(msg))) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0, 0); - } + if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), msg_seq_gap(msg)); } + if (tipc_link_is_up(l_ptr)) + tipc_node_fsm_evt(l_ptr->owner, + PEER_ESTABL_CONTACT_EVT); break; } exit: @@ -1478,10 +1471,6 @@ static void 
tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (l->exec_mode == TIPC_LINK_BLOCKED) return; - /* Abort non-RESET send if communication with node is prohibited */ - if ((tipc_node_blocked(l->owner)) && (mtyp != RESET_MSG)) - return; - msg_set_type(hdr, mtyp); msg_set_net_plane(hdr, l->net_plane); msg_set_bcast_ack(hdr, l->owner->bclink.last_in); @@ -1799,27 +1788,28 @@ static void link_reset_statistics(struct tipc_link *l_ptr) l_ptr->stats.recv_info = l_ptr->rcv_nxt; } -static void link_print(struct tipc_link *l_ptr, const char *str) +static void link_print(struct tipc_link *l, const char *str) { - struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct sk_buff *hskb = skb_peek(&l->transmq); + u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt; + u16 tail = l->snd_nxt - 1; - rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); - if (b_ptr) - pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); - rcu_read_unlock(); + pr_info("%s Link <%s>:", str, l->name); - if (link_probing(l_ptr)) + if (link_probing(l)) pr_cont(":P\n"); - else if (link_establishing(l_ptr)) + else if (link_establishing(l)) pr_cont(":E\n"); - else if (link_resetting(l_ptr)) + else if (link_resetting(l)) pr_cont(":R\n"); - else if (link_working(l_ptr)) + else if (link_working(l)) pr_cont(":W\n"); else pr_cont("\n"); + + pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n", + skb_queue_len(&l->transmq), head, tail, + skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt); } /* Parse and validate nested (link) properties valid for media, bearer and link diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 19c45fb66238..4dc66d9f69cc 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -766,6 +766,13 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } +static inline bool msg_peer_is_up(struct tipc_msg *m) +{ + if (likely(msg_user(m) 
!= LINK_PROTOCOL) || (msg_type(m) == STATE_MSG)) + return true; + return msg_redundant_link(m); +} + struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, diff --git a/net/tipc/node.c b/net/tipc/node.c index 77effb233725..9dbbb5de287b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -141,7 +141,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; + n_ptr->state = SELF_DOWN_PEER_DOWN; n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; @@ -421,8 +421,131 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) } } +/* tipc_node_fsm_evt - node finite state machine + * Determines when contact is allowed with peer node + */ +void tipc_node_fsm_evt(struct tipc_node *n, int evt) +{ + int state = n->state; + + switch (state) { + case SELF_DOWN_PEER_DOWN: + switch (evt) { + case SELF_ESTABL_CONTACT_EVT: + state = SELF_UP_PEER_COMING; + break; + case PEER_ESTABL_CONTACT_EVT: + state = SELF_COMING_PEER_UP; + break; + case SELF_LOST_CONTACT_EVT: + case PEER_LOST_CONTACT_EVT: + break; + default: + pr_err("Unknown node fsm evt %x/%x\n", state, evt); + } + break; + case SELF_UP_PEER_UP: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + default: + pr_err("Unknown node fsm evt %x/%x\n", state, evt); + } + break; + case SELF_DOWN_PEER_LEAVING: + switch (evt) { + case PEER_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_DOWN; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + case SELF_LOST_CONTACT_EVT: + break; + default: + pr_err("Unknown node fsm evt 
%x/%x\n", state, evt); + } + break; + case SELF_UP_PEER_COMING: + switch (evt) { + case PEER_ESTABL_CONTACT_EVT: + state = SELF_UP_PEER_UP; + break; + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_LOST_CONTACT_EVT: + break; + default: + pr_err("Unknown node fsm evt %x/%x\n", state, evt); + } + break; + case SELF_COMING_PEER_UP: + switch (evt) { + case SELF_ESTABL_CONTACT_EVT: + state = SELF_UP_PEER_UP; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case SELF_LOST_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + default: + pr_err("Unknown node fsm evt %x/%x\n", state, evt); + } + break; + case SELF_LEAVING_PEER_DOWN: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_DOWN; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + case PEER_LOST_CONTACT_EVT: + break; + default: + pr_err("Unknown node fsm evt %x/%x\n", state, evt); + } + break; + default: + pr_err("Unknown node fsm state %x\n", state); + break; + } + + n->state = state; +} + +bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr) +{ + int state = n->state; + + if (likely(state == SELF_UP_PEER_UP)) + return true; + if (state == SELF_DOWN_PEER_DOWN) + return true; + if (state == SELF_UP_PEER_COMING) + return true; + if (state == SELF_COMING_PEER_UP) + return true; + if (state == SELF_LEAVING_PEER_DOWN) + return false; + if (state == SELF_DOWN_PEER_LEAVING) + if (!msg_peer_is_up(hdr)) + return true; + return false; +} + static void node_established_contact(struct tipc_node *n_ptr) { + tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT); n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); @@ -468,11 +591,8 @@ static void node_lost_contact(struct tipc_node *n_ptr) l_ptr->failover_skb = NULL; tipc_link_reset_fragments(l_ptr); } - - n_ptr->action_flags &= 
~TIPC_WAIT_OWN_LINKS_DOWN; - /* Prevent re-contact with node until cleanup is done */ - n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN; + tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT); /* Notify publications from this node */ n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; diff --git a/net/tipc/node.h b/net/tipc/node.h index 2d56344962e7..270256e09ee5 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -47,6 +47,24 @@ #define INVALID_BEARER_ID -1 +/* Node FSM states and events: + */ +enum { + SELF_DOWN_PEER_DOWN = 0xdd, + SELF_UP_PEER_UP = 0xaa, + SELF_DOWN_PEER_LEAVING = 0xd1, + SELF_UP_PEER_COMING = 0xac, + SELF_COMING_PEER_UP = 0xca, + SELF_LEAVING_PEER_DOWN = 0x1d, +}; + +enum { + SELF_ESTABL_CONTACT_EVT = 0xec, + SELF_LOST_CONTACT_EVT = 0x1c, + PEER_ESTABL_CONTACT_EVT = 0xfec, + PEER_LOST_CONTACT_EVT = 0xf1c +}; + /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down @@ -56,8 +74,6 @@ */ enum { TIPC_MSG_EVT = 1, - TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), - TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), TIPC_WAKEUP_BCAST_USERS = (1 << 5), @@ -133,6 +149,7 @@ struct tipc_node { int action_flags; struct tipc_node_bclink bclink; struct list_head list; + int state; int link_cnt; u16 working_links; u16 capabilities; @@ -176,11 +193,8 @@ static inline void tipc_node_lock(struct tipc_node *node) spin_lock_bh(&node->lock); } -static inline bool tipc_node_blocked(struct tipc_node *node) -{ - return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN | - TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); -} +void tipc_node_fsm_evt(struct tipc_node *n, int evt); +bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr); static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) { -- cgit From d999297c3dbbe7fdd832f7fa4ec84301e170b3e6 Mon Sep 17 
00:00:00 2001 From: Jon Paul Maloy Date: Thu, 16 Jul 2015 16:54:31 -0400 Subject: tipc: reduce locking scope during packet reception We convert packet/message reception according to the same principle we have been using for message sending and timeout handling: We move the function tipc_rcv() to node.c, hence handling the initial packet reception at the link aggregation level. The function grabs the node lock, selects the receiving link, and accesses it via a new call tipc_link_rcv(). This function appends buffers to the input queue for delivery upwards, but it may also append outgoing packets to the xmit queue, just as we do during regular message sending. The latter will happen when buffers are forwarded from the link backlog, or when retransmission is requested. Upon return of this function, and after having released the node lock, tipc_rcv() delivers/transmits the contents of those queues, but it may also perform actions such as link activation or reset, as indicated by the return flags from the link. This reduces the number of CPU cycles spent inside the node spinlock, and reduces contention on that lock. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/bcast.c | 23 ++ net/tipc/bcast.h | 1 + net/tipc/core.h | 5 + net/tipc/link.c | 673 +++++++++++++++++++++++++------------------------------ net/tipc/link.h | 6 +- net/tipc/msg.h | 50 ++++- net/tipc/node.c | 105 ++++++++- net/tipc/node.h | 4 - 8 files changed, 478 insertions(+), 389 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index aab4e8dd7b32..8b010c976b2f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -316,6 +316,29 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, } } +void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr) +{ + u16 last = msg_last_bcast(hdr); + int mtyp = msg_type(hdr); + + if (unlikely(msg_user(hdr) != LINK_PROTOCOL)) + return; + if (mtyp == STATE_MSG) { + tipc_bclink_update_link_state(n, last); + return; + } + /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization, + * and transfer synch info in LINK_PROTOCOL messages. + */ + if (tipc_node_is_up(n)) + return; + if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG)) + return; + n->bclink.last_sent = last; + n->bclink.last_in = last; + n->bclink.oos_state = 0; +} + /** * bclink_peek_nack - monitor retransmission requests sent by other nodes * diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 3c290a48f720..d74c69bcf60b 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -133,5 +133,6 @@ void tipc_bclink_wakeup_users(struct net *net); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); void tipc_bclink_input(struct net *net); +void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg); #endif diff --git a/net/tipc/core.h b/net/tipc/core.h index 0fcf133d5cb7..f4ed67778c54 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -129,6 +129,11 @@ static inline int less(u16 left, u16 right) return less_eq(left, right) && (mod(right) != mod(left)); } +static inline int in_range(u16 val, u16 
min, u16 max) +{ + return !less(val, min) && !more(val, max); +} + #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); void tipc_unregister_sysctl(void); diff --git a/net/tipc/link.c b/net/tipc/link.c index eaccf4552d15..55b675d20de8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -76,6 +76,10 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } }; +/* + * Interval between NACKs when packets arrive out of order + */ +#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2) /* * Out-of-range value for link session numbers */ @@ -123,22 +127,19 @@ static int link_establishing(struct tipc_link *l) return l->state == TIPC_LINK_ESTABLISHING; } -static void link_handle_out_of_seq_msg(struct tipc_link *link, - struct sk_buff *skb); -static void tipc_link_proto_rcv(struct tipc_link *link, - struct sk_buff *skb); -static void link_state_event(struct tipc_link *l_ptr, u32 event); +static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void tipc_link_sync_xmit(struct tipc_link *l); +static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); -static void link_activate(struct tipc_link *link); /* * Simple link routines @@ -283,6 +284,26 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id) rcu_read_unlock(); } +/* 
tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. + */ +static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + struct sk_buff_head list; + + skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, + 0, l->addr, link_own_addr(l), 0, 0, 0); + if (!skb) + return; + __skb_queue_head_init(&list); + __skb_queue_tail(&list, skb); + tipc_link_xmit(l, &list, xmitq); +} + /** * tipc_link_fsm_evt - link finite state machine * @l: pointer to link @@ -295,12 +316,13 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, int mtyp = 0, rc = 0; struct tipc_link *pl; enum { - LINK_RESET = 1, - LINK_ACTIVATE = (1 << 1), - SND_PROBE = (1 << 2), - SND_STATE = (1 << 3), - SND_RESET = (1 << 4), - SND_ACTIVATE = (1 << 5) + LINK_RESET = 1, + LINK_ACTIVATE = (1 << 1), + SND_PROBE = (1 << 2), + SND_STATE = (1 << 3), + SND_RESET = (1 << 4), + SND_ACTIVATE = (1 << 5), + SND_BCAST_SYNC = (1 << 6) } actions = 0; if (l->exec_mode == TIPC_LINK_BLOCKED) @@ -352,8 +374,8 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, if (pl && link_probing(pl)) break; actions |= LINK_ACTIVATE; - if (l->owner->working_links == 1) - tipc_link_sync_xmit(l); + if (!l->owner->working_links) + actions |= SND_BCAST_SYNC; break; case PEER_RESET_EVT: l->state = TIPC_LINK_ESTABLISHING; @@ -374,8 +396,8 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, if (pl && link_probing(pl)) break; actions |= LINK_ACTIVATE; - if (l->owner->working_links == 1) - tipc_link_sync_xmit(l); + if (!l->owner->working_links) + actions |= SND_BCAST_SYNC; break; case PEER_RESET_EVT: break; @@ -408,6 +430,8 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, if (actions & (SND_PROBE | SND_STATE | SND_RESET | SND_ACTIVATE)) tipc_link_build_proto_msg(l, mtyp, actions & SND_PROBE, 0, 0, 0, 
xmitq); + if (actions & SND_BCAST_SYNC) + tipc_link_build_bcast_sync_msg(l, xmitq); return rc; } @@ -605,12 +629,14 @@ void tipc_link_reset(struct tipc_link *l_ptr) l_ptr->reasm_buf = NULL; l_ptr->rcv_unacked = 0; l_ptr->snd_nxt = 1; + l_ptr->rcv_nxt = 1; l_ptr->silent_intv_cnt = 0; + l_ptr->stats.recv_info = 0; l_ptr->stale_count = 0; link_reset_statistics(l_ptr); } -static void link_activate(struct tipc_link *link) +void tipc_link_activate(struct tipc_link *link) { struct tipc_node *node = link->owner; @@ -623,36 +649,6 @@ static void link_activate(struct tipc_link *link) tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } -/** - * link_state_event - link finite state machine - * @l_ptr: pointer to link - * @event: state machine event to process - */ -static void link_state_event(struct tipc_link *l, unsigned int evt) -{ - int rc; - struct sk_buff_head xmitq; - struct sk_buff *skb; - - if (l->exec_mode == TIPC_LINK_BLOCKED) - return; - - __skb_queue_head_init(&xmitq); - - rc = tipc_link_fsm_evt(l, evt, &xmitq); - - if (rc & TIPC_LINK_UP_EVT) - link_activate(l); - - if (rc & TIPC_LINK_DOWN_EVT) - tipc_link_reset(l); - - skb = __skb_dequeue(&xmitq); - if (!skb) - return; - tipc_bearer_send(l->owner->net, l->bearer_id, skb, &l->media_addr); -} - /** * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use @@ -807,30 +803,6 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link->owner->net, link, &head); } -/* - * tipc_link_sync_xmit - synchronize broadcast link endpoints. - * - * Give a newly added peer node the sequence number where it should - * start receiving and acking broadcast packets. 
- * - * Called with node locked - */ -static void tipc_link_sync_xmit(struct tipc_link *link) -{ - struct sk_buff *skb; - struct tipc_msg *msg; - - skb = tipc_buf_acquire(INT_H_SIZE); - if (!skb) - return; - - msg = buf_msg(skb); - tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG, - INT_H_SIZE, link->addr); - msg_set_last_bcast(msg, link->owner->bclink.acked); - __tipc_link_xmit_skb(link, skb); -} - /* * tipc_link_sync_rcv - synchronize broadcast link endpoints. * Receive the sequence number where we should start receiving and @@ -881,6 +853,34 @@ void tipc_link_push_packets(struct tipc_link *link) link->snd_nxt = seqno; } +void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *_skb; + struct tipc_msg *hdr; + u16 seqno = l->snd_nxt; + u16 ack = l->rcv_nxt - 1; + + while (skb_queue_len(&l->transmq) < l->window) { + skb = skb_peek(&l->backlogq); + if (!skb) + break; + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_dequeue(&l->backlogq); + hdr = buf_msg(skb); + l->backlog[msg_importance(hdr)].len--; + __skb_queue_tail(&l->transmq, skb); + __skb_queue_tail(xmitq, _skb); + msg_set_ack(hdr, ack); + msg_set_seqno(hdr, seqno); + msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + l->rcv_unacked = 0; + seqno++; + } + l->snd_nxt = seqno; +} + void tipc_link_reset_all(struct tipc_node *node) { char addr_string[16]; @@ -978,6 +978,41 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, } } +static int tipc_link_retransm(struct tipc_link *l, int retransm, + struct sk_buff_head *xmitq) +{ + struct sk_buff *_skb, *skb = skb_peek(&l->transmq); + struct tipc_msg *hdr; + + if (!skb) + return 0; + + /* Detect repeated retransmit failures on same packet */ + if (likely(l->last_retransm != buf_seqno(skb))) { + l->last_retransm = buf_seqno(skb); + l->stale_count = 1; + } else if (++l->stale_count > 100) { + link_retransmit_failure(l, skb); + return TIPC_LINK_DOWN_EVT; + } + 
skb_queue_walk(&l->transmq, skb) { + if (!retransm) + return 0; + hdr = buf_msg(skb); + _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + if (!_skb) + return 0; + hdr = buf_msg(_skb); + msg_set_ack(hdr, l->rcv_nxt - 1); + msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + _skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, _skb); + retransm--; + l->stats.retransmitted++; + } + return 0; +} + /* link_synch(): check if all packets arrived before the synch * point have been consumed * Returns true if the parallel links are synched, otherwise false @@ -1004,155 +1039,6 @@ synched: return true; } -static void link_retrieve_defq(struct tipc_link *link, - struct sk_buff_head *list) -{ - u16 seq_no; - - if (skb_queue_empty(&link->deferdq)) - return; - - seq_no = buf_seqno(skb_peek(&link->deferdq)); - if (seq_no == link->rcv_nxt) - skb_queue_splice_tail_init(&link->deferdq, list); -} - -/** - * tipc_rcv - process TIPC packets/messages arriving from off-node - * @net: the applicable net namespace - * @skb: TIPC packet - * @b_ptr: pointer to bearer message arrived on - * - * Invoked with no locks held. Bearer pointer must point to a valid bearer - * structure (i.e. cannot be NULL), but bearer can be inactive. 
- */ -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff_head head; - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct sk_buff *skb1, *tmp; - struct tipc_msg *msg; - u16 seq_no; - u16 ackd; - u32 released; - - skb2list(skb, &head); - - while ((skb = __skb_dequeue(&head))) { - /* Ensure message is well-formed */ - if (unlikely(!tipc_msg_validate(skb))) - goto discard; - - /* Handle arrival of a non-unicast link message */ - msg = buf_msg(skb); - if (unlikely(msg_non_seq(msg))) { - if (msg_user(msg) == LINK_CONFIG) - tipc_disc_rcv(net, skb, b_ptr); - else - tipc_bclink_rcv(net, skb); - continue; - } - - /* Discard unicast link messages destined for another node */ - if (unlikely(!msg_short(msg) && - (msg_destnode(msg) != tn->own_addr))) - goto discard; - - /* Locate neighboring node that sent message */ - n_ptr = tipc_node_find(net, msg_prevnode(msg)); - if (unlikely(!n_ptr)) - goto discard; - - tipc_node_lock(n_ptr); - /* Locate unicast link endpoint that should handle message */ - l_ptr = n_ptr->links[b_ptr->identity].link; - if (unlikely(!l_ptr)) - goto unlock; - - /* Is reception of this pkt permitted at the moment ? 
*/ - if (!tipc_node_filter_skb(n_ptr, msg)) - goto unlock; - - /* Validate message sequence number info */ - seq_no = msg_seqno(msg); - ackd = msg_ack(msg); - - /* Release acked messages */ - if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg))) - tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); - - released = 0; - skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) { - if (more(buf_seqno(skb1), ackd)) - break; - __skb_unlink(skb1, &l_ptr->transmq); - kfree_skb(skb1); - released = 1; - } - - /* Try sending any messages link endpoint has pending */ - if (unlikely(skb_queue_len(&l_ptr->backlogq))) - tipc_link_push_packets(l_ptr); - - if (released && !skb_queue_empty(&l_ptr->wakeupq)) - link_prepare_wakeup(l_ptr); - - /* Process the incoming packet */ - if (unlikely(!link_working(l_ptr))) { - if (msg_user(msg) == LINK_PROTOCOL) { - tipc_link_proto_rcv(l_ptr, skb); - link_retrieve_defq(l_ptr, &head); - skb = NULL; - goto unlock; - } - - /* Traffic message. Conditionally activate link */ - link_state_event(l_ptr, TRAFFIC_EVT); - - if (link_working(l_ptr)) { - /* Re-insert buffer in front of queue */ - __skb_queue_head(&head, skb); - skb = NULL; - goto unlock; - } - goto unlock; - } - - /* Link is now in state TIPC_LINK_WORKING */ - if (unlikely(seq_no != l_ptr->rcv_nxt)) { - link_handle_out_of_seq_msg(l_ptr, skb); - link_retrieve_defq(l_ptr, &head); - skb = NULL; - goto unlock; - } - l_ptr->silent_intv_cnt = 0; - - /* Synchronize with parallel link if applicable */ - if (unlikely((l_ptr->exec_mode == TIPC_LINK_TUNNEL) && - !msg_dup(msg))) { - if (!link_synch(l_ptr)) - goto unlock; - } - l_ptr->rcv_nxt++; - if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) - link_retrieve_defq(l_ptr, &head); - if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { - l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); - } - tipc_link_input(l_ptr, skb); - skb = NULL; -unlock: - tipc_node_unlock(n_ptr); - tipc_node_put(n_ptr); -discard: - if (unlikely(skb)) 
- kfree_skb(skb); - } -} - /* tipc_data_input - deliver data and name distr msgs to upper layer * * Consumes buffer if message is of right type @@ -1206,9 +1092,6 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) struct sk_buff *iskb; int pos = 0; - if (likely(tipc_data_input(link, skb))) - return; - switch (msg_user(msg)) { case TUNNEL_PROTOCOL: if (msg_dup(msg)) { @@ -1247,6 +1130,110 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) }; } +static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) +{ + bool released = false; + struct sk_buff *skb, *tmp; + + skb_queue_walk_safe(&l->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + __skb_unlink(skb, &l->transmq); + kfree_skb(skb); + released = true; + } + return released; +} + +/* tipc_link_rcv - process TIPC packets/messages arriving from off-node + * @link: the link that should handle the message + * @skb: TIPC packet + * @xmitq: queue to place packets to be sent after this call + */ +int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *arrvq = &l->deferdq; + struct sk_buff *tmp; + struct tipc_msg *hdr; + u16 seqno, rcv_nxt; + int rc = 0; + + if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) { + if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, + 0, 0, 0, xmitq); + return rc; + } + + skb_queue_walk_safe(arrvq, skb, tmp) { + hdr = buf_msg(skb); + + /* Verify and update link state */ + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) { + __skb_dequeue(arrvq); + rc |= tipc_link_proto_rcv(l, skb, xmitq); + continue; + } + + if (unlikely(!link_working(l))) { + rc |= tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq); + if (!link_working(l)) { + kfree_skb(__skb_dequeue(arrvq)); + return rc; + } + } + + l->silent_intv_cnt = 0; + + /* Forward queues and wake up waiting users */ + if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) { + 
tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } + + /* Defer reception if there is a gap in the sequence */ + seqno = msg_seqno(hdr); + rcv_nxt = l->rcv_nxt; + if (unlikely(less(rcv_nxt, seqno))) { + l->stats.deferred_recv++; + return rc; + } + + __skb_dequeue(arrvq); + + /* Drop if packet already received */ + if (unlikely(more(rcv_nxt, seqno))) { + l->stats.duplicates++; + kfree_skb(skb); + return rc; + } + + /* Synchronize with parallel link if applicable */ + if (unlikely(l->exec_mode == TIPC_LINK_TUNNEL)) + if (!msg_dup(hdr) && !link_synch(l)) { + kfree_skb(skb); + return rc; + } + + /* Packet can be delivered */ + l->rcv_nxt++; + l->stats.recv_info++; + if (unlikely(!tipc_data_input(l, skb))) + tipc_link_input(l, skb); + + /* Ack at regular intervals */ + if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { + l->rcv_unacked = 0; + l->stats.sent_acks++; + tipc_link_build_proto_msg(l, STATE_MSG, + 0, 0, 0, 0, xmitq); + } + } + return rc; +} + /** * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue * @@ -1286,41 +1273,6 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) return 1; } -/* - * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet - */ -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, - struct sk_buff *buf) -{ - u32 seq_no = buf_seqno(buf); - - if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { - tipc_link_proto_rcv(l_ptr, buf); - return; - } - - /* Record OOS packet arrival */ - l_ptr->silent_intv_cnt = 0; - - /* - * Discard packet if a duplicate; otherwise add it to deferred queue - * and notify peer of gap as per protocol specification - */ - if (less(seq_no, l_ptr->rcv_nxt)) { - l_ptr->stats.duplicates++; - kfree_skb(buf); - return; - } - - if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { - l_ptr->stats.deferred_recv++; - if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) - 
tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); - } else { - l_ptr->stats.duplicates++; - } -} - /* * Send protocol message to the other endpoint. */ @@ -1341,119 +1293,6 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, kfree_skb(skb); } -/* - * Receive protocol message : - * Note that network plane id propagates through the network, and may - * change at any time. The node with lowest address rules - */ -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, - struct sk_buff *buf) -{ - u32 rec_gap = 0; - u32 msg_tol; - struct tipc_msg *msg = buf_msg(buf); - - if (l_ptr->exec_mode == TIPC_LINK_BLOCKED) - goto exit; - - if (l_ptr->net_plane != msg_net_plane(msg)) - if (link_own_addr(l_ptr) > msg_prevnode(msg)) - l_ptr->net_plane = msg_net_plane(msg); - - switch (msg_type(msg)) { - - case RESET_MSG: - if (!link_probing(l_ptr) && - (l_ptr->peer_session != WILDCARD_SESSION)) { - if (less_eq(msg_session(msg), l_ptr->peer_session)) - break; /* duplicate or old reset: ignore */ - } - link_state_event(l_ptr, RESET_MSG); - - /* fall thru' */ - case ACTIVATE_MSG: - /* Update link settings according other endpoint's values */ - strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg)); - - msg_tol = msg_link_tolerance(msg); - if (msg_tol > l_ptr->tolerance) - l_ptr->tolerance = msg_tol; - - if (msg_linkprio(msg) > l_ptr->priority) - l_ptr->priority = msg_linkprio(msg); - - if (l_ptr->mtu > msg_max_pkt(msg)) - l_ptr->mtu = msg_max_pkt(msg); - - /* Synchronize broadcast link info, if not done previously */ - if (!tipc_node_is_up(l_ptr->owner)) { - l_ptr->owner->bclink.last_sent = - l_ptr->owner->bclink.last_in = - msg_last_bcast(msg); - l_ptr->owner->bclink.oos_state = 0; - } - - l_ptr->peer_session = msg_session(msg); - l_ptr->peer_bearer_id = msg_bearer_id(msg); - - if (!msg_peer_is_up(msg)) - tipc_node_fsm_evt(l_ptr->owner, PEER_LOST_CONTACT_EVT); - if (msg_type(msg) == ACTIVATE_MSG) - link_state_event(l_ptr, ACTIVATE_MSG); - break; - 
case STATE_MSG: - - msg_tol = msg_link_tolerance(msg); - if (msg_tol) - l_ptr->tolerance = msg_tol; - - if (msg_linkprio(msg) && - (msg_linkprio(msg) != l_ptr->priority)) { - pr_info("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, - l_ptr->priority, msg_linkprio(msg)); - l_ptr->priority = msg_linkprio(msg); - tipc_link_reset(l_ptr); - break; - } - - /* Record reception; force mismatch at next timeout: */ - l_ptr->silent_intv_cnt = 0; - - link_state_event(l_ptr, TRAFFIC_EVT); - l_ptr->stats.recv_states++; - if (link_resetting(l_ptr)) - break; - - if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg))) - rec_gap = mod(msg_next_sent(msg) - l_ptr->rcv_nxt); - - if (msg_probe(msg)) - l_ptr->stats.recv_probes++; - - /* Protocol message before retransmits, reduce loss risk */ - if (l_ptr->owner->bclink.recv_permitted) - tipc_bclink_update_link_state(l_ptr->owner, - msg_last_bcast(msg)); - - if (rec_gap || (msg_probe(msg))) - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, - rec_gap, 0, 0); - - if (msg_seq_gap(msg)) { - l_ptr->stats.recv_nacks++; - tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), - msg_seq_gap(msg)); - } - if (tipc_link_is_up(l_ptr)) - tipc_node_fsm_evt(l_ptr->owner, - PEER_ESTABL_CONTACT_EVT); - break; - } -exit: - kfree_skb(buf); -} - /* tipc_link_build_proto_msg: prepare link protocol message for transmission */ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, @@ -1727,6 +1566,96 @@ exit: return *skb; } +/* tipc_link_proto_rcv(): receive link level protocol message : + * Note that network plane id propagates through the network, and may + * change at any time. 
The node with lowest numerical id determines + * network plane + */ +static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u16 rcvgap = 0; + u16 nacked_gap = msg_seq_gap(hdr); + u16 peers_snd_nxt = msg_next_sent(hdr); + u16 peers_tol = msg_link_tolerance(hdr); + u16 peers_prio = msg_linkprio(hdr); + char *if_name; + int rc = 0; + + if (l->exec_mode == TIPC_LINK_BLOCKED) + goto exit; + + if (link_own_addr(l) > msg_prevnode(hdr)) + l->net_plane = msg_net_plane(hdr); + + switch (msg_type(hdr)) { + case RESET_MSG: + + /* Ignore duplicate RESET with old session number */ + if ((less_eq(msg_session(hdr), l->peer_session)) && + (l->peer_session != WILDCARD_SESSION)) + break; + /* fall thru' */ + case ACTIVATE_MSG: + + /* Complete own link name with peer's interface name */ + if_name = strrchr(l->name, ':') + 1; + if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME) + break; + if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) + break; + strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME); + + /* Update own tolerance if peer indicates a non-zero value */ + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + l->tolerance = peers_tol; + + /* Update own priority if peer's priority is higher */ + if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) + l->priority = peers_prio; + + l->peer_session = msg_session(hdr); + l->peer_bearer_id = msg_bearer_id(hdr); + rc = tipc_link_fsm_evt(l, msg_type(hdr), xmitq); + if (l->mtu > msg_max_pkt(hdr)) + l->mtu = msg_max_pkt(hdr); + break; + case STATE_MSG: + /* Update own tolerance if peer indicates a non-zero value */ + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + l->tolerance = peers_tol; + + l->silent_intv_cnt = 0; + l->stats.recv_states++; + if (msg_probe(hdr)) + l->stats.recv_probes++; + rc = tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq); + if (!tipc_link_is_up(l)) + break; + + /* Has peer sent packets we 
haven't received yet ? */ + if (more(peers_snd_nxt, l->rcv_nxt)) + rcvgap = peers_snd_nxt - l->rcv_nxt; + if (rcvgap || (msg_probe(hdr))) + tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, + 0, l->mtu, xmitq); + tipc_link_release_pkts(l, msg_ack(hdr)); + + /* If NACK, retransmit will now start at right position */ + if (nacked_gap) { + rc |= tipc_link_retransm(l, nacked_gap, xmitq); + l->stats.recv_nacks++; + } + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } +exit: + kfree_skb(skb); + return rc; +} + void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); diff --git a/net/tipc/link.h b/net/tipc/link.h index 0cf7d2b11803..37cfd7d7bf7d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -58,7 +58,7 @@ enum { TIPC_LINK_TUNNEL }; -/* Events occurring at packet reception or at timeout +/* Events returned from link at packet reception or at timeout */ enum { TIPC_LINK_UP_EVT = 1, @@ -223,6 +223,7 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr); void tipc_link_purge_backlog(struct tipc_link *l); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); +void tipc_link_activate(struct tipc_link *link); int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, @@ -244,7 +245,8 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); void link_prepare_wakeup(struct tipc_link *l); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); - +int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); static inline u32 link_own_addr(struct tipc_link *l) { return msg_prevnode(l->pmsg); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 
4dc66d9f69cc..2f1563b47e24 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -38,6 +38,7 @@ #define _TIPC_MSG_H #include +#include "core.h" /* * Constants and routines used to read and write TIPC payload message headers @@ -658,12 +659,12 @@ static inline void msg_set_link_selector(struct tipc_msg *m, u32 n) /* * Word 5 */ -static inline u32 msg_session(struct tipc_msg *m) +static inline u16 msg_session(struct tipc_msg *m) { return msg_bits(m, 5, 16, 0xffff); } -static inline void msg_set_session(struct tipc_msg *m, u32 n) +static inline void msg_set_session(struct tipc_msg *m, u16 n) { msg_set_bits(m, 5, 16, 0xffff, n); } @@ -766,10 +767,19 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline bool msg_peer_is_up(struct tipc_msg *m) +static inline bool msg_is_traffic(struct tipc_msg *m) { - if (likely(msg_user(m) != LINK_PROTOCOL) || (msg_type(m) == STATE_MSG)) + if (likely(msg_user(m) != LINK_PROTOCOL)) return true; + if ((msg_type(m) == RESET_MSG) || (msg_type(m) == ACTIVATE_MSG)) + return false; + return true; +} + +static inline bool msg_peer_is_up(struct tipc_msg *m) +{ + if (likely(msg_is_traffic(m))) + return false; return msg_redundant_link(m); } @@ -886,4 +896,36 @@ static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, return rv; } +/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number + * @list: list to be appended to + * @skb: buffer to add + * Returns true if queue should treated further, otherwise false + */ +static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list, + struct sk_buff *skb) +{ + struct sk_buff *_skb, *tmp; + struct tipc_msg *hdr = buf_msg(skb); + u16 seqno = msg_seqno(hdr); + + if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) { + __skb_queue_head(list, skb); + return true; + } + if (likely(less(seqno, buf_seqno(skb_peek(list))))) { + __skb_queue_head(list, skb); + return true; + } + if (!more(seqno, 
buf_seqno(skb_peek_tail(list)))) { + skb_queue_walk_safe(list, _skb, tmp) { + if (likely(less(seqno, buf_seqno(_skb)))) { + __skb_queue_before(list, _skb, skb); + return true; + } + } + } + __skb_queue_tail(list, skb); + return false; +} + #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index 9dbbb5de287b..e92f84afbf95 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -40,11 +40,13 @@ #include "name_distr.h" #include "socket.h" #include "bcast.h" +#include "discover.h" static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); +static void tipc_node_fsm_evt(struct tipc_node *n, int evt); struct tipc_sock_conn { u32 port; @@ -141,7 +143,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->state = SELF_DOWN_PEER_DOWN; + n_ptr->state = SELF_DOWN_PEER_LEAVING; n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; @@ -424,7 +426,7 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) /* tipc_node_fsm_evt - node finite state machine * Determines when contact is allowed with peer node */ -void tipc_node_fsm_evt(struct tipc_node *n, int evt) +static void tipc_node_fsm_evt(struct tipc_node *n, int evt) { int state = n->state; @@ -523,23 +525,36 @@ void tipc_node_fsm_evt(struct tipc_node *n, int evt) n->state = state; } -bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr) +bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_link *l, + struct tipc_msg *hdr) { int state = n->state; if (likely(state == SELF_UP_PEER_UP)) return true; + if (state == SELF_DOWN_PEER_DOWN) return true; - if (state == SELF_UP_PEER_COMING) + + if (state == SELF_UP_PEER_COMING) { + /* If not traffic msg, peer may still be 
ESTABLISHING */ + if (tipc_link_is_up(l) && msg_is_traffic(hdr)) + tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT); return true; + } + if (state == SELF_COMING_PEER_UP) return true; + if (state == SELF_LEAVING_PEER_DOWN) return false; - if (state == SELF_DOWN_PEER_LEAVING) - if (!msg_peer_is_up(hdr)) - return true; + + if (state == SELF_DOWN_PEER_LEAVING) { + if (msg_peer_is_up(hdr)) + return false; + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; + } return false; } @@ -819,6 +834,82 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +/** + * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer: pointer to bearer message arrived on + * + * Invoked with no locks held. Bearer pointer must point to a valid bearer + * structure (i.e. cannot be NULL), but bearer can be inactive. + */ +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) +{ + struct sk_buff_head xmitq; + struct tipc_node *n; + struct tipc_link *l; + struct tipc_msg *hdr; + struct tipc_media_addr *maddr; + int bearer_id = b->identity; + int rc = 0; + + __skb_queue_head_init(&xmitq); + + /* Ensure message is well-formed */ + if (unlikely(!tipc_msg_validate(skb))) + goto discard; + + /* Handle arrival of a non-unicast link packet */ + hdr = buf_msg(skb); + if (unlikely(msg_non_seq(hdr))) { + if (msg_user(hdr) == LINK_CONFIG) + tipc_disc_rcv(net, skb, b); + else + tipc_bclink_rcv(net, skb); + return; + } + + /* Locate neighboring node that sent packet */ + n = tipc_node_find(net, msg_prevnode(hdr)); + if (unlikely(!n)) + goto discard; + tipc_node_lock(n); + + /* Locate link endpoint that should handle packet */ + l = n->links[bearer_id].link; + if (unlikely(!l)) + goto unlock; + + /* Is reception of this packet permitted at the moment ? 
*/ + if (unlikely(n->state != SELF_UP_PEER_UP)) + if (!tipc_node_filter_skb(n, l, hdr)) + goto unlock; + + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + tipc_bclink_sync_state(n, hdr); + + /* Release acked broadcast messages */ + if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) + tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); + + /* Check protocol and update link state */ + rc = tipc_link_rcv(l, skb, &xmitq); + + if (unlikely(rc & TIPC_LINK_UP_EVT)) + tipc_link_activate(l); + if (unlikely(rc & TIPC_LINK_DOWN_EVT)) + tipc_link_reset(l); + skb = NULL; +unlock: + tipc_node_unlock(n); + tipc_sk_rcv(net, &n->links[bearer_id].inputq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); + tipc_node_put(n); +discard: + kfree_skb(skb); +} + int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; diff --git a/net/tipc/node.h b/net/tipc/node.h index 270256e09ee5..5e7016802077 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -185,7 +185,6 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); - int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); static inline void tipc_node_lock(struct tipc_node *node) @@ -193,9 +192,6 @@ static inline void tipc_node_lock(struct tipc_node *node) spin_lock_bh(&node->lock); } -void tipc_node_fsm_evt(struct tipc_node *n, int evt); -bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr); - static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) { int bearer_id = n->active_links[sel & 1]; -- cgit From 16040894b26af9f85d9395f072c53d76a44eba21 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 21 Jul 2015 06:42:28 -0400 Subject: tipc: fix compatibility bug In commit d999297c3dbbe7fdd832f7fa4ec84301e170b3e6 ("tipc: reduce locking scope 
during packet reception") we introduced a new function tipc_link_proto_rcv(). This function contains a bug, so that it sometimes by error sends out a non-zero link priority value in created protocol messages. The bug may lead to an extra link reset at initial link establishment with older nodes. This will never happen more than once, whereafter the link will work as intended. We fix this bug in this commit. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 55b675d20de8..b63d57390bb7 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1639,7 +1639,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || (msg_probe(hdr))) tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, - 0, l->mtu, xmitq); + 0, 0, xmitq); tipc_link_release_pkts(l, msg_ack(hdr)); /* If NACK, retransmit will now start at right position */ -- cgit From 29042e19f2c602fabe4705b5b719550b4627639c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 22 Jul 2015 10:11:18 -0400 Subject: tipc: let function tipc_msg_reverse() expand header when needed The shortest TIPC message header, for cluster local CONNECTED messages, is 24 bytes long. With this format, the fields "dest_node" and "orig_node" are optimized away, since they in reality are redundant in this particular case. However, the absence of these fields leads to code inconsistencies that are difficult to handle in some cases, especially when we need to reverse or reject messages at the socket layer. In this commit, we concentrate the handling of the absent fields to one place, by letting the function tipc_msg_reverse() reallocate the buffer and expand the header to 32 bytes when necessary.
This means that the socket code now can assume that the two previously absent fields are present in the header when a message needs to be rejected. This opens up for some further simplifications of the socket code. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 67 ++++++++++++++++++++++++++++++++++--------------------- net/tipc/msg.h | 3 +-- net/tipc/socket.c | 12 +++++----- 3 files changed, 48 insertions(+), 34 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 08b4cc7d496d..4339aab93034 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -463,43 +463,58 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, /** * tipc_msg_reverse(): swap source and destination addresses and add error code - * @buf: buffer containing message to be reversed - * @dnode: return value: node where to send message after reversal - * @err: error code to be set in message - * Consumes buffer if failure + * @own_node: originating node id for reversed message + * @skb: buffer containing message to be reversed; may be replaced. 
+ * @err: error code to be set in message, if any + * Consumes buffer at failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, - int err) +bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, u32 *dnode, int err) { - struct tipc_msg *msg = buf_msg(buf); + struct sk_buff *_skb = *skb; + struct tipc_msg *hdr = buf_msg(_skb); struct tipc_msg ohdr; - uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); - if (skb_linearize(buf)) + if (skb_linearize(_skb)) goto exit; - msg = buf_msg(buf); - if (msg_dest_droppable(msg)) + hdr = buf_msg(_skb); + if (msg_dest_droppable(hdr)) goto exit; - if (msg_errcode(msg)) + if (msg_errcode(hdr)) goto exit; - memcpy(&ohdr, msg, msg_hdr_sz(msg)); - msg_set_errcode(msg, err); - msg_set_origport(msg, msg_destport(&ohdr)); - msg_set_destport(msg, msg_origport(&ohdr)); - msg_set_prevnode(msg, own_addr); - if (!msg_short(msg)) { - msg_set_orignode(msg, msg_destnode(&ohdr)); - msg_set_destnode(msg, msg_orignode(&ohdr)); + + /* Take a copy of original header before altering message */ + memcpy(&ohdr, hdr, msg_hdr_sz(hdr)); + + /* Never return SHORT header; expand by replacing buffer if necessary */ + if (msg_short(hdr)) { + *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen); + if (!*skb) + goto exit; + memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen); + kfree_skb(_skb); + _skb = *skb; + hdr = buf_msg(_skb); + memcpy(hdr, &ohdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); } - msg_set_size(msg, msg_hdr_sz(msg) + rdsz); - skb_trim(buf, msg_size(msg)); - skb_orphan(buf); - *dnode = msg_orignode(&ohdr); + + /* Now reverse the concerned fields */ + msg_set_errcode(hdr, err); + msg_set_origport(hdr, msg_destport(&ohdr)); + msg_set_destport(hdr, msg_origport(&ohdr)); + msg_set_destnode(hdr, msg_prevnode(&ohdr)); + msg_set_prevnode(hdr, own_node); + msg_set_orignode(hdr, own_node); + msg_set_size(hdr, 
msg_hdr_sz(hdr) + dlen); + *dnode = msg_destnode(hdr); + skb_trim(_skb, msg_size(hdr)); + skb_orphan(_skb); return true; exit: - kfree_skb(buf); - *dnode = 0; + kfree_skb(_skb); + *skb = NULL; return false; } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 2f1563b47e24..0e96f59e3315 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -785,8 +785,7 @@ static inline bool msg_peer_is_up(struct tipc_msg *m) struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); -bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, - int err); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, u32 *dnode, int err); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5b0b08d58fcc..e2d5b9831485 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -260,7 +260,7 @@ static void tsk_rej_rx_queue(struct sock *sk) u32 own_node = tsk_own_node(tipc_sk(sk)); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + if (tipc_msg_reverse(own_node, &skb, &dnode, TIPC_ERR_NO_PORT)) tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -441,7 +441,7 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, TIPC_ERR_NO_PORT)) tipc_node_xmit_skb(net, skb, dnode, 0); } @@ -784,7 +784,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_OK)) { msg_set_type(msg, CONN_PROBE_REPLY); return; } @@ -1702,7 +1702,7 @@ static 
int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) atomic_add(truesize, dcnt); return 0; } - if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, -err)) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1796,7 +1796,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) goto xmit; } tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + if (!tipc_msg_reverse(tn->own_addr, &skb, &dnode, -err)) continue; xmit: tipc_node_xmit_skb(net, skb, dnode, dport); @@ -2090,7 +2090,7 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, TIPC_CONN_SHUTDOWN)) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); -- cgit From bcd3ffd4f6d7c994c93be2ab8598fdfb2952a1f1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 22 Jul 2015 10:11:19 -0400 Subject: tipc: introduce new tipc_sk_respond() function Currently, we use the code sequence if (msg_reverse()) tipc_link_xmit_skb() at numerous locations in socket.c. The preparation of arguments for these calls, as well as the sequence itself, makes the code unnecessarily complex. In this commit, we introduce a new function, tipc_sk_respond(), that performs this call combination. We also replace some, but not yet all, of these explicit call sequences with calls to the new function. Notably, we let the function tipc_sk_proto_rcv() use the new function to directly send out PROBE_REPLY messages, instead of deferring this to the calling tipc_sk_rcv() function, as we do now. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/msg.c | 3 +-- net/tipc/msg.h | 2 +- net/tipc/socket.c | 81 ++++++++++++++++++++++++++++++------------------------- 3 files changed, 47 insertions(+), 39 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4339aab93034..b6cc58ec7346 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -469,7 +469,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, * Consumes buffer at failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, u32 *dnode, int err) +bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { struct sk_buff *_skb = *skb; struct tipc_msg *hdr = buf_msg(_skb); @@ -508,7 +508,6 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, u32 *dnode, int err) msg_set_prevnode(hdr, own_node); msg_set_orignode(hdr, own_node); msg_set_size(hdr, msg_hdr_sz(hdr) + dlen); - *dnode = msg_destnode(hdr); skb_trim(_skb, msg_size(hdr)); skb_orphan(_skb); return true; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 0e96f59e3315..d0834bc519aa 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -785,7 +785,7 @@ static inline bool msg_peer_is_up(struct tipc_msg *m) struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); -bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, u32 *dnode, int err); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e2d5b9831485..71d88adadb18 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -248,6 +248,22 @@ static void tsk_advance_rx_queue(struct sock *sk) kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); } +/* tipc_sk_respond() : send response message back to sender + */ +static void tipc_sk_respond(struct sock *sk, 
struct sk_buff *skb, int err) +{ + u32 selector; + u32 dnode; + u32 onode = tipc_own_addr(sock_net(sk)); + + if (!tipc_msg_reverse(onode, &skb, err)) + return; + + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); +} + /** * tsk_rej_rx_queue - reject all buffers in socket receive queue * @@ -256,13 +272,9 @@ static void tsk_advance_rx_queue(struct sock *sk) static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; - u32 dnode; - u32 own_node = tsk_own_node(tipc_sk(sk)); - while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(own_node, &skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_node_xmit_skb(sock_net(sk), skb, dnode, 0); - } + while ((skb = __skb_dequeue(&sk->sk_receive_queue))) + tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } /* tsk_peer_msg - verify if message was sent by connected port's peer @@ -441,9 +453,7 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, - TIPC_ERR_NO_PORT)) - tipc_node_xmit_skb(net, skb, dnode, 0); + tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } } @@ -764,35 +774,35 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket - * @skb: pointer to message buffer. Set to NULL if buffer is consumed. + * @skb: pointer to message buffer. 
*/ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(*skb); + struct sock *sk = &tsk->sk; + struct tipc_msg *hdr = buf_msg(skb); + int mtyp = msg_type(hdr); int conn_cong; - u32 dnode; - u32 own_node = tsk_own_node(tsk); + /* Ignore if connection cannot be validated: */ - if (!tsk_peer_msg(tsk, msg)) + if (!tsk_peer_msg(tsk, hdr)) goto exit; tsk->probing_state = TIPC_CONN_OK; - if (msg_type(msg) == CONN_ACK) { + if (mtyp == CONN_PROBE) { + msg_set_type(hdr, CONN_PROBE_REPLY); + tipc_sk_respond(sk, skb, TIPC_OK); + return; + } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); - tsk->sent_unacked -= msg_msgcnt(msg); + tsk->sent_unacked -= msg_msgcnt(hdr); if (conn_cong) - tsk->sk.sk_write_space(&tsk->sk); - } else if (msg_type(msg) == CONN_PROBE) { - if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_OK)) { - msg_set_type(msg, CONN_PROBE_REPLY); - return; - } + sk->sk_write_space(sk); + } else if (mtyp != CONN_PROBE_REPLY) { + pr_warn("Received unknown CONN_PROTO msg\n"); } - /* Do nothing if msg_type() == CONN_PROBE_REPLY */ exit: - kfree_skb(*skb); - *skb = NULL; + kfree_skb(skb); } static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) @@ -1638,7 +1648,7 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb) int rc = TIPC_OK; if (unlikely(msg_user(msg) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, *skb); return TIPC_OK; } @@ -1690,7 +1700,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err; atomic_t *dcnt; - u32 dnode; + u32 dnode = msg_prevnode(buf_msg(skb)); struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); uint truesize = skb->truesize; @@ -1702,7 +1712,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) atomic_add(truesize, dcnt); return 0; } - if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, 
&dnode, -err)) + if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, -err)) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1794,9 +1804,11 @@ int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (!err) { dnode = msg_destnode(buf_msg(skb)); goto xmit; + } else { + dnode = msg_prevnode(buf_msg(skb)); } tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, &skb, &dnode, -err)) + if (!tipc_msg_reverse(tn->own_addr, &skb, -err)) continue; xmit: tipc_node_xmit_skb(net, skb, dnode, dport); @@ -2083,6 +2095,8 @@ static int tipc_shutdown(struct socket *sock, int how) case SS_CONNECTED: restart: + dnode = tsk_peer_node(tsk); + /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ skb = __skb_dequeue(&sk->sk_receive_queue); if (skb) { @@ -2090,13 +2104,8 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, &dnode, - TIPC_CONN_SHUTDOWN)) - tipc_node_xmit_skb(net, skb, dnode, - tsk->portid); + tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN); } else { - dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tsk_own_node(tsk), -- cgit From cda3696d3d26eb798c94de0dab5bd66ddb5627cb Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 22 Jul 2015 10:11:20 -0400 Subject: tipc: clean up socket layer message reception When a message is received in a socket, one of the call chains tipc_sk_rcv()->tipc_sk_enqueue()->filter_rcv()(->tipc_sk_proto_rcv()) or tipc_sk_backlog_rcv()->filter_rcv()(->tipc_sk_proto_rcv()) are followed. At each of these levels we may encounter situations where the message may need to be rejected, or a new message produced for transfer back to the sender. Despite recent improvements, the current code for doing this is perceived as awkward and hard to follow. Leveraging the two previous commits in this series, we now introduce a more uniform handling of such situations. 
We let each of the functions in the chain itself produce/reverse the message to be returned to the sender, but also perform the actual forwarding. This simplifies the necessary logics within each function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 20 ++--- net/tipc/msg.h | 3 +- net/tipc/socket.c | 255 ++++++++++++++++++++++++++---------------------------- net/tipc/socket.h | 2 +- 4 files changed, 134 insertions(+), 146 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6cc58ec7346..562c926a51cc 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -520,17 +520,15 @@ exit: /** * tipc_msg_lookup_dest(): try to find new destination for named message * @skb: the buffer containing the message. - * @dnode: return value: next-hop node, if destination found - * @err: return value: error code to use, if message to be rejected + * @err: error code to be used by caller if lookup fails * Does not consume buffer * Returns true if a destination is found, false otherwise */ -bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, - u32 *dnode, int *err) +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) { struct tipc_msg *msg = buf_msg(skb); - u32 dport; - u32 own_addr = tipc_own_addr(net); + u32 dport, dnode; + u32 onode = tipc_own_addr(net); if (!msg_isdata(msg)) return false; @@ -543,15 +541,15 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, return false; if (msg_reroute_cnt(msg)) return false; - *dnode = addr_domain(net, msg_lookup_scope(msg)); + dnode = addr_domain(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), - msg_nameinst(msg), dnode); + msg_nameinst(msg), &dnode); if (!dport) return false; msg_incr_reroute_cnt(msg); - if (*dnode != own_addr) - msg_set_prevnode(msg, own_addr); - msg_set_destnode(msg, *dnode); + if (dnode != onode) + msg_set_prevnode(msg, onode); + msg_set_destnode(msg, dnode); 
msg_set_destport(msg, dport); *err = TIPC_OK; return true; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d0834bc519aa..234fb0531d1d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -798,8 +798,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); -bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, - int *err); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); static inline u16 buf_seqno(struct sk_buff *skb) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 71d88adadb18..1060d52ff23e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1520,82 +1520,81 @@ static void tipc_data_ready(struct sock *sk) * @tsk: TIPC socket * @skb: pointer to message buffer. 
Set to NULL if buffer is consumed * - * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise + * Returns true if everything ok, false otherwise */ -static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) +static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(*skb); - int retval = -TIPC_ERR_NO_PORT; + struct tipc_msg *hdr = buf_msg(skb); - if (msg_mcast(msg)) - return retval; + if (unlikely(msg_mcast(hdr))) + return false; switch ((int)sock->state) { case SS_CONNECTED: + /* Accept only connection-based messages sent by peer */ - if (tsk_peer_msg(tsk, msg)) { - if (unlikely(msg_errcode(msg))) { - sock->state = SS_DISCONNECTING; - tsk->connected = 0; - /* let timer expire on it's own */ - tipc_node_remove_conn(net, tsk_peer_node(tsk), - tsk->portid); - } - retval = TIPC_OK; + if (unlikely(!tsk_peer_msg(tsk, hdr))) + return false; + + if (unlikely(msg_errcode(hdr))) { + sock->state = SS_DISCONNECTING; + tsk->connected = 0; + /* Let timer expire on it's own */ + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); } - break; + return true; + case SS_CONNECTING: - /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(msg))) - break; + /* Accept only ACK or NACK message */ + if (unlikely(!msg_connected(hdr))) + return false; - if (unlikely(msg_errcode(msg))) { + if (unlikely(msg_errcode(hdr))) { sock->state = SS_DISCONNECTING; sk->sk_err = ECONNREFUSED; - retval = TIPC_OK; - break; + return true; } - if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) { + if (unlikely(!msg_isdata(hdr))) { sock->state = SS_DISCONNECTING; sk->sk_err = EINVAL; - retval = TIPC_OK; - break; + return true; } - tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg)); - msg_set_importance(&tsk->phdr, msg_importance(msg)); + tipc_sk_finish_conn(tsk, msg_origport(hdr), 
msg_orignode(hdr)); + msg_set_importance(&tsk->phdr, msg_importance(hdr)); sock->state = SS_CONNECTED; - /* If an incoming message is an 'ACK-', it should be - * discarded here because it doesn't contain useful - * data. In addition, we should try to wake up - * connect() routine if sleeping. - */ - if (msg_data_sz(msg) == 0) { - kfree_skb(*skb); - *skb = NULL; - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); - } - retval = TIPC_OK; - break; + /* If 'ACK+' message, add to socket receive queue */ + if (msg_data_sz(hdr)) + return true; + + /* If empty 'ACK-' message, wake up sleeping connect() */ + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); + + /* 'ACK-' message is neither accepted nor rejected: */ + msg_set_dest_droppable(hdr, 1); + return false; + case SS_LISTENING: case SS_UNCONNECTED: + /* Accept only SYN message */ - if (!msg_connected(msg) && !(msg_errcode(msg))) - retval = TIPC_OK; + if (!msg_connected(hdr) && !(msg_errcode(hdr))) + return true; break; case SS_DISCONNECTING: break; default: pr_err("Unknown socket state %u\n", sock->state); } - return retval; + return false; } /** @@ -1630,61 +1629,70 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) /** * filter_rcv - validate incoming message * @sk: socket - * @skb: pointer to message. Set to NULL if buffer is consumed. + * @skb: pointer to message. * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. 
* * Called with socket lock already taken * - * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected + * Returns true if message was added to socket receive queue, otherwise false */ -static int filter_rcv(struct sock *sk, struct sk_buff **skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_msg *msg = buf_msg(*skb); - unsigned int limit = rcvbuf_limit(sk, *skb); - int rc = TIPC_OK; + struct tipc_msg *hdr = buf_msg(skb); + unsigned int limit = rcvbuf_limit(sk, skb); + int err = TIPC_OK; + int usr = msg_user(hdr); - if (unlikely(msg_user(msg) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, *skb); - return TIPC_OK; + if (unlikely(msg_user(hdr) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return false; } - if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { - kfree_skb(*skb); + if (unlikely(usr == SOCK_WAKEUP)) { + kfree_skb(skb); tsk->link_cong = 0; sk->sk_write_space(sk); - *skb = NULL; - return TIPC_OK; + return false; } - /* Reject message if it is wrong sort of message for socket */ - if (msg_type(msg) > TIPC_DIRECT_MSG) - return -TIPC_ERR_NO_PORT; + /* Drop if illegal message type */ + if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { + kfree_skb(skb); + return false; + } - if (sock->state == SS_READY) { - if (msg_connected(msg)) - return -TIPC_ERR_NO_PORT; - } else { - rc = filter_connect(tsk, skb); - if (rc != TIPC_OK || !*skb) - return rc; + /* Reject if wrong message type for current socket state */ + if (unlikely(sock->state == SS_READY)) { + if (msg_connected(hdr)) { + err = TIPC_ERR_NO_PORT; + goto reject; + } + } else if (unlikely(!filter_connect(tsk, skb))) { + err = TIPC_ERR_NO_PORT; + goto reject; } /* Reject message if there isn't room to queue it */ - if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) - return -TIPC_ERR_OVERLOAD; + if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { + err = TIPC_ERR_OVERLOAD; + goto 
reject; + } /* Enqueue message */ - TIPC_SKB_CB(*skb)->handle = NULL; - __skb_queue_tail(&sk->sk_receive_queue, *skb); - skb_set_owner_r(*skb, sk); + TIPC_SKB_CB(skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); sk->sk_data_ready(sk); - *skb = NULL; - return TIPC_OK; + return true; + +reject: + tipc_sk_respond(sk, skb, err); + return false; } /** @@ -1698,22 +1706,10 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - int err; - atomic_t *dcnt; - u32 dnode = msg_prevnode(buf_msg(skb)); - struct tipc_sock *tsk = tipc_sk(sk); - struct net *net = sock_net(sk); - uint truesize = skb->truesize; + unsigned int truesize = skb->truesize; - err = filter_rcv(sk, &skb); - if (likely(!skb)) { - dcnt = &tsk->dupl_rcvcnt; - if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, dcnt); - return 0; - } - if (!err || tipc_msg_reverse(tsk_own_node(tsk), &skb, -err)) - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (likely(filter_rcv(sk, skb))) + atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); return 0; } @@ -1723,45 +1719,43 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * @inputq: list of incoming buffers with potentially different destinations * @sk: socket where the buffers should be enqueued * @dport: port number for the socket - * @_skb: returned buffer to be forwarded or rejected, if applicable * * Caller must hold socket lock - * - * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD - * or -TIPC_ERR_NO_PORT */ -static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport, struct sk_buff **_skb) +static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport) { unsigned int lim; atomic_t *dcnt; - int err; struct sk_buff *skb; unsigned long time_limit = jiffies + 2; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, 
time_limit))) - return TIPC_OK; + return; + skb = tipc_skb_dequeue(inputq, dport); if (unlikely(!skb)) - return TIPC_OK; + return; + + /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, &skb); - if (likely(!skb)) - continue; - *_skb = skb; - return err; + filter_rcv(sk, skb); + continue; } + + /* Try backlog, compensating for double-counted bytes */ dcnt = &tipc_sk(sk)->dupl_rcvcnt; if (sk->sk_backlog.len) atomic_set(dcnt, 0); lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); if (likely(!sk_add_backlog(sk, skb, lim))) continue; - *_skb = skb; - return -TIPC_ERR_OVERLOAD; + + /* Overload => reject message back to sender */ + tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + break; } - return TIPC_OK; } /** @@ -1769,51 +1763,46 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, * @inputq: buffer list containing the buffers * Consumes all buffers in list until inputq is empty * Note: may be called in multiple threads referring to the same queue - * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH - * Only node local calls check the return value, sending single-buffer queues */ -int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { u32 dnode, dport = 0; int err; - struct sk_buff *skb; struct tipc_sock *tsk; - struct tipc_net *tn; struct sock *sk; + struct sk_buff *skb; while (skb_queue_len(inputq)) { - err = -TIPC_ERR_NO_PORT; - skb = NULL; dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - err = tipc_sk_enqueue(inputq, sk, dport, &skb); + tipc_sk_enqueue(inputq, sk, dport); spin_unlock_bh(&sk->sk_lock.slock); - dport = 0; } sock_put(sk); - } else { - skb = tipc_skb_dequeue(inputq, dport); - } - if (likely(!skb)) continue; - if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) - goto xmit; - if 
(!err) { - dnode = msg_destnode(buf_msg(skb)); - goto xmit; - } else { - dnode = msg_prevnode(buf_msg(skb)); } - tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, &skb, -err)) + + /* No destination socket => dequeue skb if still there */ + skb = tipc_skb_dequeue(inputq, dport); + if (!skb) + return; + + /* Try secondary lookup if unresolved named message */ + err = TIPC_ERR_NO_PORT; + if (tipc_msg_lookup_dest(net, skb, &err)) + goto xmit; + + /* Prepare for message rejection */ + if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) continue; xmit: + dnode = msg_destnode(buf_msg(skb)); tipc_node_xmit_skb(net, skb, dnode, dport); } - return err ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -2082,7 +2071,10 @@ static int tipc_shutdown(struct socket *sock, int how) struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; - u32 dnode; + u32 dnode = tsk_peer_node(tsk); + u32 dport = tsk_peer_port(tsk); + u32 onode = tipc_own_addr(net); + u32 oport = tsk->portid; int res; if (how != SHUT_RDWR) @@ -2108,9 +2100,8 @@ restart: } else { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tsk_own_node(tsk), - tsk_peer_port(tsk), - tsk->portid, TIPC_CONN_SHUTDOWN); + 0, dnode, onode, dport, oport, + TIPC_CONN_SHUTDOWN); tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index bf6551389522..4241f22069dc 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -44,7 +44,7 @@ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) int tipc_socket_init(void); void tipc_socket_stop(void); -int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq); void tipc_sk_reinit(struct net *net); -- cgit From 
5a4c355229da12558b5ded0775f4d0bc6650d28d Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 29 Jul 2015 18:28:01 -0400 Subject: tipc: fix bug in broadcast synch message create function In commit d999297c3dbbe7fdd832f7fa4ec84301e170b3e6 ("tipc: reduce locking scope during packet reception") we introduced a new function tipc_link_build_bcast_sync_msg(), which carries initial synchronization data between two nodes at first contact and at re-contact. In this function, we failed to add synchronization data, with the effect that the broadcast link endpoints will fail to synchronize correctly at re-contact between a running and a restarted node. All other cases work as intended. With this commit, we fix this bug. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index b63d57390bb7..cc40aa6eb66c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -294,11 +294,14 @@ static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, { struct sk_buff *skb; struct sk_buff_head list; + u16 last_sent; skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, 0, l->addr, link_own_addr(l), 0, 0, 0); if (!skb) return; + last_sent = tipc_bclink_get_last_sent(l->owner->net); + msg_set_last_bcast(buf_msg(skb), last_sent); __skb_queue_head_init(&list); __skb_queue_tail(&list, skb); tipc_link_xmit(l, &list, xmitq); -- cgit From cbeb83ca68dcedf69b336fd1c5263658cbe5b51e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:15 -0400 Subject: tipc: eliminate function tipc_link_activate() The function tipc_link_activate() is redundant, since it mostly performs settings that have already been done in a preceding tipc_link_reset(). There are three exceptions to this: - The actual state change to TIPC_LINK_WORKING. This should anyway be done in the FSM, and not in a separate function. - Registration of the link with the bearer. 
This should be done by the node, since we don't want the link to have any knowledge about its specific bearer. - Call to tipc_node_link_up() for user access registration. With the new role distribution between link aggregation and link level this becomes the wrong call order; tipc_node_link_up() should instead be called directly as a result of a TIPC_LINK_UP event, hence by the node itself. This commit implements those changes. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 17 ++++------------- net/tipc/link.h | 1 - net/tipc/node.c | 6 ++++-- 3 files changed, 8 insertions(+), 16 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index cc40aa6eb66c..05837ba7b68c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -228,6 +228,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->peer_session = WILDCARD_SESSION; l_ptr->bearer_id = b_ptr->identity; l_ptr->tolerance = b_ptr->tolerance; + l_ptr->snd_nxt = 1; + l_ptr->rcv_nxt = 1; l_ptr->state = TIPC_LINK_RESETTING; l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; @@ -376,6 +378,7 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, pl = node_active_link(l->owner, 0); if (pl && link_probing(pl)) break; + l->state = TIPC_LINK_WORKING; actions |= LINK_ACTIVATE; if (!l->owner->working_links) actions |= SND_BCAST_SYNC; @@ -398,6 +401,7 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, pl = node_active_link(l->owner, 0); if (pl && link_probing(pl)) break; + l->state = TIPC_LINK_WORKING; actions |= LINK_ACTIVATE; if (!l->owner->working_links) actions |= SND_BCAST_SYNC; @@ -639,19 +643,6 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } -void tipc_link_activate(struct tipc_link *link) -{ - struct tipc_node *node = link->owner; - - link->rcv_nxt = 1; - link->stats.recv_info = 1; - link->silent_intv_cnt = 0; - link->state = TIPC_LINK_WORKING; - link->exec_mode = 
TIPC_LINK_OPEN; - tipc_node_link_up(node, link->bearer_id); - tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); -} - /** * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use diff --git a/net/tipc/link.h b/net/tipc/link.h index 37cfd7d7bf7d..279196d6baac 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -223,7 +223,6 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr); void tipc_link_purge_backlog(struct tipc_link *l); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_activate(struct tipc_link *link); int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, diff --git a/net/tipc/node.c b/net/tipc/node.c index e92f84afbf95..558df25a7fc6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -295,11 +295,13 @@ void tipc_node_link_up(struct tipc_node *n, int bearer_id) n->action_flags |= TIPC_NOTIFY_LINK_UP; n->link_id = l->peer_bearer_id << 16 | l->bearer_id; + tipc_bearer_add_dest(n->net, bearer_id, n->addr); + pr_debug("Established link <%s> on network plane %c\n", l->name, l->net_plane); /* No active links ? 
=> take both active slots */ - if (*slot0 < 0) { + if (!tipc_node_is_up(n)) { *slot0 = bearer_id; *slot1 = bearer_id; node_established_contact(n); @@ -896,7 +898,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) rc = tipc_link_rcv(l, skb, &xmitq); if (unlikely(rc & TIPC_LINK_UP_EVT)) - tipc_link_activate(l); + tipc_node_link_up(n, bearer_id); if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_link_reset(l); skb = NULL; -- cgit From 6144a996a65199480eed7521c1c50590c282e78e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:16 -0400 Subject: tipc: move all link_reset() calls to link aggregation level In line with our effort to let the node level have full control over its links, we want to move all link reset calls from link.c to node.c. Some of the calls can be moved by simply moving the calling function, when this is the right thing to do. For the remaining calls we use the now established technique of returning a TIPC_LINK_DOWN_EVT flag from tipc_link_rcv(), whereafter we perform the reset call when the call returns. This change serves as a preparation for the coming commits. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/bearer.c | 4 +-- net/tipc/link.c | 81 +++++++++++++---------------------------------------- net/tipc/link.h | 3 -- net/tipc/node.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- net/tipc/node.h | 1 + 5 files changed, 104 insertions(+), 69 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index eae58a6b121c..ce9f7bfc0b92 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -343,7 +343,7 @@ restart: static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_link_delete_list(net, b_ptr->identity); + tipc_node_delete_links(net, b_ptr->identity); tipc_disc_reset(net, b_ptr); return 0; } @@ -361,7 +361,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) pr_info("Disabling bearer <%s>\n", b_ptr->name); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(net, b_ptr->identity); + tipc_node_delete_links(net, b_ptr->identity); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); diff --git a/net/tipc/link.c b/net/tipc/link.c index 05837ba7b68c..8c81db7b17f9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -137,9 +137,9 @@ static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); +static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); -static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); +static int tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); /* * Simple link routines @@ -258,34 +258,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, return l_ptr; } -/** - * tipc_link_delete - Delete a link - * @l: link 
to be deleted - */ -void tipc_link_delete(struct tipc_link *l) -{ - tipc_link_reset(l); - tipc_link_reset_fragments(l); - tipc_node_detach_link(l->owner, l); -} - -void tipc_link_delete_list(struct net *net, unsigned int bearer_id) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *link; - struct tipc_node *node; - - rcu_read_lock(); - list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); - link = node->links[bearer_id].link; - if (link) - tipc_link_delete(link); - tipc_node_unlock(node); - } - rcu_read_unlock(); -} - /* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints. * * Give a newly added peer node the sequence number where it should @@ -875,26 +847,6 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) l->snd_nxt = seqno; } -void tipc_link_reset_all(struct tipc_node *node) -{ - char addr_string[16]; - u32 i; - - tipc_node_lock(node); - - pr_warn("Resetting all links to %s\n", - tipc_addr_string_fill(addr_string, node->addr)); - - for (i = 0; i < MAX_BEARERS; i++) { - if (node->links[i].link) { - link_print(node->links[i].link, "Resetting link\n"); - tipc_link_reset(node->links[i].link); - } - } - - tipc_node_unlock(node); -} - static void link_retransmit_failure(struct tipc_link *l_ptr, struct sk_buff *buf) { @@ -911,7 +863,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, msg_errcode(msg)); pr_info("sqno %u, prev: %x, src: %x\n", msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg)); - tipc_link_reset(l_ptr); } else { /* Handle failure on broadcast link */ struct tipc_node *n_ptr; @@ -987,6 +938,7 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, l->stale_count = 1; } else if (++l->stale_count > 100) { link_retransmit_failure(l, skb); + l->exec_mode = TIPC_LINK_BLOCKED; return TIPC_LINK_DOWN_EVT; } skb_queue_walk(&l->transmq, skb) { @@ -1079,12 +1031,13 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff 
*skb) * Consumes buffer * Node lock must be held */ -static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) +static int tipc_link_input(struct tipc_link *link, struct sk_buff *skb) { struct tipc_node *node = link->owner; struct tipc_msg *msg = buf_msg(skb); struct sk_buff *iskb; int pos = 0; + int rc = 0; switch (msg_user(msg)) { case TUNNEL_PROTOCOL: @@ -1094,7 +1047,8 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) kfree_skb(skb); break; } - if (!tipc_link_failover_rcv(link, &skb)) + rc |= tipc_link_failover_rcv(link, &skb); + if (!skb) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { tipc_data_input(link, skb); @@ -1113,7 +1067,8 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) link->stats.recv_fragmented++; tipc_data_input(link, skb); } else if (!link->reasm_buf) { - tipc_link_reset(link); + link->exec_mode = TIPC_LINK_BLOCKED; + rc |= TIPC_LINK_DOWN_EVT; } break; case BCAST_PROTOCOL: @@ -1122,6 +1077,7 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) default: break; }; + return rc; } static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) @@ -1215,7 +1171,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt++; l->stats.recv_info++; if (unlikely(!tipc_data_input(l, skb))) - tipc_link_input(l, skb); + rc |= tipc_link_input(l, skb); /* Ack at regular intervals */ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { @@ -1504,14 +1460,15 @@ tunnel_queue: /* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet * Owner node is locked. 
*/ -static bool tipc_link_failover_rcv(struct tipc_link *link, - struct sk_buff **skb) +static int tipc_link_failover_rcv(struct tipc_link *link, + struct sk_buff **skb) { struct tipc_msg *msg = buf_msg(*skb); struct sk_buff *iskb = NULL; struct tipc_link *pl = NULL; int bearer_id = msg_bearer_id(msg); int pos = 0; + int rc = 0; if (msg_type(msg) != FAILOVER_MSG) { pr_warn("%sunknown tunnel pkt received\n", link_co_err); @@ -1524,8 +1481,6 @@ static bool tipc_link_failover_rcv(struct tipc_link *link, goto exit; pl = link->owner->links[bearer_id].link; - if (pl && tipc_link_is_up(pl)) - tipc_link_reset(pl); if (link->failover_pkts == FIRST_FAILOVER) link->failover_pkts = msg_msgcnt(msg); @@ -1550,14 +1505,18 @@ static bool tipc_link_failover_rcv(struct tipc_link *link, } if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { link->stats.recv_fragments++; - tipc_buf_append(&link->failover_skb, &iskb); + if (!tipc_buf_append(&link->failover_skb, &iskb) && + !link->failover_skb) { + link->exec_mode = TIPC_LINK_BLOCKED; + rc |= TIPC_LINK_DOWN_EVT; + } } exit: if (!link->failover_pkts && pl) pl->exec_mode = TIPC_LINK_OPEN; kfree_skb(*skb); *skb = iskb; - return *skb; + return rc; } /* tipc_link_proto_rcv(): receive link level protocol message : diff --git a/net/tipc/link.h b/net/tipc/link.h index 279196d6baac..bb1378b7cb59 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -212,8 +212,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n, const struct tipc_media_addr *maddr, struct sk_buff_head *inputq, struct sk_buff_head *namedq); -void tipc_link_delete(struct tipc_link *link); -void tipc_link_delete_list(struct net *net, unsigned int bearer_id); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); @@ -221,7 +219,6 @@ int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void 
tipc_link_purge_queues(struct tipc_link *l_ptr); void tipc_link_purge_backlog(struct tipc_link *l); -void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); diff --git a/net/tipc/node.c b/net/tipc/node.c index 558df25a7fc6..6a0680ba98a9 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -407,6 +407,44 @@ bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b, return true; } +void tipc_node_delete_links(struct net *net, int bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l; + struct tipc_node *n; + + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_lock(n); + l = n->links[bearer_id].link; + if (l) { + tipc_link_reset(l); + n->links[bearer_id].link = NULL; + n->link_cnt--; + } + tipc_node_unlock(n); + kfree(l); + } + rcu_read_unlock(); +} + +static void tipc_node_reset_links(struct tipc_node *n) +{ + char addr_string[16]; + u32 i; + + tipc_node_lock(n); + + pr_warn("Resetting all links to %s\n", + tipc_addr_string_fill(addr_string, n->addr)); + + for (i = 0; i < MAX_BEARERS; i++) { + if (n->links[i].link) + tipc_link_reset(n->links[i].link); + } + tipc_node_unlock(n); +} + void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->bearer_id].link = l_ptr; @@ -721,7 +759,7 @@ void tipc_node_unlock(struct tipc_node *node) tipc_bclink_input(net); if (flags & TIPC_BCAST_RESET) - tipc_link_reset_all(node); + tipc_node_reset_links(node); } /* Caller should hold node lock for the passed node */ @@ -836,6 +874,40 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +/* tipc_node_tnl_init(): handle a received TUNNEL_PROTOCOL packet, + * in order to control parallel link failover or synchronization + */ +static void tipc_node_tnl_init(struct tipc_node *n, int bearer_id, + struct 
sk_buff *skb) +{ + struct tipc_link *tnl, *pl; + struct tipc_msg *hdr = buf_msg(skb); + u16 oseqno = msg_seqno(hdr); + int pb_id = msg_bearer_id(hdr); + + if (pb_id >= MAX_BEARERS) + return; + + tnl = n->links[bearer_id].link; + if (!tnl) + return; + + /* Ignore if duplicate */ + if (less(oseqno, tnl->rcv_nxt)) + return; + + pl = n->links[pb_id].link; + if (!pl) + return; + + if (msg_type(hdr) == FAILOVER_MSG) { + if (tipc_link_is_up(pl)) { + tipc_link_reset(pl); + pl->exec_mode = TIPC_LINK_BLOCKED; + } + } +} + /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace @@ -854,6 +926,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) struct tipc_media_addr *maddr; int bearer_id = b->identity; int rc = 0; + int usr; __skb_queue_head_init(&xmitq); @@ -863,8 +936,9 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Handle arrival of a non-unicast link packet */ hdr = buf_msg(skb); + usr = msg_user(hdr); if (unlikely(msg_non_seq(hdr))) { - if (msg_user(hdr) == LINK_CONFIG) + if (usr == LINK_CONFIG) tipc_disc_rcv(net, skb, b); else tipc_bclink_rcv(net, skb); @@ -877,6 +951,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) goto discard; tipc_node_lock(n); + /* Prepare links for tunneled reception if applicable */ + if (unlikely(usr == TUNNEL_PROTOCOL)) + tipc_node_tnl_init(n, bearer_id, skb); + /* Locate link endpoint that should handle packet */ l = n->links[bearer_id].link; if (unlikely(!l)) @@ -887,7 +965,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (!tipc_node_filter_skb(n, l, hdr)) goto unlock; - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + if (unlikely(usr == LINK_PROTOCOL)) tipc_bclink_sync_state(n, hdr); /* Release acked broadcast messages */ diff --git a/net/tipc/node.h b/net/tipc/node.h index 5e7016802077..49df0e934a65 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -171,6 
+171,7 @@ void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *bearer, struct tipc_media_addr *maddr); bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *bearer, struct tipc_media_addr *maddr); +void tipc_node_delete_links(struct net *net, int bearer_id); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, int bearer_id); -- cgit From 655fb243b8ae5e652f744311bcb6e806e83cea1e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:17 -0400 Subject: tipc: reverse call order for link_reset()->node_link_down() In many cases the call order when a link is reset goes as follows: tipc_node_xx()->tipc_link_reset()->tipc_node_link_down() This is not the right order if we want the node to be in control, so in this commit we change the order to: tipc_node_xx()->tipc_node_link_down()->tipc_link_reset() The fact that tipc_link_reset() now is called from only one location with a well-defined state will also facilitate later simplifications of tipc_link_reset() and the link FSM. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 6 +----- net/tipc/node.c | 30 +++++++++++++++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 8c81db7b17f9..2ccdb6ffd5c8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -566,7 +566,6 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) void tipc_link_reset(struct tipc_link *l_ptr) { u32 prev_state = l_ptr->state; - int was_active_link = tipc_link_is_active(l_ptr); struct tipc_node *owner = l_ptr->owner; struct tipc_link *pl = tipc_parallel_link(l_ptr); @@ -584,10 +583,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) (prev_state == TIPC_LINK_ESTABLISHING)) return; - tipc_node_link_down(l_ptr->owner, l_ptr->bearer_id); - tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); - - if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { + if (tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { l_ptr->exec_mode = TIPC_LINK_BLOCKED; l_ptr->failover_checkpt = l_ptr->rcv_nxt; pl->failover_pkts = FIRST_FAILOVER; diff --git a/net/tipc/node.c b/net/tipc/node.c index 6a0680ba98a9..65c2c80cffe7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -265,7 +265,7 @@ static void tipc_node_timeout(unsigned long data) tipc_node_calculate_timer(n, l); rc = tipc_link_timeout(l, &xmitq); if (rc & TIPC_LINK_DOWN_EVT) - tipc_link_reset(l); + tipc_node_link_down(n, bearer_id); } tipc_node_unlock(n); maddr = &n->links[bearer_id].maddr; @@ -338,10 +338,15 @@ void tipc_node_link_down(struct tipc_node *n, int bearer_id) struct tipc_link *l, *_l; l = n->links[bearer_id].link; + if (!l || !tipc_link_is_up(l)) + return; + n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; n->link_id = l->peer_bearer_id << 16 | l->bearer_id; + tipc_bearer_remove_dest(n->net, l->bearer_id, n->addr); + pr_debug("Lost link <%s> on network plane %c\n", l->name, l->net_plane); @@ -352,6 +357,8 @@ void tipc_node_link_down(struct tipc_node *n, int 
bearer_id) _l = n->links[i].link; if (!_l || !tipc_link_is_up(_l)) continue; + if (_l == l) + continue; if (_l->priority < highest) continue; if (_l->priority > highest) { @@ -362,9 +369,13 @@ void tipc_node_link_down(struct tipc_node *n, int bearer_id) } *slot1 = i; } + if (tipc_node_is_up(n)) tipc_link_failover_send_queue(l); - else + + tipc_link_reset(l); + + if (!tipc_node_is_up(n)) node_lost_contact(n); } @@ -403,7 +414,7 @@ bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b, } memcpy(&l->media_addr, maddr, sizeof(*maddr)); memcpy(curr, maddr, sizeof(*maddr)); - tipc_link_reset(l); + tipc_node_link_down(n, b->identity); return true; } @@ -418,7 +429,7 @@ void tipc_node_delete_links(struct net *net, int bearer_id) tipc_node_lock(n); l = n->links[bearer_id].link; if (l) { - tipc_link_reset(l); + tipc_node_link_down(n, bearer_id); n->links[bearer_id].link = NULL; n->link_cnt--; } @@ -439,8 +450,9 @@ static void tipc_node_reset_links(struct tipc_node *n) tipc_addr_string_fill(addr_string, n->addr)); for (i = 0; i < MAX_BEARERS; i++) { - if (n->links[i].link) - tipc_link_reset(n->links[i].link); + if (!n->links[i].link) + continue; + tipc_node_link_down(n, i); } tipc_node_unlock(n); } @@ -837,7 +849,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, if (likely(l)) rc = tipc_link_xmit(l, list, &xmitq); if (unlikely(rc == -ENOBUFS)) - tipc_link_reset(l); + tipc_node_link_down(n, bearer_id); tipc_node_unlock(n); tipc_node_put(n); } @@ -902,7 +914,7 @@ static void tipc_node_tnl_init(struct tipc_node *n, int bearer_id, if (msg_type(hdr) == FAILOVER_MSG) { if (tipc_link_is_up(pl)) { - tipc_link_reset(pl); + tipc_node_link_down(n, pb_id); pl->exec_mode = TIPC_LINK_BLOCKED; } } @@ -978,7 +990,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, bearer_id); if (unlikely(rc & TIPC_LINK_DOWN_EVT)) - tipc_link_reset(l); + tipc_node_link_down(n, bearer_id); 
skb = NULL; unlock: tipc_node_unlock(n); -- cgit From 66996b6c47ed7f6bbb01a768e23fae262c7db8e0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:18 -0400 Subject: tipc: extend node FSM In the next commit, we will move link synch/failover orchestration to the link aggregation level. In order to do this, we first need to extend the node FSM with two more states, NODE_SYNCHING and NODE_FAILINGOVER, plus four new events to enter and leave those states. This commit introduces this change, without yet making use of it. The node FSM now looks as follows: +-----------------------------------------+ | PEER_DOWN_EVT| | | +------------------------+----------------+ | |SELF_DOWN_EVT | | | | | | | | +-----------+ +-----------+ | | |NODE_ | |NODE_ | | | +----------|FAILINGOVER|<---------|SYNCHING |------------+ | | |SELF_ +-----------+ FAILOVER_+-----------+ PEER_ | | | |DOWN_EVT | A BEGIN_EVT A | DOWN_EVT| | | | | | | | | | | | | | | | | | | | |FAILOVER_|FAILOVER_ |SYNCH_ |SYNCH_ | | | | |END_EVT |BEGIN_EVT |BEGIN_EVT|END_EVT | | | | | | | | | | | | | | | | | | | | | +--------------+ | | | | | +------->| SELF_UP_ |<-------+ | | | | +----------------| PEER_UP |------------------+ | | | | |SELF_DOWN_EVT +--------------+ PEER_DOWN_EVT| | | | | | A A | | | | | | | | | | | | | | PEER_UP_EVT| |SELF_UP_EVT | | | | | | | | | | | V V V | | V V V +------------+ +-----------+ +-----------+ +------------+ |SELF_DOWN_ | |SELF_UP_ | |PEER_UP_ | |PEER_DOWN | |PEER_LEAVING|<------|PEER_COMING| |SELF_COMING|------>|SELF_LEAVING| +------------+ SELF_ +-----------+ +-----------+ PEER_ +------------+ | DOWN_EVT A A DOWN_EVT | | | | | | | | | | SELF_UP_EVT| |PEER_UP_EVT | | | | | | | | | |PEER_DOWN_EVT +--------------+ SELF_DOWN_EVT| +------------------->| SELF_DOWN_ |<--------------------+ | PEER_DOWN | +--------------+ Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/node.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- net/tipc/node.h | 14 ++++++--- 2 files changed, 92 insertions(+), 11 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index 65c2c80cffe7..6b18d73830ca 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -494,8 +494,12 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) case SELF_LOST_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: default: - pr_err("Unknown node fsm evt %x/%x\n", state, evt); + goto illegal_evt; } break; case SELF_UP_PEER_UP: @@ -506,11 +510,19 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) case PEER_LOST_CONTACT_EVT: state = SELF_LEAVING_PEER_DOWN; break; + case NODE_SYNCH_BEGIN_EVT: + state = NODE_SYNCHING; + break; + case NODE_FAILOVER_BEGIN_EVT: + state = NODE_FAILINGOVER; + break; case SELF_ESTABL_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: + case NODE_SYNCH_END_EVT: + case NODE_FAILOVER_END_EVT: break; default: - pr_err("Unknown node fsm evt %x/%x\n", state, evt); + goto illegal_evt; } break; case SELF_DOWN_PEER_LEAVING: @@ -522,8 +534,12 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) case PEER_ESTABL_CONTACT_EVT: case SELF_LOST_CONTACT_EVT: break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: default: - pr_err("Unknown node fsm evt %x/%x\n", state, evt); + goto illegal_evt; } break; case SELF_UP_PEER_COMING: @@ -537,8 +553,12 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: default: - pr_err("Unknown node fsm evt %x/%x\n", state, evt); + goto illegal_evt; } break; case SELF_COMING_PEER_UP: @@ 
-552,8 +572,12 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) case SELF_LOST_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: default: - pr_err("Unknown node fsm evt %x/%x\n", state, evt); + goto illegal_evt; } break; case SELF_LEAVING_PEER_DOWN: @@ -565,16 +589,67 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) case PEER_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case NODE_FAILINGOVER: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case NODE_FAILOVER_END_EVT: + state = SELF_UP_PEER_UP; + break; + case NODE_FAILOVER_BEGIN_EVT: + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + case NODE_SYNCH_BEGIN_EVT: + case NODE_SYNCH_END_EVT: default: - pr_err("Unknown node fsm evt %x/%x\n", state, evt); + goto illegal_evt; + } + break; + case NODE_SYNCHING: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case NODE_SYNCH_END_EVT: + state = SELF_UP_PEER_UP; + break; + case NODE_FAILOVER_BEGIN_EVT: + state = NODE_FAILINGOVER; + break; + case NODE_SYNCH_BEGIN_EVT: + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; } break; default: pr_err("Unknown node fsm state %x\n", state); break; } - n->state = state; + return; + +illegal_evt: + pr_err("Illegal node fsm evt %x in state %x\n", evt, state); } bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_link *l, diff --git a/net/tipc/node.h b/net/tipc/node.h index 
49df0e934a65..65e2728f66a6 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -56,13 +56,19 @@ enum { SELF_UP_PEER_COMING = 0xac, SELF_COMING_PEER_UP = 0xca, SELF_LEAVING_PEER_DOWN = 0x1d, + NODE_FAILINGOVER = 0xf0, + NODE_SYNCHING = 0xcc }; enum { - SELF_ESTABL_CONTACT_EVT = 0xec, - SELF_LOST_CONTACT_EVT = 0x1c, - PEER_ESTABL_CONTACT_EVT = 0xfec, - PEER_LOST_CONTACT_EVT = 0xf1c + SELF_ESTABL_CONTACT_EVT = 0xece, + SELF_LOST_CONTACT_EVT = 0x1ce, + PEER_ESTABL_CONTACT_EVT = 0xfece, + PEER_LOST_CONTACT_EVT = 0xf1ce, + NODE_FAILOVER_BEGIN_EVT = 0xfbe, + NODE_FAILOVER_END_EVT = 0xfee, + NODE_SYNCH_BEGIN_EVT = 0xcbe, + NODE_SYNCH_END_EVT = 0xcee }; /* Flags used to take different actions according to flag type -- cgit From 6e498158a827fd515b514842e9a06bdf0f75ab86 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:19 -0400 Subject: tipc: move link synch and failover to link aggregation level Link failover and synchronization have until now been handled by the links themselves, forcing them to have knowledge about and to access parallel links in order to make the two algorithms work correctly. In this commit, we move the control part of this functionality to the link aggregation level in node.c, which is the right location for this. As a result, the two algorithms become easier to follow, and the link implementation becomes simpler. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 482 ++++++++++++++++---------------------------------------- net/tipc/link.h | 14 +- net/tipc/msg.h | 32 ++-- net/tipc/node.c | 291 ++++++++++++++++++++++------------ net/tipc/node.h | 31 +--- 5 files changed, 342 insertions(+), 508 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 2ccdb6ffd5c8..d5f4005f388f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -48,7 +48,7 @@ /* * Error message prefixes */ -static const char *link_co_err = "Link changeover error, "; +static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; static const char *link_unk_evt = "Unknown link event "; @@ -139,24 +139,6 @@ static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); -static int tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); - -/* - * Simple link routines - */ -static unsigned int align(unsigned int i) -{ - return (i + 3) & ~3u; -} - -static struct tipc_link *tipc_parallel_link(struct tipc_link *l) -{ - struct tipc_node *n = l->owner; - - if (node_active_link(n, 0) != l) - return node_active_link(n, 0); - return node_active_link(n, 1); -} /* * Simple non-static link routines (i.e. 
referenced outside this file) @@ -394,12 +376,10 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, /* Perform actions as decided by FSM */ if (actions & LINK_RESET) { l->exec_mode = TIPC_LINK_BLOCKED; - rc |= TIPC_LINK_DOWN_EVT; - } - if (actions & LINK_ACTIVATE) { - l->exec_mode = TIPC_LINK_OPEN; - rc |= TIPC_LINK_UP_EVT; + rc = TIPC_LINK_DOWN_EVT; } + if (actions & LINK_ACTIVATE) + rc = TIPC_LINK_UP_EVT; if (actions & (SND_STATE | SND_PROBE)) mtyp = STATE_MSG; if (actions & SND_RESET) @@ -461,6 +441,9 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int rc = 0; + if (l->exec_mode == TIPC_LINK_BLOCKED) + return rc; + link_profile_stats(l); if (l->silent_intv_cnt) rc = tipc_link_fsm_evt(l, SILENCE_EVT, xmitq); @@ -563,52 +546,42 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) tipc_link_reset_fragments(l_ptr); } -void tipc_link_reset(struct tipc_link *l_ptr) +void tipc_link_reset(struct tipc_link *l) { - u32 prev_state = l_ptr->state; - struct tipc_node *owner = l_ptr->owner; - struct tipc_link *pl = tipc_parallel_link(l_ptr); + struct tipc_node *owner = l->owner; - msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); + l->state = TIPC_LINK_RESETTING; /* Link is down, accept any session */ - l_ptr->peer_session = WILDCARD_SESSION; + l->peer_session = WILDCARD_SESSION; - /* Prepare for renewed mtu size negotiation */ - l_ptr->mtu = l_ptr->advertised_mtu; - - l_ptr->state = TIPC_LINK_RESETTING; + /* If peer is up, it only accepts an incremented session number */ + msg_set_session(l->pmsg, msg_session(l->pmsg) + 1); - if ((prev_state == TIPC_LINK_RESETTING) || - (prev_state == TIPC_LINK_ESTABLISHING)) - return; + /* Prepare for renewed mtu size negotiation */ + l->mtu = l->advertised_mtu; - if (tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { - l_ptr->exec_mode = TIPC_LINK_BLOCKED; - l_ptr->failover_checkpt = l_ptr->rcv_nxt; - pl->failover_pkts = FIRST_FAILOVER; - pl->failover_checkpt = l_ptr->rcv_nxt; 
- pl->failover_skb = l_ptr->reasm_buf; - } else { - kfree_skb(l_ptr->reasm_buf); - } /* Clean up all queues, except inputq: */ - __skb_queue_purge(&l_ptr->transmq); - __skb_queue_purge(&l_ptr->deferdq); + __skb_queue_purge(&l->transmq); + __skb_queue_purge(&l->deferdq); if (!owner->inputq) - owner->inputq = l_ptr->inputq; - skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); + owner->inputq = l->inputq; + skb_queue_splice_init(&l->wakeupq, owner->inputq); if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; - tipc_link_purge_backlog(l_ptr); - l_ptr->reasm_buf = NULL; - l_ptr->rcv_unacked = 0; - l_ptr->snd_nxt = 1; - l_ptr->rcv_nxt = 1; - l_ptr->silent_intv_cnt = 0; - l_ptr->stats.recv_info = 0; - l_ptr->stale_count = 0; - link_reset_statistics(l_ptr); + + tipc_link_purge_backlog(l); + kfree_skb(l->reasm_buf); + kfree_skb(l->failover_reasm_skb); + l->reasm_buf = NULL; + l->failover_reasm_skb = NULL; + l->rcv_unacked = 0; + l->snd_nxt = 1; + l->rcv_nxt = 1; + l->silent_intv_cnt = 0; + l->stats.recv_info = 0; + l->stale_count = 0; + link_reset_statistics(l); } /** @@ -751,20 +724,6 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return 0; } -static void skb2list(struct sk_buff *skb, struct sk_buff_head *list) -{ - skb_queue_head_init(list); - __skb_queue_tail(list, skb); -} - -static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) -{ - struct sk_buff_head head; - - skb2list(skb, &head); - return __tipc_link_xmit(link->owner->net, link, &head); -} - /* * tipc_link_sync_rcv - synchronize broadcast link endpoints. 
* Receive the sequence number where we should start receiving and @@ -955,32 +914,6 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, return 0; } -/* link_synch(): check if all packets arrived before the synch - * point have been consumed - * Returns true if the parallel links are synched, otherwise false - */ -static bool link_synch(struct tipc_link *l) -{ - unsigned int post_synch; - struct tipc_link *pl; - - pl = tipc_parallel_link(l); - if (pl == l) - goto synched; - - /* Was last pre-synch packet added to input queue ? */ - if (less_eq(pl->rcv_nxt, l->synch_point)) - return false; - - /* Is it still in the input queue ? */ - post_synch = mod(pl->rcv_nxt - l->synch_point) - 1; - if (skb_queue_len(pl->inputq) > post_synch) - return false; -synched: - l->exec_mode = TIPC_LINK_OPEN; - return true; -} - /* tipc_data_input - deliver data and name distr msgs to upper layer * * Consumes buffer if message is of right type @@ -1025,54 +958,59 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) /* tipc_link_input - process packet that has passed link protocol check * * Consumes buffer - * Node lock must be held */ -static int tipc_link_input(struct tipc_link *link, struct sk_buff *skb) +static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb) { - struct tipc_node *node = link->owner; - struct tipc_msg *msg = buf_msg(skb); + struct tipc_node *node = l->owner; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; - int pos = 0; + int usr = msg_user(hdr); int rc = 0; + int pos = 0; + int ipos = 0; - switch (msg_user(msg)) { - case TUNNEL_PROTOCOL: - if (msg_dup(msg)) { - link->exec_mode = TIPC_LINK_TUNNEL; - link->synch_point = msg_seqno(msg_get_wrapped(msg)); - kfree_skb(skb); - break; + if (unlikely(usr == TUNNEL_PROTOCOL)) { + if (msg_type(hdr) == SYNCH_MSG) { + __skb_queue_purge(&l->deferdq); + goto drop; } - rc |= tipc_link_failover_rcv(link, &skb); - if (!skb) - 
break; - if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { - tipc_data_input(link, skb); - break; - } - case MSG_BUNDLER: - link->stats.recv_bundles++; - link->stats.recv_bundled += msg_msgcnt(msg); + if (!tipc_msg_extract(skb, &iskb, &ipos)) + return rc; + kfree_skb(skb); + skb = iskb; + hdr = buf_msg(skb); + if (less(msg_seqno(hdr), l->drop_point)) + goto drop; + if (tipc_data_input(l, skb)) + return rc; + usr = msg_user(hdr); + reasm_skb = &l->failover_reasm_skb; + } + if (usr == MSG_BUNDLER) { + l->stats.recv_bundles++; + l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) - tipc_data_input(link, iskb); - break; - case MSG_FRAGMENTER: - link->stats.recv_fragments++; - if (tipc_buf_append(&link->reasm_buf, &skb)) { - link->stats.recv_fragmented++; - tipc_data_input(link, skb); - } else if (!link->reasm_buf) { - link->exec_mode = TIPC_LINK_BLOCKED; - rc |= TIPC_LINK_DOWN_EVT; + tipc_data_input(l, iskb); + return rc; + } else if (usr == MSG_FRAGMENTER) { + l->stats.recv_fragments++; + if (tipc_buf_append(reasm_skb, &skb)) { + l->stats.recv_fragmented++; + tipc_data_input(l, skb); + } else if (!*reasm_skb) { + l->exec_mode = TIPC_LINK_BLOCKED; + l->state = TIPC_LINK_RESETTING; + rc = TIPC_LINK_DOWN_EVT; } - break; - case BCAST_PROTOCOL: + return rc; + } else if (usr == BCAST_PROTOCOL) { tipc_link_sync_rcv(node, skb); - break; - default: - break; - }; + return rc; + } +drop: + kfree_skb(skb); return rc; } @@ -1100,7 +1038,6 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *arrvq = &l->deferdq; - struct sk_buff *tmp; struct tipc_msg *hdr; u16 seqno, rcv_nxt; int rc = 0; @@ -1112,18 +1049,18 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, return rc; } - skb_queue_walk_safe(arrvq, skb, tmp) { + while ((skb = skb_peek(arrvq))) { hdr = buf_msg(skb); /* Verify and update link state */ if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) { __skb_dequeue(arrvq); - rc |= 
tipc_link_proto_rcv(l, skb, xmitq); + rc = tipc_link_proto_rcv(l, skb, xmitq); continue; } if (unlikely(!link_working(l))) { - rc |= tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq); + rc = tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq); if (!link_working(l)) { kfree_skb(__skb_dequeue(arrvq)); return rc; @@ -1156,18 +1093,11 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, return rc; } - /* Synchronize with parallel link if applicable */ - if (unlikely(l->exec_mode == TIPC_LINK_TUNNEL)) - if (!msg_dup(hdr) && !link_synch(l)) { - kfree_skb(skb); - return rc; - } - /* Packet can be delivered */ l->rcv_nxt++; l->stats.recv_info++; if (unlikely(!tipc_data_input(l, skb))) - rc |= tipc_link_input(l, skb); + rc = tipc_link_input(l, skb); /* Ack at regular intervals */ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { @@ -1288,7 +1218,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_max_pkt(hdr, l->advertised_mtu); - msg_set_ack(hdr, l->failover_checkpt - 1); + msg_set_ack(hdr, l->rcv_nxt - 1); msg_set_next_sent(hdr, 1); } skb = tipc_buf_acquire(msg_size(hdr)); @@ -1296,223 +1226,75 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, return; skb_copy_to_linear_data(skb, hdr, msg_size(hdr)); skb->priority = TC_PRIO_CONTROL; - __skb_queue_head(xmitq, skb); -} - -/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to - * a different bearer. Owner node is locked. 
- */ -static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, - struct tipc_msg *msg, - u32 selector) -{ - struct tipc_link *tunnel; - struct sk_buff *skb; - u32 length = msg_size(msg); - - tunnel = node_active_link(l_ptr->owner, selector & 1); - if (!tipc_link_is_up(tunnel)) { - pr_warn("%stunnel link no longer available\n", link_co_err); - return; - } - msg_set_size(tunnel_hdr, length + INT_H_SIZE); - skb = tipc_buf_acquire(length + INT_H_SIZE); - if (!skb) { - pr_warn("%sunable to send tunnel msg\n", link_co_err); - return; - } - skb_copy_to_linear_data(skb, tunnel_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(skb, INT_H_SIZE, msg, length); - __tipc_link_xmit_skb(tunnel, skb); + __skb_queue_tail(xmitq, skb); } - -/* tipc_link_failover_send_queue(): A link has gone down, but a second - * link is still active. We can do failover. Tunnel the failing link's - * whole send queue via the remaining link. This way, we don't lose - * any packets, and sequence order is preserved for subsequent traffic - * sent over the remaining link. Owner node is locked. +/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets + * with contents of the link's transmit and backlog queues. 
*/ -void tipc_link_failover_send_queue(struct tipc_link *l_ptr) +void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, + int mtyp, struct sk_buff_head *xmitq) { - int msgcount; - struct tipc_link *tunnel = node_active_link(l_ptr->owner, 0); - struct tipc_msg tunnel_hdr; - struct sk_buff *skb; - int split_bundles; + struct sk_buff *skb, *tnlskb; + struct tipc_msg *hdr, tnlhdr; + struct sk_buff_head *queue = &l->transmq; + struct sk_buff_head tmpxq, tnlq; + u16 pktlen, pktcnt, seqno = l->snd_nxt; - if (!tunnel) + if (!tnl) return; - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, - FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); + skb_queue_head_init(&tnlq); + skb_queue_head_init(&tmpxq); - skb_queue_walk(&l_ptr->backlogq, skb) { - msg_set_seqno(buf_msg(skb), l_ptr->snd_nxt); - l_ptr->snd_nxt = mod(l_ptr->snd_nxt + 1); - } - skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); - tipc_link_purge_backlog(l_ptr); - msgcount = skb_queue_len(&l_ptr->transmq); - msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); - msg_set_msgcnt(&tunnel_hdr, msgcount); - - if (skb_queue_empty(&l_ptr->transmq)) { - skb = tipc_buf_acquire(INT_H_SIZE); - if (skb) { - skb_copy_to_linear_data(skb, &tunnel_hdr, INT_H_SIZE); - msg_set_size(&tunnel_hdr, INT_H_SIZE); - __tipc_link_xmit_skb(tunnel, skb); - } else { - pr_warn("%sunable to send changeover msg\n", - link_co_err); - } + /* At least one packet required for safe algorithm => add dummy */ + skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, 0, l->addr, link_own_addr(l), + 0, 0, TIPC_ERR_NO_PORT); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } - - split_bundles = (node_active_link(l_ptr->owner, 0) != - node_active_link(l_ptr->owner, 0)); - - skb_queue_walk(&l_ptr->transmq, skb) { - struct tipc_msg *msg = buf_msg(skb); - - if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { - struct tipc_msg *m = msg_get_wrapped(msg); - unchar *pos = 
(unchar *)m; - - msgcount = msg_msgcnt(msg); - while (msgcount--) { - msg_set_seqno(m, msg_seqno(msg)); - tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, m, - msg_link_selector(m)); - pos += align(msg_size(m)); - m = (struct tipc_msg *)pos; - } - } else { - tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, msg, - msg_link_selector(msg)); - } - } -} - -/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a - * duplicate of the first link's send queue via the new link. This way, we - * are guaranteed that currently queued packets from a socket are delivered - * before future traffic from the same socket, even if this is using the - * new link. The last arriving copy of each duplicate packet is dropped at - * the receiving end by the regular protocol check, so packet cardinality - * and sequence order is preserved per sender/receiver socket pair. - * Owner node is locked. - */ -void tipc_link_dup_queue_xmit(struct tipc_link *link, - struct tipc_link *tnl) -{ - struct sk_buff *skb; - struct tipc_msg tnl_hdr; - struct sk_buff_head *queue = &link->transmq; - int mcnt; - u16 seqno; - - tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, - SYNCH_MSG, INT_H_SIZE, link->addr); - mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); - msg_set_msgcnt(&tnl_hdr, mcnt); - msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); - -tunnel_queue: + skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, &tmpxq); + __skb_queue_purge(&tmpxq); + + /* Initialize reusable tunnel packet header */ + tipc_msg_init(link_own_addr(l), &tnlhdr, TUNNEL_PROTOCOL, + mtyp, INT_H_SIZE, l->addr); + pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq); + msg_set_msgcnt(&tnlhdr, pktcnt); + msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); +tnl: + /* Wrap each packet into a tunnel packet */ skb_queue_walk(queue, skb) { - struct sk_buff *outskb; - struct tipc_msg *msg = buf_msg(skb); - u32 len = msg_size(msg); - - msg_set_ack(msg, mod(link->rcv_nxt - 1)); - 
msg_set_bcast_ack(msg, link->owner->bclink.last_in); - msg_set_size(&tnl_hdr, len + INT_H_SIZE); - outskb = tipc_buf_acquire(len + INT_H_SIZE); - if (outskb == NULL) { - pr_warn("%sunable to send duplicate msg\n", - link_co_err); + hdr = buf_msg(skb); + if (queue == &l->backlogq) + msg_set_seqno(hdr, seqno++); + pktlen = msg_size(hdr); + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE); + if (!tnlskb) { + pr_warn("%sunable to send packet\n", link_co_err); return; } - skb_copy_to_linear_data(outskb, &tnl_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, - skb->data, len); - __tipc_link_xmit_skb(tnl, outskb); - if (!tipc_link_is_up(link)) - return; + skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen); + __skb_queue_tail(&tnlq, tnlskb); } - if (queue == &link->backlogq) - return; - seqno = link->snd_nxt; - skb_queue_walk(&link->backlogq, skb) { - msg_set_seqno(buf_msg(skb), seqno); - seqno = mod(seqno + 1); - } - queue = &link->backlogq; - goto tunnel_queue; -} - -/* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet - * Owner node is locked. - */ -static int tipc_link_failover_rcv(struct tipc_link *link, - struct sk_buff **skb) -{ - struct tipc_msg *msg = buf_msg(*skb); - struct sk_buff *iskb = NULL; - struct tipc_link *pl = NULL; - int bearer_id = msg_bearer_id(msg); - int pos = 0; - int rc = 0; - - if (msg_type(msg) != FAILOVER_MSG) { - pr_warn("%sunknown tunnel pkt received\n", link_co_err); - goto exit; + if (queue != &l->backlogq) { + queue = &l->backlogq; + goto tnl; } - if (bearer_id >= MAX_BEARERS) - goto exit; - - if (bearer_id == link->bearer_id) - goto exit; - - pl = link->owner->links[bearer_id].link; - - if (link->failover_pkts == FIRST_FAILOVER) - link->failover_pkts = msg_msgcnt(msg); - - /* Should we expect an inner packet? 
*/ - if (!link->failover_pkts) - goto exit; - if (!tipc_msg_extract(*skb, &iskb, &pos)) { - pr_warn("%sno inner failover pkt\n", link_co_err); - *skb = NULL; - goto exit; - } - link->failover_pkts--; - *skb = NULL; + tipc_link_xmit(tnl, &tnlq, xmitq); - /* Was this packet already delivered? */ - if (less(buf_seqno(iskb), link->failover_checkpt)) { - kfree_skb(iskb); - iskb = NULL; - goto exit; - } - if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { - link->stats.recv_fragments++; - if (!tipc_buf_append(&link->failover_skb, &iskb) && - !link->failover_skb) { - link->exec_mode = TIPC_LINK_BLOCKED; - rc |= TIPC_LINK_DOWN_EVT; - } + if (mtyp == FAILOVER_MSG) { + tnl->drop_point = l->rcv_nxt; + tnl->failover_reasm_skb = l->reasm_buf; + l->reasm_buf = NULL; + l->exec_mode = TIPC_LINK_BLOCKED; } -exit: - if (!link->failover_pkts && pl) - pl->exec_mode = TIPC_LINK_OPEN; - kfree_skb(*skb); - *skb = iskb; - return rc; } /* tipc_link_proto_rcv(): receive link level protocol message : @@ -1593,7 +1375,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* If NACK, retransmit will now start at right position */ if (nacked_gap) { - rc |= tipc_link_retransm(l, nacked_gap, xmitq); + rc = tipc_link_retransm(l, nacked_gap, xmitq); l->stats.recv_nacks++; } tipc_link_advance_backlog(l, xmitq); diff --git a/net/tipc/link.h b/net/tipc/link.h index bb1378b7cb59..e377d9ba41c5 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -164,13 +164,11 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; - u8 exec_mode; - u16 synch_point; - /* Failover */ - u16 failover_pkts; - u16 failover_checkpt; - struct sk_buff *failover_skb; + /* Failover/synch */ + u8 exec_mode; + u16 drop_point; + struct sk_buff *failover_reasm_skb; /* Max packet negotiation */ u16 mtu; @@ -212,8 +210,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n, const struct tipc_media_addr *maddr, struct sk_buff_head *inputq, struct sk_buff_head *namedq); -void 
tipc_link_failover_send_queue(struct tipc_link *l_ptr); -void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, + int mtyp, struct sk_buff_head *xmitq); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 234fb0531d1d..115bb2aa6bed 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -110,7 +110,6 @@ struct tipc_skb_cb { struct sk_buff *tail; bool validated; bool wakeup_pending; - bool bundling; u16 chain_sz; u16 chain_imp; }; @@ -559,15 +558,6 @@ static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 15, 0x1fff, n); } -static inline bool msg_dup(struct tipc_msg *m) -{ - if (likely(msg_user(m) != TUNNEL_PROTOCOL)) - return false; - if (msg_type(m) != SYNCH_MSG) - return false; - return true; -} - /* * Word 2 */ @@ -621,12 +611,12 @@ static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) } -static inline u32 msg_next_sent(struct tipc_msg *m) +static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); } -static inline void msg_set_next_sent(struct tipc_msg *m, u32 n) +static inline void msg_set_next_sent(struct tipc_msg *m, u16 n) { msg_set_bits(m, 4, 0, 0xffff, n); } @@ -727,12 +717,12 @@ static inline char *msg_media_addr(struct tipc_msg *m) /* * Word 9 */ -static inline u32 msg_msgcnt(struct tipc_msg *m) +static inline u16 msg_msgcnt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } -static inline void msg_set_msgcnt(struct tipc_msg *m, u32 n) +static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } @@ -767,19 +757,19 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline bool msg_is_traffic(struct tipc_msg *m) +static inline bool 
msg_peer_link_is_up(struct tipc_msg *m) { if (likely(msg_user(m) != LINK_PROTOCOL)) return true; - if ((msg_type(m) == RESET_MSG) || (msg_type(m) == ACTIVATE_MSG)) - return false; - return true; + if (msg_type(m) == STATE_MSG) + return true; + return false; } -static inline bool msg_peer_is_up(struct tipc_msg *m) +static inline bool msg_peer_node_is_up(struct tipc_msg *m) { - if (likely(msg_is_traffic(m))) - return false; + if (msg_peer_link_is_up(m)) + return true; return msg_redundant_link(m); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 6b18d73830ca..b0372bb107f6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,31 @@ #include "bcast.h" #include "discover.h" +/* Node FSM states and events: + */ +enum { + SELF_DOWN_PEER_DOWN = 0xdd, + SELF_UP_PEER_UP = 0xaa, + SELF_DOWN_PEER_LEAVING = 0xd1, + SELF_UP_PEER_COMING = 0xac, + SELF_COMING_PEER_UP = 0xca, + SELF_LEAVING_PEER_DOWN = 0x1d, + NODE_FAILINGOVER = 0xf0, + NODE_SYNCHING = 0xcc +}; + +enum { + SELF_ESTABL_CONTACT_EVT = 0xece, + SELF_LOST_CONTACT_EVT = 0x1ce, + PEER_ESTABL_CONTACT_EVT = 0x9ece, + PEER_LOST_CONTACT_EVT = 0x91ce, + NODE_FAILOVER_BEGIN_EVT = 0xfbe, + NODE_FAILOVER_END_EVT = 0xfee, + NODE_SYNCH_BEGIN_EVT = 0xcbe, + NODE_SYNCH_END_EVT = 0xcee +}; + +static void tipc_node_link_down(struct tipc_node *n, int bearer_id); static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); @@ -281,69 +306,75 @@ static void tipc_node_timeout(unsigned long data) * * Link becomes active (alone or shared) or standby, depending on its priority. 
*/ -void tipc_node_link_up(struct tipc_node *n, int bearer_id) +static void tipc_node_link_up(struct tipc_node *n, int bearer_id, + struct sk_buff_head *xmitq) { int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; - struct tipc_link_entry *links = n->links; - struct tipc_link *l = n->links[bearer_id].link; - - /* Leave room for tunnel header when returning 'mtu' to users: */ - links[bearer_id].mtu = l->mtu - INT_H_SIZE; + struct tipc_link *ol = node_active_link(n, 0); + struct tipc_link *nl = n->links[bearer_id].link; + if (n->working_links > 1) { + pr_warn("Attempt to establish 3rd link to %x\n", n->addr); + return; + } n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; - n->link_id = l->peer_bearer_id << 16 | l->bearer_id; + n->link_id = nl->peer_bearer_id << 16 | bearer_id; + + /* Leave room for tunnel header when returning 'mtu' to users: */ + n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); pr_debug("Established link <%s> on network plane %c\n", - l->name, l->net_plane); + nl->name, nl->net_plane); - /* No active links ? => take both active slots */ - if (!tipc_node_is_up(n)) { + /* First link? => give it both slots */ + if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; + nl->exec_mode = TIPC_LINK_OPEN; node_established_contact(n); return; } - /* Lower prio than current active ? => no slot */ - if (l->priority < links[*slot0].link->priority) { - pr_debug("New link <%s> becomes standby\n", l->name); - return; - } - tipc_link_dup_queue_xmit(links[*slot0].link, l); - - /* Same prio as current active ? 
=> take one slot */ - if (l->priority == links[*slot0].link->priority) { + /* Second link => redistribute slots */ + if (nl->priority > ol->priority) { + pr_debug("Old link <%s> becomes standby\n", ol->name); *slot0 = bearer_id; - return; + *slot1 = bearer_id; + } else if (nl->priority == ol->priority) { + *slot0 = bearer_id; + } else { + pr_debug("New link <%s> is standby\n", nl->name); } - /* Higher prio than current active => take both active slots */ - pr_debug("Old link <%s> now standby\n", links[*slot0].link->name); - *slot0 = bearer_id; - *slot1 = bearer_id; + /* Prepare synchronization with first link */ + tipc_link_tnl_prepare(ol, nl, SYNCH_MSG, xmitq); } /** * tipc_node_link_down - handle loss of link */ -void tipc_node_link_down(struct tipc_node *n, int bearer_id) +static void tipc_node_link_down(struct tipc_node *n, int bearer_id) { int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; + struct tipc_media_addr *maddr = &n->links[bearer_id].maddr; int i, highest = 0; - struct tipc_link *l, *_l; + struct tipc_link *l, *_l, *tnl; + struct sk_buff_head xmitq; l = n->links[bearer_id].link; if (!l || !tipc_link_is_up(l)) return; + __skb_queue_head_init(&xmitq); + n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n->link_id = l->peer_bearer_id << 16 | l->bearer_id; + n->link_id = l->peer_bearer_id << 16 | bearer_id; tipc_bearer_remove_dest(n->net, l->bearer_id, n->addr); @@ -370,13 +401,19 @@ void tipc_node_link_down(struct tipc_node *n, int bearer_id) *slot1 = i; } - if (tipc_node_is_up(n)) - tipc_link_failover_send_queue(l); + if (!tipc_node_is_up(n)) { + tipc_link_reset(l); + node_lost_contact(n); + return; + } + /* There is still a working link => initiate failover */ + tnl = node_active_link(n, 0); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); + n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); + tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, &xmitq); tipc_link_reset(l); - - if (!tipc_node_is_up(n)) - node_lost_contact(n); + 
tipc_bearer_xmit(n->net, tnl->bearer_id, &xmitq, maddr); } bool tipc_node_is_up(struct tipc_node *n) @@ -652,37 +689,22 @@ illegal_evt: pr_err("Illegal node fsm evt %x in state %x\n", evt, state); } -bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_link *l, - struct tipc_msg *hdr) +bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) { int state = n->state; if (likely(state == SELF_UP_PEER_UP)) return true; - if (state == SELF_DOWN_PEER_DOWN) - return true; - - if (state == SELF_UP_PEER_COMING) { - /* If not traffic msg, peer may still be ESTABLISHING */ - if (tipc_link_is_up(l) && msg_is_traffic(hdr)) - tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT); - return true; - } - - if (state == SELF_COMING_PEER_UP) - return true; - if (state == SELF_LEAVING_PEER_DOWN) return false; if (state == SELF_DOWN_PEER_LEAVING) { - if (msg_peer_is_up(hdr)) + if (msg_peer_node_is_up(hdr)) return false; - tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); - return true; } - return false; + + return true; } static void node_established_contact(struct tipc_node *n_ptr) @@ -727,10 +749,8 @@ static void node_lost_contact(struct tipc_node *n_ptr) if (!l_ptr) continue; l_ptr->exec_mode = TIPC_LINK_OPEN; - l_ptr->failover_checkpt = 0; - l_ptr->failover_pkts = 0; - kfree_skb(l_ptr->failover_skb); - l_ptr->failover_skb = NULL; + kfree_skb(l_ptr->failover_reasm_skb); + l_ptr->failover_reasm_skb = NULL; tipc_link_reset_fragments(l_ptr); } /* Prevent re-contact with node until cleanup is done */ @@ -961,38 +981,111 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } -/* tipc_node_tnl_init(): handle a received TUNNEL_PROTOCOL packet, - * in order to control parallel link failover or synchronization +/** + * tipc_node_check_state - check and if necessary update node state + * @skb: TIPC packet + * @bearer_id: identity of bearer delivering the packet + * Returns true if state is ok, otherwise consumes buffer and returns false */ -static void 
tipc_node_tnl_init(struct tipc_node *n, int bearer_id, - struct sk_buff *skb) +static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, + int bearer_id) { - struct tipc_link *tnl, *pl; struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + int mtyp = msg_type(hdr); u16 oseqno = msg_seqno(hdr); - int pb_id = msg_bearer_id(hdr); + u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); + u16 exp_pkts = msg_msgcnt(hdr); + u16 rcv_nxt, syncpt, dlv_nxt; + int state = n->state; + struct tipc_link *l, *pl = NULL; + struct sk_buff_head; + int i; - if (pb_id >= MAX_BEARERS) - return; + l = n->links[bearer_id].link; + if (!l) + return false; + rcv_nxt = l->rcv_nxt; - tnl = n->links[bearer_id].link; - if (!tnl) - return; - /* Ignore if duplicate */ - if (less(oseqno, tnl->rcv_nxt)) - return; + if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) + return true; - pl = n->links[pb_id].link; - if (!pl) - return; + /* Find parallel link, if any */ + for (i = 0; i < MAX_BEARERS; i++) { + if ((i != bearer_id) && n->links[i].link) { + pl = n->links[i].link; + break; + } + } - if (msg_type(hdr) == FAILOVER_MSG) { - if (tipc_link_is_up(pl)) { - tipc_node_link_down(n, pb_id); + /* Update node accessibility if applicable */ + if (state == SELF_UP_PEER_COMING) { + if (!tipc_link_is_up(l)) + return true; + if (!msg_peer_link_is_up(hdr)) + return true; + tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT); + } + + if (state == SELF_DOWN_PEER_LEAVING) { + if (msg_peer_node_is_up(hdr)) + return false; + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + } + + /* Ignore duplicate packets */ + if (less(oseqno, rcv_nxt)) + return true; + + /* Initiate or update failover mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { + syncpt = oseqno + exp_pkts - 1; + if (pl && tipc_link_is_up(pl)) { + tipc_node_link_down(n, pl->bearer_id); pl->exec_mode = TIPC_LINK_BLOCKED; } + /* If pkts arrive out of order, use lowest calculated syncpt */ + if 
(less(syncpt, n->sync_point)) + n->sync_point = syncpt; + } + + /* Open parallel link when tunnel link reaches synch point */ + if ((n->state == NODE_FAILINGOVER) && (more(rcv_nxt, n->sync_point))) { + tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); + if (pl) + pl->exec_mode = TIPC_LINK_OPEN; + return true; + } + + /* Initiate or update synch mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { + syncpt = iseqno + exp_pkts - 1; + if (n->state == SELF_UP_PEER_UP) { + n->sync_point = syncpt; + tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); + } + l->exec_mode = TIPC_LINK_TUNNEL; + if (less(syncpt, n->sync_point)) + n->sync_point = syncpt; } + + /* Open tunnel link when parallel link reaches synch point */ + if ((n->state == NODE_SYNCHING) && (l->exec_mode == TIPC_LINK_TUNNEL)) { + if (pl) + dlv_nxt = mod(pl->rcv_nxt - skb_queue_len(pl->inputq)); + if (!pl || more(dlv_nxt, n->sync_point)) { + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + l->exec_mode = TIPC_LINK_OPEN; + return true; + } + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) + return true; + if (usr == LINK_PROTOCOL) + return true; + return false; + } + return true; } /** @@ -1008,12 +1101,11 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { struct sk_buff_head xmitq; struct tipc_node *n; - struct tipc_link *l; - struct tipc_msg *hdr; - struct tipc_media_addr *maddr; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); int bearer_id = b->identity; + struct tipc_link_entry *le; int rc = 0; - int usr; __skb_queue_head_init(&xmitq); @@ -1022,8 +1114,6 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) goto discard; /* Handle arrival of a non-unicast link packet */ - hdr = buf_msg(skb); - usr = msg_user(hdr); if (unlikely(msg_non_seq(hdr))) { if (usr == LINK_CONFIG) tipc_disc_rcv(net, skb, b); @@ -1036,42 +1126,41 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) n = tipc_node_find(net, 
msg_prevnode(hdr)); if (unlikely(!n)) goto discard; - tipc_node_lock(n); + le = &n->links[bearer_id]; - /* Prepare links for tunneled reception if applicable */ - if (unlikely(usr == TUNNEL_PROTOCOL)) - tipc_node_tnl_init(n, bearer_id, skb); + tipc_node_lock(n); - /* Locate link endpoint that should handle packet */ - l = n->links[bearer_id].link; - if (unlikely(!l)) + /* Is reception permitted at the moment ? */ + if (!tipc_node_filter_pkt(n, hdr)) goto unlock; - /* Is reception of this packet permitted at the moment ? */ - if (unlikely(n->state != SELF_UP_PEER_UP)) - if (!tipc_node_filter_skb(n, l, hdr)) - goto unlock; - - if (unlikely(usr == LINK_PROTOCOL)) + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) tipc_bclink_sync_state(n, hdr); /* Release acked broadcast messages */ if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); - /* Check protocol and update link state */ - rc = tipc_link_rcv(l, skb, &xmitq); + /* Check and if necessary update node state */ + if (likely(tipc_node_check_state(n, skb, bearer_id))) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } if (unlikely(rc & TIPC_LINK_UP_EVT)) - tipc_node_link_up(n, bearer_id); + tipc_node_link_up(n, bearer_id, &xmitq); + if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id); - skb = NULL; unlock: tipc_node_unlock(n); - tipc_sk_rcv(net, &n->links[bearer_id].inputq); - maddr = &n->links[bearer_id].maddr; - tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); + + if (!skb_queue_empty(&le->inputq)) + tipc_sk_rcv(net, &le->inputq); + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_node_put(n); discard: kfree_skb(skb); diff --git a/net/tipc/node.h b/net/tipc/node.h index 65e2728f66a6..406c6fe0dbb2 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -47,33 +47,7 @@ #define INVALID_BEARER_ID -1 -/* Node FSM states and events: - */ -enum { - SELF_DOWN_PEER_DOWN = 0xdd, - SELF_UP_PEER_UP = 
0xaa, - SELF_DOWN_PEER_LEAVING = 0xd1, - SELF_UP_PEER_COMING = 0xac, - SELF_COMING_PEER_UP = 0xca, - SELF_LEAVING_PEER_DOWN = 0x1d, - NODE_FAILINGOVER = 0xf0, - NODE_SYNCHING = 0xcc -}; - -enum { - SELF_ESTABL_CONTACT_EVT = 0xece, - SELF_LOST_CONTACT_EVT = 0x1ce, - PEER_ESTABL_CONTACT_EVT = 0xfece, - PEER_LOST_CONTACT_EVT = 0xf1ce, - NODE_FAILOVER_BEGIN_EVT = 0xfbe, - NODE_FAILOVER_END_EVT = 0xfee, - NODE_SYNCH_BEGIN_EVT = 0xcbe, - NODE_SYNCH_END_EVT = 0xcee -}; - /* Flags used to take different actions according to flag type - * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down - * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type @@ -133,6 +107,8 @@ struct tipc_link_entry { * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions * @bclink: broadcast-related info + * @state: connectivity state vs peer node + * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node @@ -156,6 +132,7 @@ struct tipc_node { struct tipc_node_bclink bclink; struct list_head list; int state; + u16 sync_point; int link_cnt; u16 working_links; u16 capabilities; @@ -180,8 +157,6 @@ bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *bearer, void tipc_node_delete_links(struct net *net, int bearer_id); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_link_down(struct tipc_node *n_ptr, int bearer_id); -void tipc_node_link_up(struct tipc_node *n_ptr, int bearer_id); bool tipc_node_is_up(struct tipc_node *n); int 
tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -- cgit From 5045f7b9009f1455268b98cecbcc271663934c85 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:20 -0400 Subject: tipc: move protocol message sending away from link FSM The implementation of the link FSM currently takes decisions about and sends out link protocol messages. This is unnecessary, since such actions are not the result of any link state change, and are even decided based on non-FSM state information ("silent_intv_cnt"). We now move the sending of unicast link protocol messages to the function tipc_link_timeout(), and the initial broadcast synchronization message to tipc_node_link_up(). The latter is done because a link instance should not need to know whether it is the first or second link to a destination. Such information is now restricted to and handled by the link aggregation layer in node.c. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 51 ++++++++++++++++++++++++++++++--------------------- net/tipc/link.h | 2 ++ net/tipc/node.c | 1 + 3 files changed, 33 insertions(+), 21 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index d5f4005f388f..9a3ccf910c49 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -134,8 +134,6 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct sk_buff_head *xmitq); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); @@ -245,8 +243,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, * Give a newly added peer node the sequence number where it should * start receiving and acking broadcast packets. 
*/ -static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +void tipc_link_build_bcast_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) { struct sk_buff *skb; struct sk_buff_head list; @@ -272,7 +270,7 @@ static void tipc_link_build_bcast_sync_msg(struct tipc_link *l, static int tipc_link_fsm_evt(struct tipc_link *l, int evt, struct sk_buff_head *xmitq) { - int mtyp = 0, rc = 0; + int rc = 0; struct tipc_link *pl; enum { LINK_RESET = 1, @@ -380,17 +378,7 @@ static int tipc_link_fsm_evt(struct tipc_link *l, int evt, } if (actions & LINK_ACTIVATE) rc = TIPC_LINK_UP_EVT; - if (actions & (SND_STATE | SND_PROBE)) - mtyp = STATE_MSG; - if (actions & SND_RESET) - mtyp = RESET_MSG; - if (actions & SND_ACTIVATE) - mtyp = ACTIVATE_MSG; - if (actions & (SND_PROBE | SND_STATE | SND_RESET | SND_ACTIVATE)) - tipc_link_build_proto_msg(l, mtyp, actions & SND_PROBE, - 0, 0, 0, xmitq); - if (actions & SND_BCAST_SYNC) - tipc_link_build_bcast_sync_msg(l, xmitq); + return rc; } @@ -440,16 +428,37 @@ static void link_profile_stats(struct tipc_link *l) int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int rc = 0; + int mtyp = STATE_MSG; + bool xmit = false; + bool prb = false; if (l->exec_mode == TIPC_LINK_BLOCKED) return rc; link_profile_stats(l); - if (l->silent_intv_cnt) - rc = tipc_link_fsm_evt(l, SILENCE_EVT, xmitq); - else if (link_working(l) && tipc_bclink_acks_missing(l->owner)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); - l->silent_intv_cnt++; + + if (l->state == TIPC_LINK_WORKING) { + if (!l->silent_intv_cnt) { + if (tipc_bclink_acks_missing(l->owner)) + xmit = true; + } else if (l->silent_intv_cnt <= l->abort_limit) { + xmit = true; + prb = true; + } else { + l->exec_mode = TIPC_LINK_BLOCKED; + rc |= TIPC_LINK_DOWN_EVT; + } + l->silent_intv_cnt++; + } else if (l->state == TIPC_LINK_RESETTING) { + xmit = true; + mtyp = RESET_MSG; + } else if (l->state == TIPC_LINK_ESTABLISHING) { + xmit 
= true; + mtyp = ACTIVATE_MSG; + } + if (xmit) + tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); + return rc; } diff --git a/net/tipc/link.h b/net/tipc/link.h index e377d9ba41c5..b317c4df9079 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -212,6 +212,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n, struct sk_buff_head *namedq); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); +void tipc_link_build_bcast_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); diff --git a/net/tipc/node.c b/net/tipc/node.c index b0372bb107f6..9e20acffb3d4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -335,6 +335,7 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; nl->exec_mode = TIPC_LINK_OPEN; + tipc_link_build_bcast_sync_msg(nl, xmitq); node_established_contact(n); return; } -- cgit From 662921cd0a53db4504838dfbb7d996f9e6e94001 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:21 -0400 Subject: tipc: merge link->exec_mode and link->state into one FSM Until now, we have been handling link failover and synchronization by using an additional link state variable, "exec_mode". This variable is not independent of the link FSM state, something causing a risk of inconsistencies, apart from the fact that it clutters the code. The conditions are now in place to define a new link FSM that covers all existing use cases, including failover and synchronization, and eliminate the "exec_mode" field altogether. The FSM must also support non-atomic resetting of links, which will be introduced later. The new link FSM is shown below, with 7 states and 8 events. Only events leading to state change are shown as edges. 
+------------------------------------+ |RESET_EVT | | | | +--------------+ | +-----------------| SYNCHING |-----------------+ | |FAILURE_EVT +--------------+ PEER_RESET_EVT| | | A | | | | | | | | | | | | | | |SYNCH_ |SYNCH_ | | | |BEGIN_EVT |END_EVT | | | | | | | V | V V | +-------------+ +--------------+ +------------+ | | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET | | +-------------+ FAILURE_ +--------------+ PEER_ +------------+ | | EVT | A RESET_EVT | | | | | | | | | | | | | +--------------+ | | | RESET_EVT| |RESET_EVT |ESTABLISH_EVT | | | | | | | | | | | | V V | | | +-------------+ +--------------+ RESET_EVT| +--->| RESET |--------->| ESTABLISHING |<----------------+ +-------------+ PEER_ +--------------+ | A RESET_EVT | | | | | | | |FAILOVER_ |FAILOVER_ |FAILOVER_ |BEGIN_EVT |END_EVT |BEGIN_EVT | | | V | | +-------------+ | | FAILINGOVER |<----------------+ +-------------+ These changes are fully backwards compatible. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 350 +++++++++++++++++++++++++++++++------------------------- net/tipc/link.h | 25 ++-- net/tipc/node.c | 31 ++--- 3 files changed, 226 insertions(+), 180 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 9a3ccf910c49..9840b03348e1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -50,7 +50,6 @@ */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; -static const char *link_unk_evt = "Unknown link event "; static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -85,46 +84,23 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define WILDCARD_SESSION 0x10000 -/* State value stored in 'failover_pkts' +/* Link FSM states: */ -#define FIRST_FAILOVER 0xffffu - -/* Link FSM states and events: - */ -enum { - TIPC_LINK_WORKING, - TIPC_LINK_PROBING, - TIPC_LINK_RESETTING, - TIPC_LINK_ESTABLISHING -}; - enum { - PEER_RESET_EVT = RESET_MSG, - ACTIVATE_EVT = ACTIVATE_MSG, - TRAFFIC_EVT, /* Any other valid msg from peer */ - SILENCE_EVT /* Peer was silent during last timer interval*/ + LINK_ESTABLISHED = 0xe, + LINK_ESTABLISHING = 0xe << 4, + LINK_RESET = 0x1 << 8, + LINK_RESETTING = 0x2 << 12, + LINK_PEER_RESET = 0xd << 16, + LINK_FAILINGOVER = 0xf << 20, + LINK_SYNCHING = 0xc << 24 }; /* Link FSM state checking routines */ -static int link_working(struct tipc_link *l) -{ - return l->state == TIPC_LINK_WORKING; -} - -static int link_probing(struct tipc_link *l) -{ - return l->state == TIPC_LINK_PROBING; -} - -static int link_resetting(struct tipc_link *l) +static int link_is_up(struct tipc_link *l) { - return l->state == TIPC_LINK_RESETTING; -} - -static int link_establishing(struct tipc_link *l) -{ - return l->state == TIPC_LINK_ESTABLISHING; + return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); } static int tipc_link_proto_rcv(struct 
tipc_link *l, struct sk_buff *skb, @@ -141,11 +117,29 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); /* * Simple non-static link routines (i.e. referenced outside this file) */ -int tipc_link_is_up(struct tipc_link *l_ptr) +bool tipc_link_is_up(struct tipc_link *l) { - if (!l_ptr) - return 0; - return link_working(l_ptr) || link_probing(l_ptr); + return link_is_up(l); +} + +bool tipc_link_is_reset(struct tipc_link *l) +{ + return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); +} + +bool tipc_link_is_synching(struct tipc_link *l) +{ + return l->state == LINK_SYNCHING; +} + +bool tipc_link_is_failingover(struct tipc_link *l) +{ + return l->state == LINK_FAILINGOVER; +} + +bool tipc_link_is_blocked(struct tipc_link *l) +{ + return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } int tipc_link_is_active(struct tipc_link *l) @@ -210,7 +204,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->tolerance = b_ptr->tolerance; l_ptr->snd_nxt = 1; l_ptr->rcv_nxt = 1; - l_ptr->state = TIPC_LINK_RESETTING; + l_ptr->state = LINK_RESET; l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; @@ -265,120 +259,159 @@ void tipc_link_build_bcast_sync_msg(struct tipc_link *l, * tipc_link_fsm_evt - link finite state machine * @l: pointer to link * @evt: state machine event to be processed - * @xmitq: queue to prepend created protocol message, if any */ -static int tipc_link_fsm_evt(struct tipc_link *l, int evt, - struct sk_buff_head *xmitq) +int tipc_link_fsm_evt(struct tipc_link *l, int evt) { int rc = 0; - struct tipc_link *pl; - enum { - LINK_RESET = 1, - LINK_ACTIVATE = (1 << 1), - SND_PROBE = (1 << 2), - SND_STATE = (1 << 3), - SND_RESET = (1 << 4), - SND_ACTIVATE = (1 << 5), - SND_BCAST_SYNC = (1 << 6) - } actions = 0; - - if (l->exec_mode == TIPC_LINK_BLOCKED) - return rc; switch (l->state) { - case TIPC_LINK_WORKING: + case LINK_RESETTING: switch (evt) { - case TRAFFIC_EVT: - case 
ACTIVATE_EVT: + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; break; - case SILENCE_EVT: - l->state = TIPC_LINK_PROBING; - actions |= SND_PROBE; + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_FAILURE_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILOVER_END_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_RESET: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_ESTABLISHING; break; - case PEER_RESET_EVT: - actions |= LINK_RESET | SND_ACTIVATE; + case LINK_FAILOVER_BEGIN_EVT: + l->state = LINK_FAILINGOVER; + case LINK_FAILURE_EVT: + case LINK_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILOVER_END_EVT: break; + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: default: - pr_debug("%s%u WORKING\n", link_unk_evt, evt); + goto illegal_evt; } break; - case TIPC_LINK_PROBING: + case LINK_PEER_RESET: switch (evt) { - case TRAFFIC_EVT: - case ACTIVATE_EVT: - l->state = TIPC_LINK_WORKING; + case LINK_RESET_EVT: + l->state = LINK_ESTABLISHING; break; - case PEER_RESET_EVT: - actions |= LINK_RESET | SND_ACTIVATE; + case LINK_PEER_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILURE_EVT: break; - case SILENCE_EVT: - if (l->silent_intv_cnt <= l->abort_limit) { - actions |= SND_PROBE; - break; - } - actions |= LINK_RESET | SND_RESET; + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_FAILINGOVER: + switch (evt) { + case LINK_FAILOVER_END_EVT: + l->state = LINK_RESET; break; + case LINK_PEER_RESET_EVT: + case LINK_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILURE_EVT: + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: default: - pr_err("%s%u PROBING\n", link_unk_evt, evt); + goto illegal_evt; } break; - case TIPC_LINK_RESETTING: + case 
LINK_ESTABLISHING: switch (evt) { - case TRAFFIC_EVT: + case LINK_ESTABLISH_EVT: + l->state = LINK_ESTABLISHED; + rc |= TIPC_LINK_UP_EVT; break; - case ACTIVATE_EVT: - pl = node_active_link(l->owner, 0); - if (pl && link_probing(pl)) - break; - l->state = TIPC_LINK_WORKING; - actions |= LINK_ACTIVATE; - if (!l->owner->working_links) - actions |= SND_BCAST_SYNC; + case LINK_FAILOVER_BEGIN_EVT: + l->state = LINK_FAILINGOVER; + break; + case LINK_PEER_RESET_EVT: + case LINK_RESET_EVT: + case LINK_FAILURE_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + break; + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_ESTABLISHED: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_FAILURE_EVT: + l->state = LINK_RESETTING; + rc |= TIPC_LINK_DOWN_EVT; break; - case PEER_RESET_EVT: - l->state = TIPC_LINK_ESTABLISHING; - actions |= SND_ACTIVATE; + case LINK_RESET_EVT: + l->state = LINK_RESET; break; - case SILENCE_EVT: - actions |= SND_RESET; + case LINK_ESTABLISH_EVT: break; + case LINK_SYNCH_BEGIN_EVT: + l->state = LINK_SYNCHING; + break; + case LINK_SYNCH_END_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: default: - pr_err("%s%u in RESETTING\n", link_unk_evt, evt); + goto illegal_evt; } break; - case TIPC_LINK_ESTABLISHING: + case LINK_SYNCHING: switch (evt) { - case TRAFFIC_EVT: - case ACTIVATE_EVT: - pl = node_active_link(l->owner, 0); - if (pl && link_probing(pl)) - break; - l->state = TIPC_LINK_WORKING; - actions |= LINK_ACTIVATE; - if (!l->owner->working_links) - actions |= SND_BCAST_SYNC; + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_FAILURE_EVT: + l->state = LINK_RESETTING; + rc |= TIPC_LINK_DOWN_EVT; break; - case PEER_RESET_EVT: + case LINK_RESET_EVT: + l->state = LINK_RESET; break; - case SILENCE_EVT: - actions |= SND_ACTIVATE; + case LINK_ESTABLISH_EVT: + case 
LINK_SYNCH_BEGIN_EVT: break; + case LINK_SYNCH_END_EVT: + l->state = LINK_ESTABLISHED; + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: default: - pr_err("%s%u ESTABLISHING\n", link_unk_evt, evt); + goto illegal_evt; } break; default: - pr_err("Unknown link state %u/%u\n", l->state, evt); - } - - /* Perform actions as decided by FSM */ - if (actions & LINK_RESET) { - l->exec_mode = TIPC_LINK_BLOCKED; - rc = TIPC_LINK_DOWN_EVT; + pr_err("Unknown FSM state %x in %s\n", l->state, l->name); } - if (actions & LINK_ACTIVATE) - rc = TIPC_LINK_UP_EVT; - + return rc; +illegal_evt: + pr_err("Illegal FSM event %x in state %x on link %s\n", + evt, l->state, l->name); return rc; } @@ -432,12 +465,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) bool xmit = false; bool prb = false; - if (l->exec_mode == TIPC_LINK_BLOCKED) - return rc; - link_profile_stats(l); - if (l->state == TIPC_LINK_WORKING) { + switch (l->state) { + case LINK_ESTABLISHED: + case LINK_SYNCHING: if (!l->silent_intv_cnt) { if (tipc_bclink_acks_missing(l->owner)) xmit = true; @@ -445,17 +477,26 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) xmit = true; prb = true; } else { - l->exec_mode = TIPC_LINK_BLOCKED; - rc |= TIPC_LINK_DOWN_EVT; + rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } l->silent_intv_cnt++; - } else if (l->state == TIPC_LINK_RESETTING) { + break; + case LINK_RESET: xmit = true; mtyp = RESET_MSG; - } else if (l->state == TIPC_LINK_ESTABLISHING) { + break; + case LINK_ESTABLISHING: xmit = true; mtyp = ACTIVATE_MSG; + break; + case LINK_RESETTING: + case LINK_PEER_RESET: + case LINK_FAILINGOVER: + break; + default: + break; } + if (xmit) tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); @@ -559,7 +600,7 @@ void tipc_link_reset(struct tipc_link *l) { struct tipc_node *owner = l->owner; - l->state = TIPC_LINK_RESETTING; + tipc_link_fsm_evt(l, LINK_RESET_EVT); /* Link is down, accept any session */ l->peer_session = 
WILDCARD_SESSION; @@ -902,8 +943,7 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, l->stale_count = 1; } else if (++l->stale_count > 100) { link_retransmit_failure(l, skb); - l->exec_mode = TIPC_LINK_BLOCKED; - return TIPC_LINK_DOWN_EVT; + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } skb_queue_walk(&l->transmq, skb) { if (!retransm) @@ -1002,25 +1042,23 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb) l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) tipc_data_input(l, iskb); - return rc; + return 0; } else if (usr == MSG_FRAGMENTER) { l->stats.recv_fragments++; if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; tipc_data_input(l, skb); } else if (!*reasm_skb) { - l->exec_mode = TIPC_LINK_BLOCKED; - l->state = TIPC_LINK_RESETTING; - rc = TIPC_LINK_DOWN_EVT; + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } - return rc; + return 0; } else if (usr == BCAST_PROTOCOL) { tipc_link_sync_rcv(node, skb); - return rc; + return 0; } drop: kfree_skb(skb); - return rc; + return 0; } static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) @@ -1068,9 +1106,9 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, continue; } - if (unlikely(!link_working(l))) { - rc = tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq); - if (!link_working(l)) { + if (unlikely(!link_is_up(l))) { + rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + if (!link_is_up(l)) { kfree_skb(__skb_dequeue(arrvq)); return rc; } @@ -1192,7 +1230,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, int node_up = l->owner->bclink.recv_permitted; /* Don't send protocol message during reset or link failover */ - if (l->exec_mode == TIPC_LINK_BLOCKED) + if (tipc_link_is_blocked(l)) return; msg_set_type(hdr, mtyp); @@ -1302,7 +1340,6 @@ tnl: tnl->drop_point = l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; - l->exec_mode = TIPC_LINK_BLOCKED; } } @@ -1323,7 
+1360,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, char *if_name; int rc = 0; - if (l->exec_mode == TIPC_LINK_BLOCKED) + if (tipc_link_is_blocked(l)) goto exit; if (link_own_addr(l) > msg_prevnode(hdr)) @@ -1337,6 +1374,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, (l->peer_session != WILDCARD_SESSION)) break; /* fall thru' */ + case ACTIVATE_MSG: /* Complete own link name with peer's interface name */ @@ -1355,13 +1393,20 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; + if (msg_type(hdr) == RESET_MSG) { + rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); + } else if (!link_is_up(l)) { + tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); + rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + } l->peer_session = msg_session(hdr); l->peer_bearer_id = msg_bearer_id(hdr); - rc = tipc_link_fsm_evt(l, msg_type(hdr), xmitq); if (l->mtu > msg_max_pkt(hdr)) l->mtu = msg_max_pkt(hdr); break; + case STATE_MSG: + /* Update own tolerance if peer indicates a non-zero value */ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) l->tolerance = peers_tol; @@ -1370,11 +1415,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->stats.recv_states++; if (msg_probe(hdr)) l->stats.recv_probes++; - rc = tipc_link_fsm_evt(l, TRAFFIC_EVT, xmitq); - if (!tipc_link_is_up(l)) + rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + if (!link_is_up(l)) break; - /* Has peer sent packets we haven't received yet ? 
*/ + /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, l->rcv_nxt)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || (msg_probe(hdr))) @@ -1387,6 +1432,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_retransm(l, nacked_gap, xmitq); l->stats.recv_nacks++; } + tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); @@ -1463,19 +1509,7 @@ static void link_print(struct tipc_link *l, const char *str) u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt; u16 tail = l->snd_nxt - 1; - pr_info("%s Link <%s>:", str, l->name); - - if (link_probing(l)) - pr_cont(":P\n"); - else if (link_establishing(l)) - pr_cont(":E\n"); - else if (link_resetting(l)) - pr_cont(":R\n"); - else if (link_working(l)) - pr_cont(":W\n"); - else - pr_cont("\n"); - + pr_info("%s Link <%s> state %x\n", str, l->name, l->state); pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n", skb_queue_len(&l->transmq), head, tail, skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt); diff --git a/net/tipc/link.h b/net/tipc/link.h index b317c4df9079..39b8c4c5121e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -49,13 +49,17 @@ */ #define INVALID_LINK_SEQ 0x10000 - -/* Link endpoint receive states +/* Link FSM events: */ enum { - TIPC_LINK_OPEN, - TIPC_LINK_BLOCKED, - TIPC_LINK_TUNNEL + LINK_ESTABLISH_EVT = 0xec1ab1e, + LINK_PEER_RESET_EVT = 0x9eed0e, + LINK_FAILURE_EVT = 0xfa110e, + LINK_RESET_EVT = 0x10ca1d0e, + LINK_FAILOVER_BEGIN_EVT = 0xfa110bee, + LINK_FAILOVER_END_EVT = 0xfa110ede, + LINK_SYNCH_BEGIN_EVT = 0xc1ccbee, + LINK_SYNCH_END_EVT = 0xc1ccede }; /* Events returned from link at packet reception or at timeout @@ -120,7 +124,6 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') - * @exec_mode: transmit/receive mode for link endpoint instance * 
@backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset @@ -155,7 +158,7 @@ struct tipc_link { u32 tolerance; unsigned long keepalive_intv; u32 abort_limit; - int state; + u32 state; u32 silent_intv_cnt; struct { unchar hdr[INT_H_SIZE]; @@ -166,7 +169,6 @@ struct tipc_link { char net_plane; /* Failover/synch */ - u8 exec_mode; u16 drop_point; struct sk_buff *failover_reasm_skb; @@ -214,8 +216,13 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_bcast_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); -int tipc_link_is_up(struct tipc_link *l_ptr); +bool tipc_link_is_up(struct tipc_link *l); +bool tipc_link_is_reset(struct tipc_link *l); +bool tipc_link_is_synching(struct tipc_link *l); +bool tipc_link_is_failingover(struct tipc_link *l); +bool tipc_link_is_blocked(struct tipc_link *l); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); void tipc_link_purge_backlog(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 9e20acffb3d4..a3ceeda2a80a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -334,7 +334,6 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; - nl->exec_mode = TIPC_LINK_OPEN; tipc_link_build_bcast_sync_msg(nl, xmitq); node_established_contact(n); return; @@ -368,7 +367,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id) struct sk_buff_head xmitq; l = n->links[bearer_id].link; - if (!l || !tipc_link_is_up(l)) + if (!l || tipc_link_is_reset(l)) return; __skb_queue_head_init(&xmitq); @@ -414,6 +413,7 @@ static void 
tipc_node_link_down(struct tipc_node *n, int bearer_id) n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, &xmitq); tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_bearer_xmit(n->net, tnl->bearer_id, &xmitq, maddr); } @@ -749,7 +749,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) struct tipc_link *l_ptr = n_ptr->links[i].link; if (!l_ptr) continue; - l_ptr->exec_mode = TIPC_LINK_OPEN; + tipc_link_fsm_evt(l_ptr, LINK_FAILOVER_END_EVT); kfree_skb(l_ptr->failover_reasm_skb); l_ptr->failover_reasm_skb = NULL; tipc_link_reset_fragments(l_ptr); @@ -989,7 +989,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, * Returns true if state is ok, otherwise consumes buffer and returns false */ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, - int bearer_id) + int bearer_id, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); int usr = msg_user(hdr); @@ -1042,42 +1042,47 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate or update failover mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; - if (pl && tipc_link_is_up(pl)) { + if (pl && tipc_link_is_up(pl)) tipc_node_link_down(n, pl->bearer_id); - pl->exec_mode = TIPC_LINK_BLOCKED; - } + /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; } /* Open parallel link when tunnel link reaches synch point */ - if ((n->state == NODE_FAILINGOVER) && (more(rcv_nxt, n->sync_point))) { + if ((n->state == NODE_FAILINGOVER) && !tipc_link_is_failingover(l)) { + if (!more(rcv_nxt, n->sync_point)) + return true; tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); if (pl) - pl->exec_mode = TIPC_LINK_OPEN; + tipc_link_fsm_evt(pl, LINK_FAILOVER_END_EVT); return true; } /* Initiate or update synch mode if applicable */ if ((usr == 
TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { syncpt = iseqno + exp_pkts - 1; + if (!tipc_link_is_up(l)) { + tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + tipc_node_link_up(n, bearer_id, xmitq); + } if (n->state == SELF_UP_PEER_UP) { n->sync_point = syncpt; + tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); } - l->exec_mode = TIPC_LINK_TUNNEL; if (less(syncpt, n->sync_point)) n->sync_point = syncpt; } /* Open tunnel link when parallel link reaches synch point */ - if ((n->state == NODE_SYNCHING) && (l->exec_mode == TIPC_LINK_TUNNEL)) { + if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { if (pl) dlv_nxt = mod(pl->rcv_nxt - skb_queue_len(pl->inputq)); if (!pl || more(dlv_nxt, n->sync_point)) { + tipc_link_fsm_evt(l, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); - l->exec_mode = TIPC_LINK_OPEN; return true; } if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) @@ -1143,7 +1148,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); /* Check and if necessary update node state */ - if (likely(tipc_node_check_state(n, skb, bearer_id))) { + if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { rc = tipc_link_rcv(le->link, skb, &xmitq); skb = NULL; } -- cgit From cf148816acb6def45474001302368eb472995e62 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:22 -0400 Subject: tipc: move received discovery data evaluation inside node.c The node lock is currently grabbed and released in the function tipc_disc_rcv() in the file discover.c. As a preparation for the next commits, we need to move this node lock handling, along with the code area it is covering, to node.c. This commit introduces this change. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/discover.c | 116 +++++++++---------------------------------------- net/tipc/node.c | 123 +++++++++++++++++++++++++++++++++++++++++++--------- net/tipc/node.h | 11 +++-- 3 files changed, 127 insertions(+), 123 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 164d08907d6f..d14e0a4aa9af 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -120,29 +120,24 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, * @buf: buffer containing message * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct net *net, struct sk_buff *buf, +void tipc_disc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *bearer) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node; struct tipc_media_addr maddr; - struct sk_buff *rbuf; - struct tipc_msg *msg = buf_msg(buf); - u32 ddom = msg_dest_domain(msg); - u32 onode = msg_prevnode(msg); - u32 net_id = msg_bc_netid(msg); - u32 mtyp = msg_type(msg); - u32 signature = msg_node_sig(msg); - u16 caps = msg_node_capabilities(msg); - bool addr_match = false; - bool sign_match = false; - bool link_up = false; - bool accept_addr = false; - bool accept_sign = false; + struct sk_buff *rskb; + struct tipc_msg *hdr = buf_msg(skb); + u32 ddom = msg_dest_domain(hdr); + u32 onode = msg_prevnode(hdr); + u32 net_id = msg_bc_netid(hdr); + u32 mtyp = msg_type(hdr); + u32 signature = msg_node_sig(hdr); + u16 caps = msg_node_capabilities(hdr); bool respond = false; + bool dupl_addr = false; - bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg)); - kfree_skb(buf); + bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr)); + kfree_skb(skb); /* Ensure message from node is valid and communication is permitted */ if (net_id != tn->net_id) @@ -164,91 +159,20 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, if (!tipc_in_scope(bearer->domain, onode)) return; - node = tipc_node_create(net, onode); - if 
(!node) - return; - tipc_node_lock(node); - node->capabilities = caps; - - /* Prepare to validate requesting node's signature and media address */ - sign_match = (signature == node->signature); - tipc_node_check_dest(node, bearer, &link_up, &addr_match, &maddr); - - /* These three flags give us eight permutations: */ - - if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - } else if (sign_match && addr_match && !link_up) { - /* Respond. The link will come up in due time */ - respond = true; - } else if (sign_match && !addr_match && link_up) { - /* Peer has changed i/f address without rebooting. - * If so, the link will reset soon, and the next - * discovery will be accepted. So we can ignore it. - * It may also be an cloned or malicious peer having - * chosen the same node address and signature as an - * existing one. - * Ignore requests until the link goes down, if ever. - */ + tipc_node_check_dest(net, onode, bearer, caps, signature, + &maddr, &respond, &dupl_addr); + if (dupl_addr) disc_dupl_alert(bearer, onode, &maddr); - } else if (sign_match && !addr_match && !link_up) { - /* Peer link has changed i/f address without rebooting. - * It may also be a cloned or malicious peer; we can't - * distinguish between the two. - * The signature is correct, so we must accept. - */ - accept_addr = true; - respond = true; - } else if (!sign_match && addr_match && link_up) { - /* Peer node rebooted. Two possibilities: - * - Delayed re-discovery; this link endpoint has already - * reset and re-established contact with the peer, before - * receiving a discovery message from that node. - * (The peer happened to receive one from this node first). - * - The peer came back so fast that our side has not - * discovered it yet. Probing from this side will soon - * reset the link, since there can be no working link - * endpoint at the peer end, and the link will re-establish. - * Accept the signature, since it comes from a known peer. 
- */ - accept_sign = true; - } else if (!sign_match && addr_match && !link_up) { - /* The peer node has rebooted. - * Accept signature, since it is a known peer. - */ - accept_sign = true; - respond = true; - } else if (!sign_match && !addr_match && link_up) { - /* Peer rebooted with new address, or a new/duplicate peer. - * Ignore until the link goes down, if ever. - */ - disc_dupl_alert(bearer, onode, &maddr); - } else if (!sign_match && !addr_match && !link_up) { - /* Peer rebooted with new address, or it is a new peer. - * Accept signature and address. - */ - accept_sign = true; - accept_addr = true; - respond = true; - } - - if (accept_sign) - node->signature = signature; - - if (accept_addr && !tipc_node_update_dest(node, bearer, &maddr)) - respond = false; /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rbuf = tipc_buf_acquire(MAX_H_SIZE); - if (rbuf) { - tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); - tipc_bearer_send(net, bearer->identity, rbuf, &maddr); - kfree_skb(rbuf); + rskb = tipc_buf_acquire(MAX_H_SIZE); + if (rskb) { + tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); + tipc_bearer_send(net, bearer->identity, rskb, &maddr); + kfree_skb(rskb); } } - tipc_node_unlock(node); - tipc_node_put(node); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index a3ceeda2a80a..d03e88f2273b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -138,7 +138,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } -struct tipc_node *tipc_node_create(struct net *net, u32 addr) +struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; @@ -154,6 +154,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + n_ptr->capabilities = capabilities; kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); @@ -422,38 
+423,118 @@ bool tipc_node_is_up(struct tipc_node *n) return n->active_links[0] != INVALID_BEARER_ID; } -void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *b, - bool *link_up, bool *addr_match, - struct tipc_media_addr *maddr) +void tipc_node_check_dest(struct net *net, u32 onode, + struct tipc_bearer *b, + u16 capabilities, u32 signature, + struct tipc_media_addr *maddr, + bool *respond, bool *dupl_addr) { - struct tipc_link *l = n->links[b->identity].link; - struct tipc_media_addr *curr = &n->links[b->identity].maddr; + struct tipc_node *n; + struct tipc_link *l; + struct tipc_media_addr *curr_maddr; + struct sk_buff_head *inputq; + bool addr_match = false; + bool sign_match = false; + bool link_up = false; + bool accept_addr = false; + + *dupl_addr = false; + *respond = false; + + n = tipc_node_create(net, onode, capabilities); + if (!n) + return; - *link_up = l && tipc_link_is_up(l); - *addr_match = l && !memcmp(curr, maddr, sizeof(*maddr)); -} + tipc_node_lock(n); -bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *b, - struct tipc_media_addr *maddr) -{ - struct tipc_link *l = n->links[b->identity].link; - struct tipc_media_addr *curr = &n->links[b->identity].maddr; - struct sk_buff_head *inputq = &n->links[b->identity].inputq; + curr_maddr = &n->links[b->identity].maddr; + inputq = &n->links[b->identity].inputq; + + /* Prepare to validate requesting node's signature and media address */ + l = n->links[b->identity].link; + link_up = l && tipc_link_is_up(l); + addr_match = l && !memcmp(curr_maddr, maddr, sizeof(*maddr)); + sign_match = (signature == n->signature); + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { + /* All is fine. Do nothing. */ + } else if (sign_match && addr_match && !link_up) { + /* Respond. The link will come up in due time */ + *respond = true; + } else if (sign_match && !addr_match && link_up) { + /* Peer has changed i/f address without rebooting. 
+ * If so, the link will reset soon, and the next + * discovery will be accepted. So we can ignore it. + * It may also be a cloned or malicious peer having + * chosen the same node address and signature as an + * existing one. + * Ignore requests until the link goes down, if ever. + */ + *dupl_addr = true; + } else if (sign_match && !addr_match && !link_up) { + /* Peer link has changed i/f address without rebooting. + * It may also be a cloned or malicious peer; we can't + * distinguish between the two. + * The signature is correct, so we must accept. + */ + accept_addr = true; + *respond = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. Two possibilities: + * - Delayed re-discovery; this link endpoint has already + * reset and re-established contact with the peer, before + * receiving a discovery message from that node. + * (The peer happened to receive one from this node first). + * - The peer came back so fast that our side has not + * discovered it yet. Probing from this side will soon + * reset the link, since there can be no working link + * endpoint at the peer end, and the link will re-establish. + * Accept the signature, since it comes from a known peer. + */ + n->signature = signature; + } else if (!sign_match && addr_match && !link_up) { + /* The peer node has rebooted. + * Accept signature, since it is a known peer. + */ + n->signature = signature; + *respond = true; + } else if (!sign_match && !addr_match && link_up) { + /* Peer rebooted with new address, or a new/duplicate peer. + * Ignore until the link goes down, if ever. + */ + *dupl_addr = true; + } else if (!sign_match && !addr_match && !link_up) { + /* Peer rebooted with new address, or it is a new peer. + * Accept signature and address.
+ */ + n->signature = signature; + accept_addr = true; + *respond = true; + } + + if (!accept_addr) + goto exit; + /* Now create new link if not already existing */ if (!l) { l = tipc_link_create(n, b, maddr, inputq, &n->bclink.namedq); - if (!l) - return false; + if (!l) { + *respond = false; + goto exit; + } tipc_node_calculate_timer(n, l); - if (n->link_cnt == 1) { + if (n->link_cnt == 1) if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) tipc_node_get(n); - } } memcpy(&l->media_addr, maddr, sizeof(*maddr)); - memcpy(curr, maddr, sizeof(*maddr)); + memcpy(curr_maddr, maddr, sizeof(*maddr)); tipc_node_link_down(n, b->identity); - return true; +exit: + tipc_node_unlock(n); + tipc_node_put(n); } void tipc_node_delete_links(struct net *net, int bearer_id) diff --git a/net/tipc/node.h b/net/tipc/node.h index 406c6fe0dbb2..9a977467fc46 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -147,13 +147,12 @@ struct tipc_node { struct tipc_node *tipc_node_find(struct net *net, u32 addr); void tipc_node_put(struct tipc_node *node); -struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); -void tipc_node_check_dest(struct tipc_node *n, struct tipc_bearer *bearer, - bool *link_up, bool *addr_match, - struct tipc_media_addr *maddr); -bool tipc_node_update_dest(struct tipc_node *n, struct tipc_bearer *bearer, - struct tipc_media_addr *maddr); +void tipc_node_check_dest(struct net *net, u32 onode, + struct tipc_bearer *bearer, + u16 capabilities, u32 signature, + struct tipc_media_addr *maddr, + bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -- cgit From 598411d70f85dcf5b5c6c2369cc48637c251b656 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:23 -0400 Subject: tipc: make resetting of links non-atomic 
In order to facilitate future improvements to the locking structure, we want to make resetting and establishing of links non-atomic. I.e., the functions tipc_node_link_up() and tipc_node_link_down() should be called from outside the node lock context, and grab/release the node lock themselves. This requires that we can freeze the link state from the moment it is set to RESETTING or PEER_RESET in one lock context until it is set to RESET or ESTABLISHING in a later context. The recently introduced link FSM makes this possible, so we are now ready to introduce the above change. This commit implements this. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- net/tipc/msg.h | 29 ++++++++++ net/tipc/node.c | 166 +++++++++++++++++++++++++++++++++----------------------- 3 files changed, 127 insertions(+), 70 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 9840b03348e1..3a92924711a1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -489,8 +489,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) xmit = true; mtyp = ACTIVATE_MSG; break; - case LINK_RESETTING: case LINK_PEER_RESET: + case LINK_RESETTING: case LINK_FAILINGOVER: break; default: diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 115bb2aa6bed..53d98ef78650 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -916,4 +916,33 @@ static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list, return false; } +/* tipc_skb_queue_splice_tail - append an skb list to lock protected list + * @list: the new list to append. Not lock protected + * @head: target list. Lock protected. + */ +static inline void tipc_skb_queue_splice_tail(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + spin_lock_bh(&head->lock); + skb_queue_splice_tail(list, head); + spin_unlock_bh(&head->lock); +} + +/* tipc_skb_queue_splice_tail_init - merge two lock protected skb lists + * @list: the new list to add. 
Lock protected. Will be reinitialized + * @head: target list. Lock protected. + */ +static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + struct sk_buff_head tmp; + + __skb_queue_head_init(&tmp); + + spin_lock_bh(&list->lock); + skb_queue_splice_tail_init(list, &tmp); + spin_unlock_bh(&list->lock); + tipc_skb_queue_splice_tail(&tmp, head); +} + #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index d03e88f2273b..cdca57be85bf 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -66,8 +66,12 @@ enum { NODE_SYNCH_END_EVT = 0xcee }; -static void tipc_node_link_down(struct tipc_node *n, int bearer_id); -static void node_lost_contact(struct tipc_node *n_ptr); +static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr **maddr); +static void tipc_node_link_down(struct tipc_node *n, int bearer_id, + bool delete); +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); @@ -275,9 +279,8 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) static void tipc_node_timeout(unsigned long data) { struct tipc_node *n = (struct tipc_node *)data; + struct tipc_link_entry *le; struct sk_buff_head xmitq; - struct tipc_link *l; - struct tipc_media_addr *maddr; int bearer_id; int rc = 0; @@ -285,17 +288,16 @@ static void tipc_node_timeout(unsigned long data) for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { tipc_node_lock(n); - l = n->links[bearer_id].link; - if (l) { + le = &n->links[bearer_id]; + if (le->link) { /* Link tolerance may change asynchronously: */ - tipc_node_calculate_timer(n, l); - rc = tipc_link_timeout(l, &xmitq); - if (rc & TIPC_LINK_DOWN_EVT) - tipc_node_link_down(n, bearer_id); + tipc_node_calculate_timer(n, le->link); + rc 
= tipc_link_timeout(le->link, &xmitq); } tipc_node_unlock(n); - maddr = &n->links[bearer_id].maddr; - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_link_down(n, bearer_id, false); } if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) tipc_node_get(n); @@ -303,18 +305,21 @@ static void tipc_node_timeout(unsigned long data) } /** - * tipc_node_link_up - handle addition of link - * + * __tipc_node_link_up - handle addition of link + * Node lock must be held by caller * Link becomes active (alone or shared) or standby, depending on its priority. */ -static void tipc_node_link_up(struct tipc_node *n, int bearer_id, - struct sk_buff_head *xmitq) +static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, + struct sk_buff_head *xmitq) { int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; + if (!nl || !tipc_link_is_up(nl)) + return; + if (n->working_links > 1) { pr_warn("Attempt to establish 3rd link to %x\n", n->addr); return; @@ -356,28 +361,40 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, } /** - * tipc_node_link_down - handle loss of link + * tipc_node_link_up - handle addition of link + * + * Link becomes active (alone or shared) or standby, depending on its priority. 
*/ -static void tipc_node_link_down(struct tipc_node *n, int bearer_id) +static void tipc_node_link_up(struct tipc_node *n, int bearer_id, + struct sk_buff_head *xmitq) { + tipc_node_lock(n); + __tipc_node_link_up(n, bearer_id, xmitq); + tipc_node_unlock(n); +} + +/** + * __tipc_node_link_down - handle loss of link + */ +static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr **maddr) +{ + struct tipc_link_entry *le = &n->links[*bearer_id]; int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; - struct tipc_media_addr *maddr = &n->links[bearer_id].maddr; int i, highest = 0; struct tipc_link *l, *_l, *tnl; - struct sk_buff_head xmitq; - l = n->links[bearer_id].link; + l = n->links[*bearer_id].link; if (!l || tipc_link_is_reset(l)) return; - __skb_queue_head_init(&xmitq); - n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n->link_id = l->peer_bearer_id << 16 | bearer_id; + n->link_id = l->peer_bearer_id << 16 | *bearer_id; - tipc_bearer_remove_dest(n->net, l->bearer_id, n->addr); + tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); pr_debug("Lost link <%s> on network plane %c\n", l->name, l->net_plane); @@ -404,18 +421,40 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id) if (!tipc_node_is_up(n)) { tipc_link_reset(l); - node_lost_contact(n); + node_lost_contact(n, &le->inputq); return; } /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); - tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); - tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, &xmitq); + tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); - tipc_bearer_xmit(n->net, tnl->bearer_id, &xmitq, maddr); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); + *maddr = &n->links[tnl->bearer_id].maddr; + *bearer_id = tnl->bearer_id; +} + +static void 
tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) +{ + struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + tipc_node_lock(n); + __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); + if (delete && le->link) { + kfree(le->link); + le->link = NULL; + n->link_cnt--; + } + tipc_node_unlock(n); + + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + tipc_sk_rcv(n->net, &le->inputq); } bool tipc_node_is_up(struct tipc_node *n) @@ -437,7 +476,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool sign_match = false; bool link_up = false; bool accept_addr = false; - + bool reset = true; *dupl_addr = false; *respond = false; @@ -460,6 +499,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (sign_match && addr_match && link_up) { /* All is fine. Do nothing. */ + reset = false; } else if (sign_match && addr_match && !link_up) { /* Respond. The link will come up in due time */ *respond = true; @@ -531,29 +571,21 @@ void tipc_node_check_dest(struct net *net, u32 onode, } memcpy(&l->media_addr, maddr, sizeof(*maddr)); memcpy(curr_maddr, maddr, sizeof(*maddr)); - tipc_node_link_down(n, b->identity); exit: tipc_node_unlock(n); + if (reset) + tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } void tipc_node_delete_links(struct net *net, int bearer_id) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l; struct tipc_node *n; rcu_read_lock(); list_for_each_entry_rcu(n, &tn->node_list, list) { - tipc_node_lock(n); - l = n->links[bearer_id].link; - if (l) { - tipc_node_link_down(n, bearer_id); - n->links[bearer_id].link = NULL; - n->link_cnt--; - } - tipc_node_unlock(n); - kfree(l); + tipc_node_link_down(n, bearer_id, true); } rcu_read_unlock(); } @@ -561,19 +593,14 @@ void tipc_node_delete_links(struct net *net, int bearer_id) static void tipc_node_reset_links(struct tipc_node *n) { char addr_string[16]; - 
u32 i; - - tipc_node_lock(n); + int i; pr_warn("Resetting all links to %s\n", tipc_addr_string_fill(addr_string, n->addr)); for (i = 0; i < MAX_BEARERS; i++) { - if (!n->links[i].link) - continue; - tipc_node_link_down(n, i); + tipc_node_link_down(n, i, false); } - tipc_node_unlock(n); } void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) @@ -798,10 +825,12 @@ static void node_established_contact(struct tipc_node *n_ptr) tipc_bclink_add_node(n_ptr->net, n_ptr->addr); } -static void node_lost_contact(struct tipc_node *n_ptr) +static void node_lost_contact(struct tipc_node *n_ptr, + struct sk_buff_head *inputq) { char addr_string[16]; struct tipc_sock_conn *conn, *safe; + struct tipc_link *l; struct list_head *conns = &n_ptr->conn_sks; struct sk_buff *skb; struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); @@ -827,14 +856,11 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { - struct tipc_link *l_ptr = n_ptr->links[i].link; - if (!l_ptr) - continue; - tipc_link_fsm_evt(l_ptr, LINK_FAILOVER_END_EVT); - kfree_skb(l_ptr->failover_reasm_skb); - l_ptr->failover_reasm_skb = NULL; - tipc_link_reset_fragments(l_ptr); + l = n_ptr->links[i].link; + if (l) + tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } + /* Prevent re-contact with node until cleanup is done */ tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT); @@ -848,7 +874,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); if (likely(skb)) { - skb_queue_tail(n_ptr->inputq, skb); + skb_queue_tail(inputq, skb); n_ptr->action_flags |= TIPC_MSG_EVT; } list_del(&conn->list); @@ -1025,9 +1051,9 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, l = tipc_node_select_link(n, selector, &bearer_id, &maddr); if (likely(l)) rc = tipc_link_xmit(l, list, &xmitq); - if (unlikely(rc == -ENOBUFS)) - tipc_node_link_down(n, bearer_id); 
tipc_node_unlock(n); + if (unlikely(rc == -ENOBUFS)) + tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); } if (likely(!rc)) { @@ -1081,8 +1107,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 rcv_nxt, syncpt, dlv_nxt; int state = n->state; struct tipc_link *l, *pl = NULL; - struct sk_buff_head; - int i; + struct tipc_media_addr *maddr; + int i, pb_id; l = n->links[bearer_id].link; if (!l) @@ -1123,9 +1149,11 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate or update failover mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; - if (pl && tipc_link_is_up(pl)) - tipc_node_link_down(n, pl->bearer_id); - + if (pl && tipc_link_is_up(pl)) { + pb_id = pl->bearer_id; + __tipc_node_link_down(n, &pb_id, xmitq, &maddr); + tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq); + } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; @@ -1146,7 +1174,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, syncpt = iseqno + exp_pkts - 1; if (!tipc_link_is_up(l)) { tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - tipc_node_link_up(n, bearer_id, xmitq); + __tipc_node_link_up(n, bearer_id, xmitq); } if (n->state == SELF_UP_PEER_UP) { n->sync_point = syncpt; @@ -1224,7 +1252,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) tipc_bclink_sync_state(n, hdr); - /* Release acked broadcast messages */ + /* Release acked broadcast packets */ if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); @@ -1233,14 +1261,14 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) rc = tipc_link_rcv(le->link, skb, &xmitq); skb = NULL; } +unlock: + tipc_node_unlock(n); if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, 
bearer_id, &xmitq); if (unlikely(rc & TIPC_LINK_DOWN_EVT)) - tipc_node_link_down(n, bearer_id); -unlock: - tipc_node_unlock(n); + tipc_node_link_down(n, bearer_id, false); if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); -- cgit From 23d8335d786472021b5c733f228c7074208dcfa0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:24 -0400 Subject: tipc: remove implicit message delivery in node_unlock() After the most recent changes, all access calls to a link which may entail addition of messages to the link's input queue are followed by an explicit call to tipc_sk_rcv(), using a reference to the correct queue. This means that the potentially hazardous implicit delivery, using tipc_node_unlock() in combination with a binary flag and a cached queue pointer, has now become redundant. This commit removes this implicit delivery mechanism both for regular data messages and for binding table update messages. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 21 +++------------------ net/tipc/msg.h | 22 ---------------------- net/tipc/node.c | 26 +++++++------------------- net/tipc/node.h | 4 ---- 4 files changed, 10 insertions(+), 63 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 3a92924711a1..2aa19de715f6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -559,8 +559,6 @@ void link_prepare_wakeup(struct tipc_link *l) break; skb_unlink(skb, &l->wakeupq); skb_queue_tail(l->inputq, skb); - l->owner->inputq = l->inputq; - l->owner->action_flags |= TIPC_MSG_EVT; } } @@ -598,8 +596,6 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) void tipc_link_reset(struct tipc_link *l) { - struct tipc_node *owner = l->owner; - tipc_link_fsm_evt(l, LINK_RESET_EVT); /* Link is down, accept any session */ @@ -611,14 +607,10 @@ void tipc_link_reset(struct tipc_link *l) /* Prepare for renewed mtu size negotiation */ l->mtu = l->advertised_mtu; - /* Clean up all queues, except inputq: */ + /* Clean up all queues: */ __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); - if (!owner->inputq) - owner->inputq = l->inputq; - skb_queue_splice_init(&l->wakeupq, owner->inputq); - if (!skb_queue_empty(owner->inputq)) - owner->action_flags |= TIPC_MSG_EVT; + skb_queue_splice_init(&l->wakeupq, l->inputq); tipc_link_purge_backlog(l); kfree_skb(l->reasm_buf); @@ -972,7 +964,6 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) { struct tipc_node *node = link->owner; struct tipc_msg *msg = buf_msg(skb); - u32 dport = msg_destport(msg); switch (msg_user(msg)) { case TIPC_LOW_IMPORTANCE: @@ -980,17 +971,11 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - if (tipc_skb_queue_tail(link->inputq, skb, dport)) { - node->inputq = link->inputq; - node->action_flags |= TIPC_MSG_EVT; - } + skb_queue_tail(link->inputq, skb); return true; case 
NAME_DISTRIBUTOR: node->bclink.recv_permitted = true; - node->namedq = link->namedq; skb_queue_tail(link->namedq, skb); - if (skb_queue_len(link->namedq) == 1) - node->action_flags |= TIPC_NAMED_MSG_EVT; return true; case MSG_BUNDLER: case TUNNEL_PROTOCOL: diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 53d98ef78650..a82c5848d4bc 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -862,28 +862,6 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, return skb; } -/* tipc_skb_queue_tail(): add buffer to tail of list; - * @list: list to be appended to - * @skb: buffer to append. Always appended - * @dport: the destination port of the buffer - * returns true if dport differs from previous destination - */ -static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, - struct sk_buff *skb, u32 dport) -{ - struct sk_buff *_skb = NULL; - bool rv = false; - - spin_lock_bh(&list->lock); - _skb = skb_peek_tail(list); - if (!_skb || (msg_destport(buf_msg(_skb)) != dport) || - (skb_queue_len(list) > 32)) - rv = true; - __skb_queue_tail(list, skb); - spin_unlock_bh(&list->lock); - return rv; -} - /* tipc_skb_queue_sorted(); sort pkt into list according to sequence number * @list: list to be appended to * @skb: buffer to add diff --git a/net/tipc/node.c b/net/tipc/node.c index cdca57be85bf..9e9b0938bd17 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -873,10 +873,8 @@ static void node_lost_contact(struct tipc_node *n_ptr, SHORT_H_SIZE, 0, tn->own_addr, conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); - if (likely(skb)) { + if (likely(skb)) skb_queue_tail(inputq, skb); - n_ptr->action_flags |= TIPC_MSG_EVT; - } list_del(&conn->list); kfree(conn); } @@ -923,27 +921,20 @@ void tipc_node_unlock(struct tipc_node *node) u32 flags = node->action_flags; u32 link_id = 0; struct list_head *publ_list; - struct sk_buff_head *inputq = node->inputq; - struct sk_buff_head *namedq; - if (likely(!flags || (flags == TIPC_MSG_EVT))) { - 
node->action_flags = 0; + if (likely(!flags)) { spin_unlock_bh(&node->lock); - if (flags == TIPC_MSG_EVT) - tipc_sk_rcv(net, inputq); return; } addr = node->addr; link_id = node->link_id; - namedq = node->namedq; publ_list = &node->publ_list; - node->action_flags &= ~(TIPC_MSG_EVT | - TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); + TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -964,12 +955,6 @@ void tipc_node_unlock(struct tipc_node *node) tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); - if (flags & TIPC_MSG_EVT) - tipc_sk_rcv(net, inputq); - - if (flags & TIPC_NAMED_MSG_EVT) - tipc_named_rcv(net, namedq); - if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); @@ -1270,6 +1255,9 @@ unlock: if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id, false); + if (unlikely(!skb_queue_empty(&n->bclink.namedq))) + tipc_named_rcv(net, &n->bclink.namedq); + if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); diff --git a/net/tipc/node.h b/net/tipc/node.h index 9a977467fc46..344b3e7594fd 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -53,13 +53,11 @@ * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { - TIPC_MSG_EVT = 1, TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), TIPC_WAKEUP_BCAST_USERS = (1 << 5), TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), - TIPC_NAMED_MSG_EVT = (1 << 8), TIPC_BCAST_MSG_EVT = (1 << 9), TIPC_BCAST_RESET = (1 << 10) }; @@ -124,8 +122,6 @@ struct tipc_node { spinlock_t lock; struct net *net; struct hlist_node hash; - struct sk_buff_head *inputq; - struct sk_buff_head *namedq; int active_links[2]; struct tipc_link_entry links[MAX_BEARERS]; int action_flags; -- cgit From 9073fb8be3ee6f89492b8ea8f6d3902913a9fc91 Mon Sep 17 
00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:25 -0400 Subject: tipc: use temporary, non-protected skb queue for bundle reception Currently, when we extract small messages from a message bundle, or when many messages have accumulated in the link arrival queue, those messages are added one by one to the lock protected link input queue. This may increase contention with the reader of that queue, in the function tipc_sk_rcv(). This commit introduces a temporary, unprotected input queue in tipc_link_rcv() for such cases. Only when the arrival queue has been emptied, and the function is ready to return, does it splice the whole temporary queue into the real input queue. Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 2aa19de715f6..d683fe9f68c8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -111,8 +111,6 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb); -static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); /* * Simple non-static link routines (i.e. 
referenced outside this file) @@ -960,18 +958,18 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, * Consumes buffer if message is of right type * Node lock must be held */ -static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) +static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, + struct sk_buff_head *inputq) { struct tipc_node *node = link->owner; - struct tipc_msg *msg = buf_msg(skb); - switch (msg_user(msg)) { + switch (msg_user(buf_msg(skb))) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - skb_queue_tail(link->inputq, skb); + __skb_queue_tail(inputq, skb); return true; case NAME_DISTRIBUTOR: node->bclink.recv_permitted = true; @@ -993,7 +991,8 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) * * Consumes buffer */ -static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb) +static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq) { struct tipc_node *node = l->owner; struct tipc_msg *hdr = buf_msg(skb); @@ -1016,7 +1015,7 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb) hdr = buf_msg(skb); if (less(msg_seqno(hdr), l->drop_point)) goto drop; - if (tipc_data_input(l, skb)) + if (tipc_data_input(l, skb, inputq)) return rc; usr = msg_user(hdr); reasm_skb = &l->failover_reasm_skb; @@ -1026,13 +1025,13 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb) l->stats.recv_bundles++; l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) - tipc_data_input(l, iskb); + tipc_data_input(l, iskb, inputq); return 0; } else if (usr == MSG_FRAGMENTER) { l->stats.recv_fragments++; if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; - tipc_data_input(l, skb); + tipc_data_input(l, skb, inputq); } else if (!*reasm_skb) { return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } @@ 
-1070,10 +1069,13 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *arrvq = &l->deferdq; + struct sk_buff_head tmpq; struct tipc_msg *hdr; u16 seqno, rcv_nxt; int rc = 0; + __skb_queue_head_init(&tmpq); + if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) { if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, @@ -1095,7 +1097,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); if (!link_is_up(l)) { kfree_skb(__skb_dequeue(arrvq)); - return rc; + goto exit; } } @@ -1113,7 +1115,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, rcv_nxt = l->rcv_nxt; if (unlikely(less(rcv_nxt, seqno))) { l->stats.deferred_recv++; - return rc; + goto exit; } __skb_dequeue(arrvq); @@ -1122,14 +1124,14 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (unlikely(more(rcv_nxt, seqno))) { l->stats.duplicates++; kfree_skb(skb); - return rc; + goto exit; } /* Packet can be delivered */ l->rcv_nxt++; l->stats.recv_info++; - if (unlikely(!tipc_data_input(l, skb))) - rc = tipc_link_input(l, skb); + if (unlikely(!tipc_data_input(l, skb, &tmpq))) + rc = tipc_link_input(l, skb, &tmpq); /* Ack at regular intervals */ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { @@ -1139,6 +1141,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, 0, 0, 0, 0, xmitq); } } +exit: + tipc_skb_queue_splice_tail(&tmpq, l->inputq); return rc; } -- cgit From 440d8963cd590ec9387d76a36e60c02da9ed944d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 30 Jul 2015 18:24:26 -0400 Subject: tipc: clean up link creation We simplify the link creation function tipc_link_create() and the way the link struct is connected to the node struct. In particular, we remove the duplicate initialization of some fields which are anyway set in tipc_link_reset(). Tested-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/core.h | 5 +++ net/tipc/link.c | 136 +++++++++++++++++++++++++------------------------------- net/tipc/link.h | 18 +++----- net/tipc/node.c | 48 +++++++------------- 4 files changed, 86 insertions(+), 121 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/core.h b/net/tipc/core.h index f4ed67778c54..b96b41eabf12 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -109,6 +109,11 @@ struct tipc_net { atomic_t subscription_count; }; +static inline struct tipc_net *tipc_net(struct net *net) +{ + return net_generic(net, tipc_net_id); +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/link.c b/net/tipc/link.c index d683fe9f68c8..f067e5425560 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -147,87 +147,71 @@ int tipc_link_is_active(struct tipc_link *l) return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l); } +static u32 link_own_addr(struct tipc_link *l) +{ + return msg_prevnode(l->pmsg); +} + /** * tipc_link_create - create a new link - * @n_ptr: pointer to associated node - * @b_ptr: pointer to associated bearer - * @media_addr: media address to use when sending messages over link + * @n: pointer to associated node + * @b: pointer to associated bearer + * @ownnode: identity of own node + * @peer: identity of peer node + * @maddr: media address to be used + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @link: return value, pointer to put the created link * - * Returns pointer to link. 
+ * Returns true if link was created, otherwise false */ -struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, - struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr, - struct sk_buff_head *inputq, - struct sk_buff_head *namedq) +bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, + u32 ownnode, u32 peer, struct tipc_media_addr *maddr, + struct sk_buff_head *inputq, struct sk_buff_head *namedq, + struct tipc_link **link) { - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_msg *msg; + struct tipc_link *l; + struct tipc_msg *hdr; char *if_name; - char addr_string[16]; - u32 peer = n_ptr->addr; - if (n_ptr->link_cnt >= MAX_BEARERS) { - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_err("Cannot establish %uth link to %s. Max %u allowed.\n", - n_ptr->link_cnt, addr_string, MAX_BEARERS); - return NULL; - } + l = kzalloc(sizeof(*l), GFP_ATOMIC); + if (!l) + return false; + *link = l; - if (n_ptr->links[b_ptr->identity].link) { - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_err("Attempt to establish second link on <%s> to %s\n", - b_ptr->name, addr_string); - return NULL; - } + /* Note: peer i/f name is completed by reset/activate message */ + if_name = strchr(b->name, ':') + 1; + sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", + tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), + if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); - l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); - if (!l_ptr) { - pr_warn("Link creation failed, no memory\n"); - return NULL; - } - l_ptr->addr = peer; - if_name = strchr(b_ptr->name, ':') + 1; - sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr), - tipc_node(tn->own_addr), - if_name, - tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); - /* note: peer i/f name is updated by reset/activate message */ - memcpy(&l_ptr->media_addr, media_addr, 
sizeof(*media_addr)); - l_ptr->owner = n_ptr; - l_ptr->peer_session = WILDCARD_SESSION; - l_ptr->bearer_id = b_ptr->identity; - l_ptr->tolerance = b_ptr->tolerance; - l_ptr->snd_nxt = 1; - l_ptr->rcv_nxt = 1; - l_ptr->state = LINK_RESET; - - l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; - msg = l_ptr->pmsg; - tipc_msg_init(tn->own_addr, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, - l_ptr->addr); - msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, (tn->random & 0xffff)); - msg_set_bearer_id(msg, b_ptr->identity); - strcpy((char *)msg_data(msg), if_name); - l_ptr->net_plane = b_ptr->net_plane; - l_ptr->advertised_mtu = b_ptr->mtu; - l_ptr->mtu = l_ptr->advertised_mtu; - l_ptr->priority = b_ptr->priority; - tipc_link_set_queue_limits(l_ptr, b_ptr->window); - l_ptr->snd_nxt = 1; - __skb_queue_head_init(&l_ptr->transmq); - __skb_queue_head_init(&l_ptr->backlogq); - __skb_queue_head_init(&l_ptr->deferdq); - skb_queue_head_init(&l_ptr->wakeupq); - l_ptr->inputq = inputq; - l_ptr->namedq = namedq; - skb_queue_head_init(l_ptr->inputq); - link_reset_statistics(l_ptr); - tipc_node_attach_link(n_ptr, l_ptr); - return l_ptr; + l->addr = peer; + l->media_addr = maddr; + l->owner = n; + l->peer_session = WILDCARD_SESSION; + l->bearer_id = b->identity; + l->tolerance = b->tolerance; + l->net_plane = b->net_plane; + l->advertised_mtu = b->mtu; + l->mtu = b->mtu; + l->priority = b->priority; + tipc_link_set_queue_limits(l, b->window); + l->inputq = inputq; + l->namedq = namedq; + l->state = LINK_RESETTING; + l->pmsg = (struct tipc_msg *)&l->proto_msg; + hdr = l->pmsg; + tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); + msg_set_size(hdr, sizeof(l->proto_msg)); + msg_set_session(hdr, session); + msg_set_bearer_id(hdr, l->bearer_id); + strcpy((char *)msg_data(hdr), if_name); + __skb_queue_head_init(&l->transmq); + __skb_queue_head_init(&l->backlogq); + __skb_queue_head_init(&l->deferdq); + skb_queue_head_init(&l->wakeupq); + 
skb_queue_head_init(l->inputq); + return true; } /* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints. @@ -643,7 +627,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, u16 ack = mod(link->rcv_nxt - 1); u16 seqno = link->snd_nxt; u16 bc_last_in = link->owner->bclink.last_in; - struct tipc_media_addr *addr = &link->media_addr; + struct tipc_media_addr *addr = link->media_addr; struct sk_buff_head *transmq = &link->transmq; struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *bskb; @@ -809,7 +793,7 @@ void tipc_link_push_packets(struct tipc_link *link) link->rcv_unacked = 0; __skb_queue_tail(&link->transmq, skb); tipc_bearer_send(link->owner->net, link->bearer_id, - skb, &link->media_addr); + skb, link->media_addr); } link->snd_nxt = seqno; } @@ -912,7 +896,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, - &l_ptr->media_addr); + l_ptr->media_addr); retransmits--; l_ptr->stats.retransmitted++; } @@ -1200,7 +1184,7 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, skb = __skb_dequeue(&xmitq); if (!skb) return; - tipc_bearer_send(l->owner->net, l->bearer_id, skb, &l->media_addr); + tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr); l->rcv_unacked = 0; kfree_skb(skb); } diff --git a/net/tipc/link.h b/net/tipc/link.h index 39b8c4c5121e..39ff8b6919a4 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -148,7 +148,7 @@ struct tipc_stats { struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr media_addr; + struct tipc_media_addr *media_addr; struct tipc_node *owner; /* Management and link supervision data */ @@ -205,13 +205,10 @@ struct tipc_link { struct tipc_stats stats; }; -struct tipc_port; - -struct tipc_link *tipc_link_create(struct tipc_node *n, - struct 
tipc_bearer *b, - const struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, - struct sk_buff_head *namedq); +bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, + u32 ownnode, u32 peer, struct tipc_media_addr *maddr, + struct sk_buff_head *inputq, struct sk_buff_head *namedq, + struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_bcast_sync_msg(struct tipc_link *l, @@ -246,13 +243,8 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); -void link_prepare_wakeup(struct tipc_link *l); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -static inline u32 link_own_addr(struct tipc_link *l) -{ - return msg_prevnode(l->pmsg); -} #endif diff --git a/net/tipc/node.c b/net/tipc/node.c index 9e9b0938bd17..7c191641b44f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -320,10 +320,6 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, if (!nl || !tipc_link_is_up(nl)) return; - if (n->working_links > 1) { - pr_warn("Attempt to establish 3rd link to %x\n", n->addr); - return; - } n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; n->link_id = nl->peer_bearer_id << 16 | bearer_id; @@ -470,13 +466,13 @@ void tipc_node_check_dest(struct net *net, u32 onode, { struct tipc_node *n; struct tipc_link *l; - struct tipc_media_addr *curr_maddr; - struct sk_buff_head *inputq; + struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; bool link_up = false; bool accept_addr = false; bool reset = true; + *dupl_addr = false; *respond = false; @@ -486,13 +482,12 @@ 
void tipc_node_check_dest(struct net *net, u32 onode, tipc_node_lock(n); - curr_maddr = &n->links[b->identity].maddr; - inputq = &n->links[b->identity].inputq; + le = &n->links[b->identity]; /* Prepare to validate requesting node's signature and media address */ - l = n->links[b->identity].link; + l = le->link; link_up = l && tipc_link_is_up(l); - addr_match = l && !memcmp(curr_maddr, maddr, sizeof(*maddr)); + addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ @@ -559,18 +554,25 @@ void tipc_node_check_dest(struct net *net, u32 onode, /* Now create new link if not already existing */ if (!l) { - l = tipc_link_create(n, b, maddr, inputq, &n->bclink.namedq); - if (!l) { + if (n->link_cnt == 2) { + pr_warn("Cannot establish 3rd link to %x\n", n->addr); + goto exit; + } + if (!tipc_link_create(n, b, mod(tipc_net(net)->random), + tipc_own_addr(net), onode, &le->maddr, + &le->inputq, &n->bclink.namedq, &l)) { *respond = false; goto exit; } + tipc_link_reset(l); + le->link = l; + n->link_cnt++; tipc_node_calculate_timer(n, l); if (n->link_cnt == 1) if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) tipc_node_get(n); } - memcpy(&l->media_addr, maddr, sizeof(*maddr)); - memcpy(curr_maddr, maddr, sizeof(*maddr)); + memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_unlock(n); if (reset) @@ -603,24 +605,6 @@ static void tipc_node_reset_links(struct tipc_node *n) } } -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) -{ - n_ptr->links[l_ptr->bearer_id].link = l_ptr; - n_ptr->link_cnt++; -} - -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) -{ - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - if (l_ptr != n_ptr->links[i].link) - continue; - n_ptr->links[i].link = NULL; - n_ptr->link_cnt--; - } -} - /* tipc_node_fsm_evt - node finite state machine * Determines when contact is allowed with peer node */ -- 
cgit From 343d60aada5a358ca186d6e9e353230379c426d8 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 30 Jul 2015 13:34:53 -0700 Subject: ipv6: change ipv6_stub_impl.ipv6_dst_lookup to take net argument This patch adds net argument to ipv6_stub_impl.ipv6_dst_lookup for use cases where sk is not available (like mpls). sk appears to be needed to get the namespace 'net' and is optional otherwise. This patch series changes ipv6_stub_impl.ipv6_dst_lookup to take net argument. sk remains optional. All callers of ipv6_stub_impl.ipv6_dst_lookup have been modified to pass net. I have modified them to use already available 'net' in the scope of the call. I can change them to sock_net(sk) to avoid any unintended change in behaviour if sock namespace is different. They don't seem to be from code inspection. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 66deebc66aa1..c170d3138953 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -194,7 +194,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, .saddr = src->ipv6, .flowi6_proto = IPPROTO_UDP }; - err = ipv6_stub->ipv6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst, + &fl6); if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); -- cgit From 8f8ff9135b28a7560a5627aceaf289e3f0d4cd64 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 17 Aug 2015 14:15:10 +0200 Subject: tipc: don't sanity check non-existing TLV (NL compat) A zero length payload means that no TLV (Type Length Value) data has been passed. Prior to this patch a non-existing TLV could be sanity checked with TLV_OK() resulting in random behavior where a user sending an empty message occasionally got an incorrect "operation not supported" message back.
Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 53e0fee80086..1eadc95e1132 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1114,7 +1114,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) { + if (len && !TLV_OK(msg.req, len)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; -- cgit From 17b2063077a7478e5fd3c34b04a059dbb8474638 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 20 Aug 2015 02:12:54 -0400 Subject: tipc: eliminate risk of premature link setup during failover When a link goes down, and there is still a working link towards its destination node, a failover is initiated, and the failed link is not allowed to re-establish until that procedure is finished. To ensure this, the concerned link endpoints are set to state LINK_FAILINGOVER, and the node endpoints to NODE_FAILINGOVER during the failover period. However, if the link reset is due to a disabled bearer, the corresponding link endpoint is deleted, and only the node endpoint knows about the ongoing failover. Now, if the disabled bearer is re-enabled during the failover period, the discovery mechanism may create a new link endpoint that is ready to be established, even though this is not permitted. This situation may cause both the ongoing failover and any subsequent link synchronization to fail. In this commit, we ensure that a newly created link goes directly to state LINK_FAILINGOVER if the corresponding node state is NODE_FAILINGOVER. This eliminates the problem described above.
Furthermore, we tighten the criteria for which packets are allowed to end a failover state in the function tipc_node_check_state(). By checking that the receiving link is up and running, instead of just checking that it is not in failover mode, we eliminate the risk that protocol packets from the re-created link may cause the failover to be prematurely terminated. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index 7c191641b44f..004834bd1605 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -565,6 +565,8 @@ void tipc_node_check_dest(struct net *net, u32 onode, goto exit; } tipc_link_reset(l); + if (n->state == NODE_FAILINGOVER) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1129,7 +1131,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, } /* Open parallel link when tunnel link reaches synch point */ - if ((n->state == NODE_FAILINGOVER) && !tipc_link_is_failingover(l)) { + if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) { if (!more(rcv_nxt, n->sync_point)) return true; tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); -- cgit From 5ae2f8e6857968d6dddbd3879ed0a32b860e02d1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 20 Aug 2015 02:12:55 -0400 Subject: tipc: interrupt link synchronization when a link goes down When we introduced the new link failover/synch mechanism in commit 6e498158a827fd515b514842e9a06bdf0f75ab86 ("tipc: move link synch and failover to link aggregation level"), we missed the case when the non-tunnel link goes down during the link synchronization period. In this case the tunnel link will remain in state LINK_SYNCHING, something leading to unpredictable behavior when the failover procedure is initiated. 
In this commit, we ensure that the node and remaining link goes back to regular communication state (SELF_UP_PEER_UP/LINK_ESTABLISHED) when one of the parallel links goes down. We also ensure that we don't re-enter synch mode if subsequent SYNCH packets arrive on the remaining link. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- net/tipc/node.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index f067e5425560..7058c86f5e48 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -351,11 +351,11 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) l->state = LINK_RESET; break; case LINK_ESTABLISH_EVT: + case LINK_SYNCH_END_EVT: break; case LINK_SYNCH_BEGIN_EVT: l->state = LINK_SYNCHING; break; - case LINK_SYNCH_END_EVT: case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: diff --git a/net/tipc/node.c b/net/tipc/node.c index 004834bd1605..937cc6192bcf 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -423,6 +423,8 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); @@ -1140,6 +1142,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } + /* No synching needed if only one link */ + if (!pl || !tipc_link_is_up(pl)) + return true; + /* Initiate or update synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { syncpt = iseqno + exp_pkts - 1; @@ -1158,9 +1164,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Open tunnel link when parallel link reaches synch point */ if ((n->state == 
NODE_SYNCHING) && tipc_link_is_synching(l)) { - if (pl) - dlv_nxt = mod(pl->rcv_nxt - skb_queue_len(pl->inputq)); - if (!pl || more(dlv_nxt, n->sync_point)) { + dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); + if (more(dlv_nxt, n->sync_point)) { tipc_link_fsm_evt(l, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); return true; -- cgit From 2be80c2d87de789550982e74a11e9f9ff5940845 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 20 Aug 2015 02:12:56 -0400 Subject: tipc: fix stale link problem during synchronization Recent changes to the link synchronization mean that we can now just drop packets arriving on the synchronizing link before the synch point is reached. This has led to significant simplifications to the implementation, but also turns out to have a flip side that we need to consider. Under unlucky circumstances, the two endpoints may end up repeatedly dropping each other's packets, while immediately asking for retransmission of the same packets, just to drop them once more. This pattern will eventually be broken when the synch point is reached on the other link, but before that, the endpoints may have arrived at the retransmission limit (stale counter) that indicates that the link should be broken. We see this happen on rare occasions. The fix for this is to not ask for retransmissions when a link is in state LINK_SYNCHING. The fact that the link has reached this state means that it has already received the first SYNCH packet, and that it knows the synch point. Hence, it doesn't need any more packets until the other link has reached the synch point, whereafter it can go ahead and ask for the missing packets. However, because of the reduced traffic on the synching link that follows this change, it may now take longer to discover that the synch point has been reached. We compensate for this by letting all packets, on any of the links, trigger a check for synchronization termination.
This is possible because the packets themselves don't contain any information that is needed for discovering this condition. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 3 ++- net/tipc/node.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 7058c86f5e48..75db07c78a69 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1330,6 +1330,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); + u16 rcv_nxt = l->rcv_nxt; char *if_name; int rc = 0; @@ -1393,7 +1394,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; /* Send NACK if peer has sent pkts we haven't received yet */ - if (more(peers_snd_nxt, l->rcv_nxt)) + if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || (msg_probe(hdr))) tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, diff --git a/net/tipc/node.c b/net/tipc/node.c index 937cc6192bcf..703875fd6cde 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1079,7 +1079,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 exp_pkts = msg_msgcnt(hdr); u16 rcv_nxt, syncpt, dlv_nxt; int state = n->state; - struct tipc_link *l, *pl = NULL; + struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; int i, pb_id; @@ -1164,12 +1164,20 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Open tunnel link when parallel link reaches synch point */ if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { + if (tipc_link_is_synching(l)) { + tnl = l; + } else { + tnl = pl; + pl = l; + } dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); if (more(dlv_nxt, n->sync_point)) { - tipc_link_fsm_evt(l, 
LINK_SYNCH_END_EVT); + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); return true; } + if (l == pl) + return true; if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) return true; if (usr == LINK_PROTOCOL) -- cgit From 7845989cb4b3da1db903918c844fccb9817d34a0 Mon Sep 17 00:00:00 2001 From: Kolmakov Dmitriy Date: Mon, 7 Sep 2015 09:05:48 +0000 Subject: net: tipc: fix stall during bclink wakeup procedure If an attempt to wake up users of broadcast link is made when there is not enough space in the send queue, it may hang inside the tipc_sk_rcv() function since the loop breaks only after the wake up queue becomes empty. This can lead to complete CPU stall with the following message generated by RCU: INFO: rcu_sched self-detected stall on CPU { 0} (t=2101 jiffies g=54225 c=54224 q=11465) Task dump for CPU 0: tpch R running task 0 39949 39948 0x0000000a ffffffff818536c0 ffff88181fa037a0 ffffffff8106a4be 0000000000000000 ffffffff818536c0 ffff88181fa037c0 ffffffff8106d8a8 ffff88181fa03800 0000000000000001 ffff88181fa037f0 ffffffff81094a50 ffff88181fa15680 Call Trace: [] sched_show_task+0xae/0x120 [] dump_cpu_task+0x38/0x40 [] rcu_dump_cpu_stacks+0x90/0xd0 [] rcu_check_callbacks+0x3eb/0x6e0 [] ? account_system_time+0x7f/0x170 [] update_process_times+0x34/0x60 [] tick_sched_handle.isra.18+0x31/0x40 [] tick_sched_timer+0x3c/0x70 [] __run_hrtimer.isra.34+0x3d/0xc0 [] hrtimer_interrupt+0xc5/0x1e0 [] ? native_smp_send_reschedule+0x42/0x60 [] local_apic_timer_interrupt+0x34/0x60 [] smp_apic_timer_interrupt+0x3c/0x60 [] apic_timer_interrupt+0x6b/0x70 [] ? _raw_spin_unlock_irqrestore+0x9/0x10 [] __wake_up_sync_key+0x4f/0x60 [] tipc_write_space+0x31/0x40 [tipc] [] filter_rcv+0x31f/0x520 [tipc] [] ? tipc_sk_lookup+0xc9/0x110 [tipc] [] ? _raw_spin_lock_bh+0x19/0x30 [] tipc_sk_rcv+0x2dc/0x3e0 [tipc] [] tipc_bclink_wakeup_users+0x2f/0x40 [tipc] [] tipc_node_unlock+0x186/0x190 [tipc] [] ?
kfree_skb+0x2c/0x40 [] tipc_rcv+0x2ac/0x8c0 [tipc] [] tipc_l2_rcv_msg+0x38/0x50 [tipc] [] __netif_receive_skb_core+0x5a3/0x950 [] __netif_receive_skb+0x13/0x60 [] netif_receive_skb_internal+0x1e/0x90 [] napi_gro_receive+0x78/0xa0 [] tg3_poll_work+0xc54/0xf40 [tg3] [] ? consume_skb+0x2c/0x40 [] tg3_poll_msix+0x41/0x160 [tg3] [] net_rx_action+0xe2/0x290 [] __do_softirq+0xda/0x1f0 [] irq_exit+0x76/0xa0 [] do_IRQ+0x55/0xf0 [] common_interrupt+0x6b/0x6b The issue occurs only when tipc_sk_rcv() is used to wake up postponed senders: tipc_bclink_wakeup_users() // wakeupq - is a queue which consists of special // messages with SOCK_WAKEUP type. tipc_sk_rcv(wakeupq) ... while (skb_queue_len(inputq)) { filter_rcv(skb) // Here the type of message is checked // and if it is SOCK_WAKEUP then // it tries to wake up a sender. tipc_write_space(sk) wake_up_interruptible_sync_poll() } After the sender thread is woken up it can gain control and perform an attempt to send a message. But if there is not enough space in the send queue it will call link_schedule_user() function which puts a message of type SOCK_WAKEUP to the wakeup queue and puts the sender to sleep. Thus the size of the queue actually is not changed and the while() loop never exits. The approach I proposed is to wake up only senders for which there is enough space in the send queue so the described issue can't occur. Moreover the same approach is already used to wake up senders on unicast links. I have got into the issue on our product code but to reproduce the issue I changed a benchmark test application (from tipcutils/demos/benchmark) to perform the following scenario: 1. Run 64 instances of test application (nodes). It can be done on the one physical machine. 2. Each application connects to all other using TIPC sockets in RDM mode. 3. When setup is done all nodes start simultaneously send broadcast messages. 4. Everything hangs up. The issue is reproducible only when congestion on the broadcast link occurs.
For example, when there are only 8 nodes it works fine since congestion doesn't occur. Send queue limit is 40 in my case (I use a critical importance level) and when 64 nodes send a message at the same moment a congestion occurs every time. Signed-off-by: Dmitry S Kolmakov Reviewed-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 8b010c976b2f..41042de3ae9b 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -169,6 +169,30 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) } } +/** + * bclink_prepare_wakeup - prepare users for wakeup after congestion + * @bcl: broadcast link + * @resultq: queue for users which can be woken up + * Move a number of waiting users, as permitted by available space in + * the send queue, from link wait queue to specified queue for wakeup + */ +static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq) +{ + int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; + int imp, lim; + struct sk_buff *skb, *tmp; + + skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + lim = bcl->window + bcl->backlog[imp].limit; + pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; + if ((pnd[imp] + bcl->backlog[imp].len) >= lim) + continue; + skb_unlink(skb, &bcl->wakeupq); + skb_queue_tail(resultq, skb); + } +} + /** * tipc_bclink_wakeup_users - wake up pending users * @@ -177,8 +201,12 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) void tipc_bclink_wakeup_users(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct sk_buff_head resultq; - tipc_sk_rcv(net, &tn->bclink->link.wakeupq); + skb_queue_head_init(&resultq); + bclink_prepare_wakeup(bcl, &resultq); + tipc_sk_rcv(net, &resultq); } /** -- cgit From 
4e3ae00100945d39e1f83b7c0179a114ccf55759 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 18 Sep 2015 10:46:31 +0200 Subject: tipc: reinitialize pointer after skb linearize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The msg pointer into header may change after skb linearization. We must reinitialize it after calling skb_linearize to prevent operating on a freed or invalid pointer. Signed-off-by: Erik Hugne Reported-by: Tamás Végh Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 562c926a51cc..c5ac436235e0 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -539,6 +539,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) *err = -TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; + msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; dnode = addr_domain(net, msg_lookup_scope(msg)); -- cgit