From 057af70713445fad2459aa348c9c2c4ecf7db938 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 5 Oct 2019 20:04:39 +0200 Subject: net: tipc: have genetlink code to parse the attrs during dumpit Benefit from the fact that the generic netlink code can parse the attrs for dumpit op and avoid need to parse it in the op callback. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/tipc/node.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index c8f6177dd5a2..f2e3cf70c922 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2484,13 +2484,9 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, int err; if (!prev_node) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_MON]) return -EINVAL; -- cgit From f73b12812a3d1d798b7517547ccdcf864844d2cd Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 29 Oct 2019 07:51:21 +0700 Subject: tipc: improve throughput between nodes in netns Currently, TIPC transports intra-node user data messages directly socket to socket, hence shortcutting all the lower layers of the communication stack. This gives TIPC very good intra node performance, both regarding throughput and latency. We now introduce a similar mechanism for TIPC data traffic across network namespaces located in the same kernel. On the send path, the call chain is as always accompanied by the sending node's network name space pointer. However, once we have reliably established that the receiving node is represented by a namespace on the same host, we just replace the namespace pointer with the receiving node/namespace's ditto, and follow the regular socket receive patch though the receiving node. This technique gives us a throughput similar to the node internal throughput, several times larger than if we let the traffic go though the full network stacks. As a comparison, max throughput for 64k messages is four times larger than TCP throughput for the same type of traffic. To meet any security concerns, the following should be noted. - All nodes joining a cluster are supposed to have been be certified and authenticated by mechanisms outside TIPC. This is no different for nodes/namespaces on the same host; they have to auto discover each other using the attached interfaces, and establish links which are supervised via the regular link monitoring mechanism. Hence, a kernel local node has no other way to join a cluster than any other node, and have to obey to policies set in the IP or device layers of the stack. - Only when a sender has established with 100% certainty that the peer node is located in a kernel local namespace does it choose to let user data messages, and only those, take the crossover path to the receiving node/namespace. - If the receiving node/namespace is removed, its namespace pointer is invalidated at all peer nodes, and their neighbor link monitoring will eventually note that this node is gone. - To ensure the "100% certainty" criteria, and prevent any possible spoofing, received discovery messages must contain a proof that the sender knows a common secret. We use the hash mix of the sending node/namespace for this purpose, since it can be accessed directly by all other namespaces in the kernel. Upon reception of a discovery message, the receiver checks this proof against all the local namespaces'hash_mix:es. If it finds a match, that, along with a matching node id and cluster id, this is deemed sufficient proof that the peer node in question is in a local namespace, and a wormhole can be opened. - We should also consider that TIPC is intended to be a cluster local IPC mechanism (just like e.g. UNIX sockets) rather than a network protocol, and hence we think it can justified to allow it to shortcut the lower protocol layers. Regarding traceability, we should notice that since commit 6c9081a3915d ("tipc: add loopback device tracking") it is possible to follow the node internal packet flow by just activating tcpdump on the loopback interface. This will be true even for this mechanism; by activating tcpdump on the involved nodes' loopback interfaces their inter-name space messaging can easily be tracked. v2: - update 'net' pointer when node left/rejoined v3: - grab read/write lock when using node ref obj v4: - clone traffics between netns to loopback Suggested-by: Jon Maloy Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/node.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 4 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index f2e3cf70c922..4b60928049ea 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -126,6 +126,8 @@ struct tipc_node { struct timer_list timer; struct rcu_head rcu; unsigned long delete_at; + struct net *peer_net; + u32 peer_hash_mix; }; /* Node FSM states and events: @@ -184,7 +186,7 @@ static struct tipc_link *node_active_link(struct tipc_node *n, int sel) return n->links[bearer_id].link; } -int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected) { struct tipc_node *n; int bearer_id; @@ -194,6 +196,14 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) if (unlikely(!n)) return mtu; + /* Allow MAX_MSG_SIZE when building connection oriented message + * if they are in the same core network + */ + if (n->peer_net && connected) { + tipc_node_put(n); + return mtu; + } + bearer_id = n->active_links[sel & 1]; if (likely(bearer_id != INVALID_BEARER_ID)) mtu = n->links[bearer_id].mtu; @@ -360,8 +370,37 @@ static void tipc_node_write_unlock(struct tipc_node *n) } } +static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes) +{ + int net_id = tipc_netid(n->net); + struct tipc_net *tn_peer; + struct net *tmp; + u32 hash_chk; + + if (n->peer_net) + return; + + for_each_net_rcu(tmp) { + tn_peer = tipc_net(tmp); + if (!tn_peer) + continue; + /* Integrity checking whether node exists in namespace or not */ + if (tn_peer->net_id != net_id) + continue; + if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN)) + continue; + hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random); + if (hash_mixes ^ hash_chk) + continue; + n->peer_net = tmp; + n->peer_hash_mix = hash_mixes; + break; + } +} + static struct tipc_node *tipc_node_create(struct net *net, u32 addr, - u8 *peer_id, u16 capabilities) + u8 *peer_id, u16 capabilities, + u32 signature, u32 hash_mixes) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n, *temp_node; @@ -372,6 +411,8 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, spin_lock_bh(&tn->node_list_lock); n = tipc_node_find(net, addr); if (n) { + if (n->peer_hash_mix ^ hash_mixes) + tipc_node_assign_peer_net(n, hash_mixes); if (n->capabilities == capabilities) goto exit; /* Same node may come back with new capabilities */ @@ -389,6 +430,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } + goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -399,6 +441,10 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, n->addr = addr; memcpy(&n->peer_id, peer_id, 16); n->net = net; + n->peer_net = NULL; + n->peer_hash_mix = 0; + /* Assign kernel local namespace if exists */ + tipc_node_assign_peer_net(n, hash_mixes); n->capabilities = capabilities; kref_init(&n->kref); rwlock_init(&n->lock); @@ -426,6 +472,10 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_bc_sndlink(net), &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); + if (n->peer_net) { + n->peer_net = NULL; + n->peer_hash_mix = 0; + } kfree(n); n = NULL; goto exit; @@ -979,7 +1029,7 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) void tipc_node_check_dest(struct net *net, u32 addr, u8 *peer_id, struct tipc_bearer *b, - u16 capabilities, u32 signature, + u16 capabilities, u32 signature, u32 hash_mixes, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr) { @@ -998,7 +1048,8 @@ void tipc_node_check_dest(struct net *net, u32 addr, *dupl_addr = false; *respond = false; - n = tipc_node_create(net, addr, peer_id, capabilities); + n = tipc_node_create(net, addr, peer_id, capabilities, signature, + hash_mixes); if (!n) return; @@ -1343,6 +1394,10 @@ static void node_lost_contact(struct tipc_node *n, /* Notify publications from this node */ n->action_flags |= TIPC_NOTIFY_NODE_DOWN; + if (n->peer_net) { + n->peer_net = NULL; + n->peer_hash_mix = 0; + } /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, @@ -1424,6 +1479,56 @@ msg_full: return -EMSGSIZE; } +static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) +{ + struct tipc_msg *hdr = buf_msg(skb_peek(list)); + struct sk_buff_head inputq; + + switch (msg_user(hdr)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + if (msg_connected(hdr) || msg_named(hdr)) { + tipc_loopback_trace(peer_net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(peer_net, list); + return; + } + if (msg_mcast(hdr)) { + tipc_loopback_trace(peer_net, list); + skb_queue_head_init(&inputq); + tipc_sk_mcast_rcv(peer_net, list, &inputq); + __skb_queue_purge(list); + skb_queue_purge(&inputq); + return; + } + return; + case MSG_FRAGMENTER: + if (tipc_msg_assemble(list)) { + tipc_loopback_trace(peer_net, list); + skb_queue_head_init(&inputq); + tipc_sk_mcast_rcv(peer_net, list, &inputq); + __skb_queue_purge(list); + skb_queue_purge(&inputq); + } + return; + case GROUP_PROTOCOL: + case CONN_MANAGER: + tipc_loopback_trace(peer_net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(peer_net, list); + return; + case LINK_PROTOCOL: + case NAME_DISTRIBUTOR: + case TUNNEL_PROTOCOL: + case BCAST_PROTOCOL: + return; + default: + return; + }; +} + /** * tipc_node_xmit() is the general link level function for message sending * @net: the applicable net namespace @@ -1439,6 +1544,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; + bool node_up = false; int bearer_id; int rc; @@ -1456,6 +1562,17 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, } tipc_node_read_lock(n); + node_up = node_is_up(n); + if (node_up && n->peer_net && check_net(n->peer_net)) { + /* xmit inner linux container */ + tipc_lxc_xmit(n->peer_net, list); + if (likely(skb_queue_empty(list))) { + tipc_node_read_unlock(n); + tipc_node_put(n); + return 0; + } + } + bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); @@ -2587,3 +2704,33 @@ int tipc_node_dump(struct tipc_node *n, bool more, char *buf) return i; } + +void tipc_node_pre_cleanup_net(struct net *exit_net) +{ + struct tipc_node *n; + struct tipc_net *tn; + struct net *tmp; + + rcu_read_lock(); + for_each_net_rcu(tmp) { + if (tmp == exit_net) + continue; + tn = tipc_net(tmp); + if (!tn) + continue; + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_rcu(n, &tn->node_list, list) { + if (!n->peer_net) + continue; + if (n->peer_net != exit_net) + continue; + tipc_node_write_lock(n); + n->peer_net = NULL; + n->peer_hash_mix = 0; + tipc_node_write_unlock_fast(n); + break; + } + spin_unlock_bh(&tn->node_list_lock); + } + rcu_read_unlock(); +} -- cgit From 6708ef779249b3d8a7a1b7a52ae0b5e7d5a0a9b2 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Wed, 6 Nov 2019 13:26:09 +0700 Subject: tipc: update cluster capabilities if node deleted There are two improvements when re-calculate cluster capabilities: - When deleting a specific down node, need to re-calculate. - In tipc_node_cleanup(), do not need to re-calculate if node is still existing in cluster. Acked-by: Jon Maloy Signed-off-by: Hoang Le Acked-by: Jon Signed-off-by: David S. Miller --- net/tipc/node.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 4b60928049ea..1f1584518221 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -667,6 +667,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer) } tipc_node_write_unlock(peer); + if (!deleted) { + spin_unlock_bh(&tn->node_list_lock); + return deleted; + } + /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -2043,7 +2048,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; - struct tipc_node *peer; + struct tipc_node *peer, *temp_node; u32 addr; int err; @@ -2084,6 +2089,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) tipc_node_write_unlock(peer); tipc_node_delete(peer); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } err = 0; err_out: tipc_node_put(peer); -- cgit From d408bef4bfa60bac665b6e7239269570039a968b Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 8 Nov 2019 10:02:37 +0700 Subject: tipc: eliminate checking netns if node established Currently, we scan over all network namespaces at each received discovery message in order to check if the sending peer might be present in a host local namespaces. This is unnecessary since we can assume that a peer will not change its location during an established session. We now improve the condition for this testing so that we don't perform any redundant scans. Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/node.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 1f1584518221..b66d2f67b1dd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,10 +472,6 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_bc_sndlink(net), &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - if (n->peer_net) { - n->peer_net = NULL; - n->peer_hash_mix = 0; - } kfree(n); n = NULL; goto exit; @@ -1073,6 +1069,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, if (sign_match && addr_match && link_up) { /* All is fine. Do nothing. */ reset = false; + /* Peer node is not a container/local namespace */ + if (!n->peer_hash_mix) + n->peer_hash_mix = hash_mixes; } else if (sign_match && addr_match && !link_up) { /* Respond. The link will come up in due time */ *respond = true; @@ -1398,11 +1397,8 @@ static void node_lost_contact(struct tipc_node *n, /* Notify publications from this node */ n->action_flags |= TIPC_NOTIFY_NODE_DOWN; - - if (n->peer_net) { - n->peer_net = NULL; - n->peer_hash_mix = 0; - } + n->peer_net = NULL; + n->peer_hash_mix = 0; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, -- cgit From 4cbf8ac2fe5a0846508fe02b95a5de1a90fa73f4 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Fri, 8 Nov 2019 12:05:09 +0700 Subject: tipc: enable creating a "preliminary" node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When user sets RX key for a peer not existing on the own node, a new node entry is needed to which the RX key will be attached. However, since the peer node address (& capabilities) is unknown at that moment, only the node-ID is provided, this commit allows the creation of a node with only the data that we call as “preliminary”. A preliminary node is not the object of the “tipc_node_find()” but the “tipc_node_find_by_id()”. Once the first message i.e. LINK_CONFIG comes from that peer, and is successfully decrypted by the own node, the actual peer node data will be properly updated and the node will function as usual. In addition, the node timer always starts when a node object is created so if a preliminary node is not used, it will be cleaned up. The later encryption functions will also use the node timer and be able to create a preliminary node automatically when needed. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/node.c | 99 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 27 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index b66d2f67b1dd..43d12a630f34 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -89,6 +89,7 @@ struct tipc_bclink_entry { * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions * @state: connectivity state vs peer node + * @preliminary: a preliminary node or not * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) @@ -112,6 +113,7 @@ struct tipc_node { int action_flags; struct list_head list; int state; + bool preliminary; bool failover_sent; u16 sync_point; int link_cnt; @@ -120,6 +122,7 @@ struct tipc_node { u32 signature; u32 link_id; u8 peer_id[16]; + char peer_id_string[NODE_ID_STR_LEN]; struct list_head publ_list; struct list_head conn_sks; unsigned long keepalive_intv; @@ -245,6 +248,16 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr) return caps; } +u32 tipc_node_get_addr(struct tipc_node *node) +{ + return (node) ? node->addr : 0; +} + +char *tipc_node_get_id_str(struct tipc_node *node) +{ + return node->peer_id_string; +} + static void tipc_node_kref_release(struct kref *kref) { struct tipc_node *n = container_of(kref, struct tipc_node, kref); @@ -274,7 +287,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr) rcu_read_lock(); hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) { - if (node->addr != addr) + if (node->addr != addr || node->preliminary) continue; if (!kref_get_unless_zero(&node->kref)) node = NULL; @@ -400,17 +413,39 @@ static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes) static struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, u16 capabilities, - u32 signature, u32 hash_mixes) + u32 hash_mixes, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n, *temp_node; struct tipc_link *l; + unsigned long intv; int bearer_id; int i; spin_lock_bh(&tn->node_list_lock); - n = tipc_node_find(net, addr); + n = tipc_node_find(net, addr) ?: + tipc_node_find_by_id(net, peer_id); if (n) { + if (!n->preliminary) + goto update; + if (preliminary) + goto exit; + /* A preliminary node becomes "real" now, refresh its data */ + tipc_node_write_lock(n); + n->preliminary = false; + n->addr = addr; + hlist_del_rcu(&n->hash); + hlist_add_head_rcu(&n->hash, + &tn->node_htable[tipc_hashfn(addr)]); + list_del_rcu(&n->list); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n->addr < temp_node->addr) + break; + } + list_add_tail_rcu(&n->list, &temp_node->list); + tipc_node_write_unlock_fast(n); + +update: if (n->peer_hash_mix ^ hash_mixes) tipc_node_assign_peer_net(n, hash_mixes); if (n->capabilities == capabilities) @@ -438,7 +473,9 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, pr_warn("Node creation failed, no memory\n"); goto exit; } + tipc_nodeid2string(n->peer_id_string, peer_id); n->addr = addr; + n->preliminary = preliminary; memcpy(&n->peer_id, peer_id, 16); n->net = net; n->peer_net = NULL; @@ -463,22 +500,14 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, U16_MAX, - tipc_link_window(tipc_bc_sndlink(net)), - n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, - tipc_bc_sndlink(net), - &n->bc_entry.link)) { - pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n); - n = NULL; - goto exit; - } + n->bc_entry.link = NULL; tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); - n->keepalive_intv = U32_MAX; + /* Start a slow timer anyway, crypto needs it */ + n->keepalive_intv = 10000; + intv = jiffies + msecs_to_jiffies(n->keepalive_intv); + if (!mod_timer(&n->timer, intv)) + tipc_node_get(n); hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n->addr < temp_node->addr) @@ -1001,6 +1030,8 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) { struct tipc_net *tn = tipc_net(net); struct tipc_node *n; + bool preliminary; + u32 sugg_addr; /* Suggest new address if some other peer is using this one */ n = tipc_node_find(net, addr); @@ -1016,9 +1047,11 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) /* Suggest previously used address if peer is known */ n = tipc_node_find_by_id(net, id); if (n) { - addr = n->addr; + sugg_addr = n->addr; + preliminary = n->preliminary; tipc_node_put(n); - return addr; + if (!preliminary) + return sugg_addr; } /* Even this node may be in conflict */ @@ -1035,7 +1068,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; - struct tipc_link *l; + struct tipc_link *l, *snd_l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; @@ -1049,12 +1082,27 @@ void tipc_node_check_dest(struct net *net, u32 addr, *dupl_addr = false; *respond = false; - n = tipc_node_create(net, addr, peer_id, capabilities, signature, - hash_mixes); + n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes, + false); if (!n) return; tipc_node_write_lock(n); + if (unlikely(!n->bc_entry.link)) { + snd_l = tipc_bc_sndlink(net); + if (!tipc_link_bc_create(net, tipc_own_addr(net), + addr, U16_MAX, + tipc_link_window(snd_l), + n->capabilities, + &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, + &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no mem\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + return; + } + } le = &n->links[b->identity]; @@ -2134,6 +2182,8 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) } list_for_each_entry_rcu(node, &tn->node_list, list) { + if (node->preliminary) + continue; if (last_addr) { if (node->addr == last_addr) last_addr = 0; @@ -2649,11 +2699,6 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, return skb->len; } -u32 tipc_node_get_addr(struct tipc_node *node) -{ - return (node) ? node->addr : 0; -} - /** * tipc_node_dump - dump TIPC node data * @n: tipc node to be dumped -- cgit From fc1b6d6de2208774efd2a20bf0daddb02d18b1e0 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Fri, 8 Nov 2019 12:05:11 +0700 Subject: tipc: introduce TIPC encryption & authentication This commit offers an option to encrypt and authenticate all messaging, including the neighbor discovery messages. The currently most advanced algorithm supported is the AEAD AES-GCM (like IPSec or TLS). All encryption/decryption is done at the bearer layer, just before leaving or after entering TIPC. Supported features: - Encryption & authentication of all TIPC messages (header + data); - Two symmetric-key modes: Cluster and Per-node; - Automatic key switching; - Key-expired revoking (sequence number wrapped); - Lock-free encryption/decryption (RCU); - Asynchronous crypto, Intel AES-NI supported; - Multiple cipher transforms; - Logs & statistics; Two key modes: - Cluster key mode: One single key is used for both TX & RX in all nodes in the cluster. - Per-node key mode: Each nodes in the cluster has one specific TX key. For RX, a node requires its peers' TX key to be able to decrypt the messages from those peers. Key setting from user-space is performed via netlink by a user program (e.g. the iproute2 'tipc' tool). Internal key state machine: Attach Align(RX) +-+ +-+ | V | V +---------+ Attach +---------+ | IDLE |---------------->| PENDING |(user = 0) +---------+ +---------+ A A Switch| A | | | | | | Free(switch/revoked) | | (Free)| +----------------------+ | |Timeout | (TX) | | |(RX) | | | | | | v | +---------+ Switch +---------+ | PASSIVE |<----------------| ACTIVE | +---------+ (RX) +---------+ (user = 1) (user >= 1) The number of TFMs is 10 by default and can be changed via the procfs 'net/tipc/max_tfms'. At this moment, as for simplicity, this file is also used to print the crypto statistics at runtime: echo 0xfff1 > /proc/sys/net/tipc/max_tfms The patch defines a new TIPC version (v7) for the encryption message (- backward compatibility as well). The message is basically encapsulated as follows: +----------------------------------------------------------+ | TIPCv7 encryption | Original TIPCv2 | Authentication | | header | packet (encrypted) | Tag | +----------------------------------------------------------+ The throughput is about ~40% for small messages (compared with non- encryption) and ~9% for large messages. With the support from hardware crypto i.e. the Intel AES-NI CPU instructions, the throughput increases upto ~85% for small messages and ~55% for large messages. By default, the new feature is inactive (i.e. no encryption) until user sets a key for TIPC. There is however also a new option - "TIPC_CRYPTO" in the kernel configuration to enable/disable the new code when needed. MAINTAINERS | add two new files 'crypto.h' & 'crypto.c' in tipc Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/node.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 80 insertions(+), 19 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 43d12a630f34..d8bf2c179562 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -44,6 +44,7 @@ #include "discover.h" #include "netlink.h" #include "trace.h" +#include "crypto.h" #define INVALID_NODE_SIG 0x10000 #define NODE_CLEANUP_AFTER 300000 @@ -100,6 +101,7 @@ struct tipc_bclink_entry { * @publ_list: list of publications * @rcu: rcu struct for tipc_node * @delete_at: indicates the time for deleting a down node + * @crypto_rx: RX crypto handler */ struct tipc_node { u32 addr; @@ -131,6 +133,9 @@ struct tipc_node { unsigned long delete_at; struct net *peer_net; u32 peer_hash_mix; +#ifdef CONFIG_TIPC_CRYPTO + struct tipc_crypto *crypto_rx; +#endif }; /* Node FSM states and events: @@ -168,7 +173,6 @@ static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); -static void tipc_node_put(struct tipc_node *node); static bool node_is_up(struct tipc_node *n); static void tipc_node_delete_from_list(struct tipc_node *node); @@ -258,15 +262,41 @@ char *tipc_node_get_id_str(struct tipc_node *node) return node->peer_id_string; } +#ifdef CONFIG_TIPC_CRYPTO +/** + * tipc_node_crypto_rx - Retrieve crypto RX handle from node + * Note: node ref counter must be held first! + */ +struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n) +{ + return (__n) ? __n->crypto_rx : NULL; +} + +struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos) +{ + return container_of(pos, struct tipc_node, list)->crypto_rx; +} +#endif + +void tipc_node_free(struct rcu_head *rp) +{ + struct tipc_node *n = container_of(rp, struct tipc_node, rcu); + +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&n->crypto_rx); +#endif + kfree(n); +} + static void tipc_node_kref_release(struct kref *kref) { struct tipc_node *n = container_of(kref, struct tipc_node, kref); kfree(n->bc_entry.link); - kfree_rcu(n, rcu); + call_rcu(&n->rcu, tipc_node_free); } -static void tipc_node_put(struct tipc_node *node) +void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } @@ -411,9 +441,9 @@ static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes) } } -static struct tipc_node *tipc_node_create(struct net *net, u32 addr, - u8 *peer_id, u16 capabilities, - u32 hash_mixes, bool preliminary) +struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, + u16 capabilities, u32 hash_mixes, + bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n, *temp_node; @@ -474,6 +504,14 @@ update: goto exit; } tipc_nodeid2string(n->peer_id_string, peer_id); +#ifdef CONFIG_TIPC_CRYPTO + if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) { + pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string); + kfree(n); + n = NULL; + goto exit; + } +#endif n->addr = addr; n->preliminary = preliminary; memcpy(&n->peer_id, peer_id, 16); @@ -725,6 +763,10 @@ static void tipc_node_timeout(struct timer_list *t) return; } +#ifdef CONFIG_TIPC_CRYPTO + /* Take any crypto key related actions first */ + tipc_crypto_timeout(n->crypto_rx); +#endif __skb_queue_head_init(&xmitq); /* Initial node interval to value larger (10 seconds), then it will be @@ -745,7 +787,7 @@ static void tipc_node_timeout(struct timer_list *t) remains--; } tipc_node_read_unlock(n); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); } @@ -777,7 +819,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE; + n->links[bearer_id].mtu = tipc_link_mss(nl); tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); @@ -831,7 +873,7 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); maddr = &n->links[bearer_id].maddr; - tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n); tipc_node_write_unlock(n); } @@ -986,7 +1028,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); if (!skb_queue_empty(&xmitq)) - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n); tipc_sk_rcv(n->net, &le->inputq); } @@ -1640,7 +1682,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); else - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); tipc_node_put(n); @@ -1788,7 +1830,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id } if (!skb_queue_empty(&xmitq)) - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); if (!skb_queue_empty(&be->inputq1)) tipc_node_mcast_rcv(n); @@ -1966,20 +2008,38 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { struct sk_buff_head xmitq; - struct tipc_node *n; + struct tipc_link_entry *le; struct tipc_msg *hdr; + struct tipc_node *n; int bearer_id = b->identity; - struct tipc_link_entry *le; u32 self = tipc_own_addr(net); int usr, rc = 0; u16 bc_ack; +#ifdef CONFIG_TIPC_CRYPTO + struct tipc_ehdr *ehdr; - __skb_queue_head_init(&xmitq); + /* Check if message must be decrypted first */ + if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb)) + goto rcv; + ehdr = (struct tipc_ehdr *)skb->data; + if (likely(ehdr->user != LINK_CONFIG)) { + n = tipc_node_find(net, ntohl(ehdr->addr)); + if (unlikely(!n)) + goto discard; + } else { + n = tipc_node_find_by_id(net, ehdr->id); + } + tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b); + if (!skb) + return; + +rcv: +#endif /* Ensure message is well-formed before touching the header */ - TIPC_SKB_CB(skb)->validated = false; if (unlikely(!tipc_msg_validate(&skb))) goto discard; + __skb_queue_head_init(&xmitq); hdr = buf_msg(skb); usr = msg_user(hdr); bc_ack = msg_bcast_ack(hdr); @@ -2050,7 +2110,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) tipc_sk_rcv(net, &le->inputq); if (!skb_queue_empty(&xmitq)) - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); tipc_node_put(n); discard: @@ -2081,7 +2141,7 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, tipc_link_set_mtu(e->link, b->mtu); } tipc_node_write_unlock(n); - tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } rcu_read_unlock(); @@ -2323,7 +2383,8 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) out: tipc_node_read_unlock(node); - tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr, + NULL); return res; } -- cgit From e1f32190cf7ddd55778b460e7d44af3f76529698 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Fri, 8 Nov 2019 12:05:12 +0700 Subject: tipc: add support for AEAD key setting via netlink This commit adds two netlink commands to TIPC in order for user to be able to set or remove AEAD keys: - TIPC_NL_KEY_SET - TIPC_NL_KEY_FLUSH When the 'KEY_SET' is given along with the key data, the key will be initiated and attached to TIPC crypto. On the other hand, the 'KEY_FLUSH' command will remove all existing keys if any. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/node.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index d8bf2c179562..aaf595613e6e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2760,6 +2760,141 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, return skb->len; } +#ifdef CONFIG_TIPC_CRYPTO +static int tipc_nl_retrieve_key(struct nlattr **attrs, + struct tipc_aead_key **key) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + + if (!attr) + return -ENODATA; + + *key = (struct tipc_aead_key *)nla_data(attr); + if (nla_len(attr) < tipc_aead_key_size(*key)) + return -EINVAL; + + return 0; +} + +static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_ID]; + + if (!attr) + return -ENODATA; + + if (nla_len(attr) < TIPC_NODEID_LEN) + return -EINVAL; + + *node_id = (u8 *)nla_data(attr); + return 0; +} + +int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n = NULL; + struct tipc_aead_key *ukey; + struct tipc_crypto *c; + u8 *id, *own_id; + int rc = 0; + + if (!info->attrs[TIPC_NLA_NODE]) + return -EINVAL; + + rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX, + info->attrs[TIPC_NLA_NODE], + tipc_nl_node_policy, info->extack); + if (rc) + goto exit; + + own_id = tipc_own_id(net); + if (!own_id) { + rc = -EPERM; + goto exit; + } + + rc = tipc_nl_retrieve_key(attrs, &ukey); + if (rc) + goto exit; + + rc = tipc_aead_key_validate(ukey); + if (rc) + goto exit; + + rc = tipc_nl_retrieve_nodeid(attrs, &id); + switch (rc) { + case -ENODATA: + /* Cluster key mode */ + rc = tipc_crypto_key_init(tn->crypto_tx, ukey, CLUSTER_KEY); + break; + case 0: + /* Per-node key mode */ + if (!memcmp(id, own_id, NODE_ID_LEN)) { + c = tn->crypto_tx; + } else { + n = tipc_node_find_by_id(net, id) ?: + tipc_node_create(net, 0, id, 0xffffu, 0, true); + if (unlikely(!n)) { + rc = -ENOMEM; + break; + } + c = n->crypto_rx; + } + + rc = tipc_crypto_key_init(c, ukey, PER_NODE_KEY); + if (n) + tipc_node_put(n); + break; + default: + break; + } + +exit: + return (rc < 0) ? rc : 0; +} + +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_set_key(skb, info); + rtnl_unlock(); + + return err; +} + +int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + + tipc_crypto_key_flush(tn->crypto_tx); + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) + tipc_crypto_key_flush(n->crypto_rx); + rcu_read_unlock(); + + pr_info("All keys are flushed!\n"); + return 0; +} + +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_flush_key(skb, info); + rtnl_unlock(); + + return err; +} +#endif + /** * tipc_node_dump - dump TIPC node data * @n: tipc node to be dumped -- cgit From ba5f6a8617f4cd8e77da0a190b9647065014eade Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Thu, 21 Nov 2019 10:01:09 +0700 Subject: tipc: update replicast capability for broadcast send link When setting up a cluster with non-replicast/replicast capability supported. This capability will be disabled for broadcast send link in order to be backwards compatible. However, when these non-support nodes left and be removed out the cluster. We don't update this capability on broadcast send link. Then, some of features that based on this capability will also disabling as unexpected. In this commit, we make sure the broadcast send link capabilities will be re-calculated as soon as a node removed/rejoined a cluster. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/node.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index aaf595613e6e..ab04e00cb95b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -496,6 +496,9 @@ update: tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, + (tn->capabilities & TIPC_BCAST_RCAST)); + goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -557,6 +560,7 @@ update: list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(&tn->node_list_lock); @@ -740,7 +744,8 @@ static bool tipc_node_cleanup(struct tipc_node *peer) list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } - + tipc_bcast_toggle_rcast(peer->net, + (tn->capabilities & TIPC_BCAST_RCAST)); spin_unlock_bh(&tn->node_list_lock); return deleted; } @@ -2198,6 +2203,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); err = 0; err_out: tipc_node_put(peer); -- cgit