Diffstat (limited to 'net/tipc')
44 files changed, 7930 insertions, 2218 deletions
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index e450212121d2..bb0d71eb02a6 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # TIPC configuration # @@ -5,7 +6,8 @@ menuconfig TIPC tristate "The TIPC Protocol" depends on INET - ---help--- + depends on IPV6 || IPV6=n + help The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. This protocol originates from Ericsson where it has been used in carrier grade @@ -16,7 +18,7 @@ menuconfig TIPC This protocol support is also available as a module ( = code which can be inserted in and removed from the running kernel whenever you want). The module will be called tipc. If you want to compile it - as a module, say M here and read <file:Documentation/kbuild/modules.txt>. + as a module, say M here and read <file:Documentation/kbuild/modules.rst>. If in doubt, say N. @@ -30,15 +32,29 @@ config TIPC_MEDIA_UDP bool "IP/UDP media type support" depends on TIPC select NET_UDP_TUNNEL + default y help Saying Y here will enable support for running TIPC over IP/UDP - bool + +config TIPC_CRYPTO + bool "TIPC encryption support" + depends on TIPC + select CRYPTO + select CRYPTO_AES + select CRYPTO_GCM default y + help + Saying Y here will enable support for TIPC encryption. + All TIPC messages will be encrypted/decrypted by using the currently most + advanced algorithm: AEAD AES-GCM (like IPSec or TLS) before leaving/ + entering the TIPC stack. + Key setting from user-space is performed via netlink by a user program + (e.g. the iproute2 'tipc' tool). config TIPC_DIAG tristate "TIPC: socket monitoring interface" depends on TIPC default y - ---help--- + help Support for TIPC socket monitoring interface used by ss tool. If unsure, say Y. diff --git a/net/tipc/Makefile b/net/tipc/Makefile index c86aba0282af..18e1636aa036 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,15 +9,14 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - topsrv.o socket.o group.o trace.o + topsrv.o group.o trace.o CFLAGS_trace.o += -I$(src) tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o +tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o - -obj-$(CONFIG_TIPC_DIAG) += diag.o - -tipc_diag-y := diag.o +obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o +tipc_diag-y += diag.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index b88d48d00913..6f5c54cbf8d9 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -55,12 +56,11 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr) void tipc_set_node_id(struct net *net, u8 *id) { struct tipc_net *tn = tipc_net(net); - u32 *tmp = (u32 *)id; memcpy(tn->node_id, id, NODE_ID_LEN); tipc_nodeid2string(tn->node_id_string, id); - tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]; - pr_info("Own node identity %s, cluster identity %u\n", + tn->trial_addr = hash128to32(id); + pr_info("Node identity %s, cluster identity %u\n", tipc_own_id_string(net), tn->net_id); } @@ -75,10 +75,11 @@ void tipc_set_node_addr(struct net *net, u32 addr) tipc_set_node_id(net, node_id); } tn->trial_addr = addr; - pr_info("32-bit node address hash set to %x\n", addr); + tn->addr_trial_end = jiffies; + pr_info("Node number set to %u\n", addr); } -char *tipc_nodeid2string(char *str, u8 *id) +int tipc_nodeid2string(char *str, u8 *id) { int i; u8 c; @@ -108,7 +109,7 @@ char *tipc_nodeid2string(char *str, u8 *id) if (i == NODE_ID_LEN) { memcpy(str, id, NODE_ID_LEN); str[NODE_ID_LEN] = 0; - return str; + return i; } /* Translate to hex string */ @@ -119,5 +120,5 @@ char *tipc_nodeid2string(char *str, u8 *id) for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--) str[i] = 0; - return str; + return i + 1; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 31bee0ea7b3e..a113cf7e1f89 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,6 +44,50 @@ #include <net/netns/generic.h> #include "core.h" +/* Struct tipc_uaddr: internal version of struct sockaddr_tipc. + * Must be kept aligned both regarding field positions and size. 
+ */ +struct tipc_uaddr { + unsigned short family; + unsigned char addrtype; + signed char scope; + union { + struct { + struct tipc_service_addr sa; + u32 lookup_node; + }; + struct tipc_service_range sr; + struct tipc_socket_addr sk; + }; +}; + +static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope, + u32 type, u32 lower, u32 upper) +{ + ua->family = AF_TIPC; + ua->addrtype = atype; + ua->scope = scope; + ua->sr.type = type; + ua->sr.lower = lower; + ua->sr.upper = upper; +} + +static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len) +{ + u32 atype; + + if (len < sizeof(struct sockaddr_tipc)) + return false; + atype = ua->addrtype; + if (ua->family != AF_TIPC) + return false; + if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR) + return true; + if (atype == TIPC_SERVICE_RANGE) + return ua->sr.upper >= ua->sr.lower; + return false; +} + static inline u32 tipc_own_addr(struct net *net) { return tipc_net(net)->node_addr; @@ -85,7 +130,6 @@ static inline int in_own_node(struct net *net, u32 addr) bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr); void tipc_set_node_id(struct net *net, u8 *id); void tipc_set_node_addr(struct net *net, u32 addr); -char *tipc_nodeid2string(char *str, u8 *id); -u32 tipc_node_id2hash(u8 *id128); +int tipc_nodeid2string(char *str, u8 *id); #endif diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..114fef65f92e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -46,6 +46,7 @@ #define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; +unsigned long sysctl_tipc_bc_retruni __read_mostly; /** * struct tipc_bc_base - base structure for keeping broadcast send state @@ -54,7 +55,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +67,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -80,12 +85,12 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net) */ int tipc_bcast_get_mtu(struct net *net) { - return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; + return tipc_link_mss(tipc_bc_sndlink(net)); } -void tipc_bcast_disable_rcast(struct net *net) +void tipc_bcast_toggle_rcast(struct net *net, bool supp) { - tipc_bc_base(net)->rcast_support = false; + tipc_bc_base(net)->rcast_support = supp; } static void tipc_bcbase_calc_bc_threshold(struct net *net) @@ -103,6 +108,8 @@ static void tipc_bcbase_select_primary(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); int all_dests = tipc_link_bc_peers(bb->link); + int max_win = tipc_link_max_win(bb->link); + int min_win = tipc_link_min_win(bb->link); int i, mtu, prim; bb->primary_bearer = INVALID_BEARER_ID; @@ -116,8 +123,12 @@ static void tipc_bcbase_select_primary(struct net *net) continue; mtu = tipc_bearer_mtu(net, i); - if (mtu < tipc_link_mtu(bb->link)) + if (mtu < 
tipc_link_mtu(bb->link)) { tipc_link_set_mtu(bb->link, mtu); + tipc_link_set_queue_limits(bb->link, + min_win, + max_win); + } bb->bcast_support &= tipc_bearer_bcast_support(net, i); if (bb->dests[i] < all_dests) continue; @@ -181,7 +192,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) } /* We have to transmit across all bearers */ - skb_queue_head_init(&_xmitq); + __skb_queue_head_init(&_xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { if (!bb->dests[bearer_id]) continue; @@ -216,9 +227,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, } /* Can current method be changed ? */ method->expires = jiffies + TIPC_METHOD_EXPIRE; - if (method->mandatory || time_before(jiffies, exp)) + if (method->mandatory) + return; + + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) return; + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } @@ -230,14 +256,14 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, * Consumes the buffer chain. * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE */ -static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, - u16 *cong_link_cnt) +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt) { struct tipc_link *l = tipc_bc_sndlink(net); struct sk_buff_head xmitq; int rc = 0; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -267,7 +293,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - skb_queue_head_init(&_pkts); + __skb_queue_head_init(&_pkts); list_for_each_entry_safe(dst, tmp, &dests->list, list) { dnode = dst->node; @@ -281,6 +307,64 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, return 0; } +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * Returns 0 if success, otherwise errno + */ +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests) +{ + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + u16 cong_link_cnt = 0; + struct sk_buff *_skb; + int rc = 0; + + /* Is a cluster supporting with new capabilities ? 
*/ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; + + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_inner_hdr(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; + + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!_skb) + return -ENOMEM; + + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + msg_set_errcode(_hdr, TIPC_ERR_NO_PORT); + + __skb_queue_head_init(&tmpq); + __skb_queue_tail(&tmpq, _skb); + if (method->rcast) + rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt); + else + rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt); + + /* This queue should normally be empty by now */ + __skb_queue_purge(&tmpq); + + return rc; +} + /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace @@ -296,10 +380,13 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; + bool rcast = method->rcast; + struct tipc_msg *hdr; + struct sk_buff *skb; int rc = 0; skb_queue_head_init(&inputq); - skb_queue_head_init(&localq); + __skb_queue_head_init(&localq); /* Clone packets before they are consumed by next call */ if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { @@ -309,14 +396,33 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, /* Send according to determined transmit method */ if (dests->remote) { tipc_bcast_select_xmit_method(net, dests->remote, method); + + skb = skb_peek(pkts); + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_inner_hdr(hdr); + msg_set_is_rcast(hdr, method->rcast); + + /* Switch method ? 
*/ + if (rcast != method->rcast) { + rc = tipc_mcast_send_sync(net, skb, method, dests); + if (unlikely(rc)) { + pr_err("Unable to send SYN: method %d, rc %d\n", + rcast, rc); + goto exit; + } + } + if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); else rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } - if (dests->local) + if (dests->local) { + tipc_loopback_trace(net, &localq); tipc_sk_mcast_rcv(net, &localq, &inputq); + } exit: /* This queue should normally be empty by now */ __skb_queue_purge(pkts); @@ -375,7 +481,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); - tipc_link_bc_ack_rcv(l, acked, &xmitq); + tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); @@ -390,9 +496,11 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, * RCU is locked, no other locks set */ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr) + struct tipc_msg *hdr, + struct sk_buff_head *retrq) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct tipc_gap_ack_blks *ga; struct sk_buff_head xmitq; int rc = 0; @@ -402,8 +510,13 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, if (msg_type(hdr) != STATE_MSG) { tipc_link_bc_init_rcv(l, hdr); } else if (!msg_bc_ack_invalid(hdr)) { - tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); - rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); + tipc_get_gap_ack_blks(&ga, l, hdr, false); + if (!sysctl_tipc_bc_retruni) + retrq = &xmitq; + rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), + msg_bc_gap(hdr), ga, &xmitq, + retrq); + rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq); } tipc_bcast_unlock(net); @@ -456,10 +569,8 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -int tipc_bclink_reset_stats(struct net *net) +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l) { - struct tipc_link *l = tipc_bc_sndlink(net); - if (!l) return -ENOPROTOOPT; @@ -469,26 +580,79 @@ int tipc_bclink_reset_stats(struct net *net) return 0; } -static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) +static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win) { struct tipc_link *l = tipc_bc_sndlink(net); if (!l) return -ENOPROTOOPT; - if (limit < BCLINK_WIN_MIN) - limit = BCLINK_WIN_MIN; - if (limit > TIPC_MAX_LINK_WIN) + if (max_win < BCLINK_WIN_MIN) + max_win = BCLINK_WIN_MIN; + if (max_win > TIPC_MAX_LINK_WIN) return -EINVAL; tipc_bcast_lock(net); - tipc_link_set_queue_limits(l, limit); + tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win); tipc_bcast_unlock(net); return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return 
-ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); + + return 0; +} + int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) { int err; u32 win; + u32 bc_mode; + u32 bc_ratio; struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; if (!attrs[TIPC_NLA_LINK_PROP]) @@ -498,12 +662,28 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) if (err) return err; - if (!props[TIPC_NLA_PROP_WIN]) + if (!props[TIPC_NLA_PROP_WIN] && + !props[TIPC_NLA_PROP_BROADCAST] && + !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { return -EOPNOTSUPP; + } - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_BROADCAST]) { + bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); + err = tipc_bc_link_set_broadcast_mode(net, bc_mode); + } - return tipc_bc_link_set_queue_limits(net, win); + if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) { + bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); + err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); + } + + if (!err && props[TIPC_NLA_PROP_WIN]) { + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + err = tipc_bc_link_set_queue_limits(net, win); + } + + return err; } int tipc_bcast_init(struct net *net) @@ -518,8 +698,9 @@ int tipc_bcast_init(struct net *net) tn->bcbase = bb; spin_lock_init(&tipc_net(net)->bclock); - if (!tipc_link_bc_create(net, 0, 0, - FB_MTU, + if (!tipc_link_bc_create(net, 0, 0, NULL, + one_page_mtu, + BCLINK_WIN_DEFAULT, BCLINK_WIN_DEFAULT, 0, &bb->inputq, @@ -529,7 +710,7 @@ int tipc_bcast_init(struct net *net) goto enomem; bb->link = l; tn->bcl = l; - bb->rc_ratio = 25; + bb->rc_ratio = 10; bb->rcast_support = true; return 0; enomem: @@ -576,3 +757,108 @@ void tipc_nlist_purge(struct tipc_nlist *nl) nl->remote = 0; nl->local = false; } + +u32 tipc_bcast_get_mode(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (bb->force_bcast) + return BCLINK_MODE_BCAST; + + if (bb->force_rcast) + return BCLINK_MODE_RCAST; + + if (bb->bcast_support && bb->rcast_support) + return BCLINK_MODE_SEL; + + return 0; +} + +u32 tipc_bcast_get_broadcast_ratio(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + return bb->rc_ratio; +} + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq) +{ + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr, *_hdr; + bool match = false; + u32 node, port; + + skb = skb_peek(inputq); + if (!skb) + return; + + hdr = buf_msg(skb); + + if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) + return; + + node = msg_orignode(hdr); + if (node == tipc_own_addr(net)) + return; + + port = msg_origport(hdr); + + /* Has the twin SYN message already arrived ? 
*/ + skb_queue_walk(defq, _skb) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + match = true; + break; + } + + if (!match) { + if (!msg_is_syn(hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Deliver non-SYN message from other link, otherwise queue it */ + if (!msg_is_syn(hdr)) { + if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Queue non-SYN/SYN message from same link */ + if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Matching SYN messages => return the one with data, if any */ + __skb_unlink(_skb, defq); + if (msg_data_sz(hdr)) { + kfree_skb(_skb); + } else { + __skb_dequeue(inputq); + kfree_skb(skb); + __skb_queue_tail(inputq, _skb); + } + + /* Deliver subsequent non-SYN messages from same peer */ + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + if (msg_is_syn(_hdr)) + break; + __skb_unlink(_skb, defq); + __skb_queue_tail(inputq, _skb); + } +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 751530ab0c49..2d9352dc7b0e 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -45,9 +45,14 @@ struct tipc_nl_msg; struct tipc_nlist; struct tipc_nitem; extern const char tipc_bclink_name[]; +extern unsigned long sysctl_tipc_bc_retruni; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) +#define BCLINK_MODE_BCAST 0x1 +#define BCLINK_MODE_RCAST 0x2 +#define BCLINK_MODE_SEL 0x4 + struct tipc_nlist { struct list_head list; u32 self; @@ -63,11 +68,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node); /* Cookie to be used between socket and broadcast layer * @rcast: replicast (instead of broadcast) was used at previous xmit * @mandatory: broadcast/replicast indication was set by user + * @deferredq: defer queue to make message in order * @expires: re-evaluate non-mandatory transmit method if we are past this */ struct tipc_mc_method { bool rcast; bool mandatory; + struct sk_buff_head deferredq; unsigned long expires; }; @@ -79,18 +86,28 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); -void tipc_bcast_disable_rcast(struct net *net); +void tipc_bcast_toggle_rcast(struct net *net, bool supp); int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt); +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr); -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); + struct tipc_msg *hdr, + struct sk_buff_head *retrq); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); -int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l); + +u32 tipc_bcast_get_mode(struct net *net); +u32 
tipc_bcast_get_broadcast_ratio(struct net *net); + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq); static inline void tipc_bcast_lock(struct net *net) { diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d27f30a9a01d..ae1ddbf71853 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -44,6 +44,7 @@ #include "netlink.h" #include "udp_media.h" #include "trace.h" +#include "crypto.h" #define MAX_ADDR_STR 60 @@ -62,7 +63,7 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); - return rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + return rcu_dereference(tn->bearer_list[bearer_id]); } static void bearer_disable(struct net *net, struct tipc_bearer *b); @@ -71,6 +72,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, /** * tipc_media_find - locates specified media object by name + * @name: name to locate */ struct tipc_media *tipc_media_find(const char *name) { @@ -85,6 +87,7 @@ struct tipc_media *tipc_media_find(const char *name) /** * media_find_id - locates specified media object by type identifier + * @type: type identifier to locate */ static struct tipc_media *media_find_id(u8 type) { @@ -99,6 +102,9 @@ static struct tipc_media *media_find_id(u8 type) /** * tipc_media_addr_printf - record media address in print buffer + * @buf: output buffer + * @len: output buffer size remaining + * @a: input media address */ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { @@ -126,7 +132,7 @@ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) * - * Returns 1 if bearer name is valid, otherwise 0. + * Return: 1 if bearer name is valid, otherwise 0. 
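As an aside, TIPC bearer names follow the two-part "media:interface" convention (e.g. "eth:eth0" or "udp:UDP1"), which is what bearer_name_validate() below enforces. A minimal user-space sketch of that split — the buffer limits here are illustrative assumptions standing in for TIPC_MAX_MEDIA_NAME/TIPC_MAX_IF_NAME, not the kernel's definitions:

#include <stdio.h>
#include <string.h>

#define MAX_MEDIA_NAME 16	/* assumption for the demo */
#define MAX_IF_NAME    16	/* assumption for the demo */

/* Split "media:interface"; return 1 if both parts are present and fit. */
static int split_bearer_name(const char *name, char *media, char *ifname)
{
	const char *sep = strchr(name, ':');

	if (!sep || sep == name || !sep[1])
		return 0;			/* both components required */
	if (sep - name >= MAX_MEDIA_NAME)
		return 0;			/* media part too long */
	if (strlen(sep + 1) >= MAX_IF_NAME)
		return 0;			/* interface part too long */
	memcpy(media, name, sep - name);
	media[sep - name] = '\0';
	strcpy(ifname, sep + 1);
	return 1;
}

int main(void)
{
	char media[MAX_MEDIA_NAME], ifname[MAX_IF_NAME];

	if (split_bearer_name("eth:eth0", media, ifname))
		printf("media=%s if=%s\n", media, ifname);	/* media=eth if=eth0 */
	return 0;
}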
*/ static int bearer_name_validate(const char *name, struct tipc_bearer_names *name_parts) @@ -138,10 +144,7 @@ static int bearer_name_validate(const char *name, u32 if_len; /* copy bearer name & ensure length is OK */ - name_copy[TIPC_MAX_BEARER_NAME - 1] = 0; - /* need above in case non-Posix strncpy() doesn't pad with nulls */ - strncpy(name_copy, name, TIPC_MAX_BEARER_NAME); - if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0) + if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0) return 0; /* ensure all component parts of bearer name are present */ @@ -160,18 +163,24 @@ static int bearer_name_validate(const char *name, /* return bearer name components, if necessary */ if (name_parts) { - strcpy(name_parts->media_name, media_name); - strcpy(name_parts->if_name, if_name); + if (strscpy(name_parts->media_name, media_name, + TIPC_MAX_MEDIA_NAME) < 0) + return 0; + if (strscpy(name_parts->if_name, if_name, + TIPC_MAX_IF_NAME) < 0) + return 0; } return 1; } /** * tipc_bearer_find - locates bearer object with matching bearer name + * @net: the applicable net namespace + * @name: bearer name to locate */ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; @@ -206,11 +215,10 @@ int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (b) tipc_disc_add_dest(b->disc); rcu_read_unlock(); @@ -218,11 +226,10 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (b) tipc_disc_remove_dest(b->disc); rcu_read_unlock(); @@ -230,10 +237,17 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name + * @net: the applicable net namespace + * @name: bearer name to enable + * @disc_domain: bearer domain + * @prio: bearer priority + * @attr: nlattr array + * @extack: netlink extended ack */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, - struct nlattr *attr[]) + struct nlattr *attr[], + struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; @@ -244,20 +258,23 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; - goto rejected; + NL_SET_ERR_MSG(extack, "Illegal name"); + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } @@ -265,33 +282,43 @@ static int tipc_enable_bearer(struct net *net, const char *name, prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ - while (bearer_id < MAX_BEARERS) { - b = 
rtnl_dereference(tn->bearer_list[bearer_id]); - if (!b) - break; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } if (!strcmp(name, b->name)) { errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } - bearer_id++; - if (b->priority != prio) - continue; - if (++with_this_prio <= 2) - continue; - pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", - name, prio); - if (prio == TIPC_MIN_LINK_PRI) { - errstr = "cannot adjust to lower"; - goto rejected; + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } - pr_warn("Bearer <%s>: trying with adjusted priority\n", name); - prio--; - bearer_id = 0; - with_this_prio = 1; } if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } @@ -299,39 +326,45 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (!b) return -ENOMEM; - strcpy(b->name, name); + strscpy(b->name, name); b->media = m; res = m->enable_media(net, b, attr); if (res) { kfree(b); errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } b->identity = bearer_id; b->tolerance = m->tolerance; - b->window = m->window; + b->min_win = m->min_win; + b->max_win = m->max_win; b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = prio; - test_and_set_bit_lock(0, &b->up); + refcount_set(&b->refcnt, 1); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } - rcu_assign_pointer(tn->bearer_list[bearer_id], b); - if (skb) - tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); - + /* Create monitoring data before accepting activate messages */ if (tipc_mon_create(net, bearer_id)) { bearer_disable(net, b); + kfree_skb(skb); return -ENOMEM; } + test_and_set_bit_lock(0, &b->up); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); + pr_info("Enabled bearer <%s>, priority %u\n", name, prio); return res; @@ -342,6 +375,8 @@ rejected: /** * tipc_reset_bearer - Reset all links established over this bearer + * @net: the applicable net namespace + * @b: the target bearer */ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { @@ -351,8 +386,21 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) return 0; } +bool tipc_bearer_hold(struct tipc_bearer *b) +{ + return (b && refcount_inc_not_zero(&b->refcnt)); +} + +void tipc_bearer_put(struct tipc_bearer *b) +{ + if (b && refcount_dec_and_test(&b->refcnt)) + kfree_rcu(b, rcu); +} + /** - * bearer_disable + * bearer_disable - disable this bearer + * @net: the applicable net namespace + * @b: the bearer to disable * * Note: This routine assumes caller holds RTNL lock. 
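The hunk above replaces the direct kfree_rcu() in bearer_disable() with a hold/put pair so that in-flight users (for instance the new crypto TX path) can pin a bearer beyond an RCU read section. A user-space analogue of that refcount_inc_not_zero()/refcount_dec_and_test() discipline, written with C11 atomics purely for illustration — not kernel code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	atomic_int refcnt;	/* creator initializes this to 1 */
	/* payload ... */
};

/* Take a reference only if the object is still alive (count != 0). */
static bool obj_hold(struct obj *o)
{
	int c = atomic_load(&o->refcnt);

	while (c != 0) {
		if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
			return true;
	}
	return false;	/* object already dying; caller must not use it */
}

/* Drop a reference; the last put frees the object. */
static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcnt, 1) == 1)
		free(o);	/* the kernel defers this via kfree_rcu() */
}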
*/ @@ -369,7 +417,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) if (b->disc) tipc_disc_delete(b->disc); RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); - kfree_rcu(b, rcu); + tipc_bearer_put(b); tipc_mon_delete(net, bearer_id); } @@ -385,10 +433,15 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, dev = dev_get_by_name(net, dev_name); if (!dev) return -ENODEV; - if (tipc_mtu_bad(dev, 0)) { + if (tipc_mtu_bad(dev)) { dev_put(dev); return -EINVAL; } + if (dev == net->loopback_dev) { + dev_put(dev); + pr_info("Enabling <%s> not permitted\n", b->name); + return -EINVAL; + } /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { @@ -412,12 +465,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; - b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface + * @b: the target bearer * * Mark L2 bearer as inactive so that incoming buffers are thrown away */ @@ -434,6 +488,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface + * @net: the associated network namespace * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address @@ -444,7 +499,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct net_device *dev; int delta; - dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + dev = (struct net_device *)rcu_dereference(b->media_ptr); if (!dev) return 0; @@ -481,13 +536,26 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id) struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (b) mtu = b->mtu; rcu_read_unlock(); return mtu; } +int tipc_bearer_min_mtu(struct net *net, u32 bearer_id) +{ + int mtu = TIPC_MIN_BEARER_MTU; + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (b) + mtu += b->encap_hlen; + rcu_read_unlock(); + return mtu; +} + /* tipc_bearer_xmit_skb - sends buffer to destination over bearer */ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, @@ -499,10 +567,15 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, rcu_read_lock(); b = bearer_get(net, bearer_id); - if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) - b->media->send_msg(net, skb, b, dest); - else + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dest, NULL); + if (skb) +#endif + b->media->send_msg(net, skb, b, dest); + } else { kfree_skb(skb); + } rcu_read_unlock(); } @@ -510,7 +583,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, */ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, - struct tipc_media_addr *dst) + struct tipc_media_addr *dst, + struct tipc_node *__dnode) { struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -524,10 +598,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); - if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) - b->media->send_msg(net, skb, b, dst); - else + if 
(likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dst, __dnode); + if (skb) +#endif + b->media->send_msg(net, skb, b, dst); + } else { kfree_skb(skb); + } } rcu_read_unlock(); } @@ -538,6 +617,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq) { struct tipc_net *tn = tipc_net(net); + struct tipc_media_addr *dst; int net_id = tn->net_id; struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -552,14 +632,19 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, msg_set_non_seq(hdr, 1); msg_set_mc_netid(hdr, net_id); __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, &b->bcast_addr); + dst = &b->bcast_addr; +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dst, NULL); + if (skb) +#endif + b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface - * @buf: the received packet + * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond @@ -574,11 +659,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(dev->tipc_ptr) ?: - rcu_dereference_rtnl(orig_dev->tipc_ptr); + b = rcu_dereference(dev->tipc_ptr) ?: + rcu_dereference(orig_dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb_mark_not_on_list(skb); + TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; @@ -615,7 +701,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, test_and_set_bit_lock(0, &b->up); break; } - /* fall through */ + fallthrough; case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); @@ -624,7 +710,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, test_and_set_bit_lock(0, &b->up); break; case NETDEV_CHANGEMTU: - if (tipc_mtu_bad(dev, 0)) { + if (tipc_mtu_bad(dev)) { bearer_disable(net, b); break; } @@ -633,7 +719,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (char *)dev->dev_addr); + (const char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: @@ -661,7 +747,7 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; @@ -674,6 +760,65 @@ void tipc_bearer_stop(struct net *net) } } +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts) +{ + struct net_device *dev = net->loopback_dev; + struct sk_buff *skb, *_skb; + int exp; + + skb_queue_walk(pkts, _skb) { + skb = pskb_copy(_skb, GFP_ATOMIC); + if (!skb) + continue; + + exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { + kfree_skb(skb); + continue; + } + + skb_reset_network_header(skb); + dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, + dev->dev_addr, skb->len); + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + } +} + +static int tipc_loopback_rcv_pkt(struct sk_buff 
*skb, struct net_device *dev, + struct packet_type *pt, struct net_device *od) +{ + consume_skb(skb); + return NET_RX_SUCCESS; +} + +int tipc_attach_loopback(struct net *net) +{ + struct net_device *dev = net->loopback_dev; + struct tipc_net *tn = tipc_net(net); + + if (!dev) + return -ENODEV; + + netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); + tn->loopback_pt.dev = dev; + tn->loopback_pt.type = htons(ETH_P_TIPC); + tn->loopback_pt.func = tipc_loopback_rcv_pkt; + dev_add_pack(&tn->loopback_pt); + return 0; +} + +void tipc_detach_loopback(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + dev_remove_pack(&tn->loopback_pt); + netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); +} + /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer, int nlflags) @@ -687,21 +832,21 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) goto prop_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win)) goto prop_msg_full; if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) @@ -738,7 +883,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_net *tn = tipc_net(net); if (i == MAX_BEARERS) return 0; @@ -776,9 +921,9 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -798,6 +943,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } @@ -825,9 +971,9 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -837,8 +983,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); - if (!bearer) + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } bearer_disable(net, bearer); @@ -870,9 +1018,9 @@ int __tipc_nl_bearer_enable(struct sk_buff 
*skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -896,7 +1044,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } - return tipc_enable_bearer(net, bearer, domain, prio, attrs); + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) @@ -921,9 +1070,9 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -934,23 +1083,27 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); b = tipc_bearer_find(net, name); if (!b) { - rtnl_unlock(); - return -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + err = -EINVAL; + goto out; } #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + err = -EINVAL; + goto out; + } + err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); - if (err) { - rtnl_unlock(); - return err; - } } #endif +out: rtnl_unlock(); - return 0; + return err; } int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) @@ -964,9 +1117,9 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -975,8 +1128,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); - if (!b) + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -993,14 +1148,20 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) - b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP - if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) < + b->encap_hlen + TIPC_MIN_BEARER_MTU) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif @@ -1033,21 
+1194,21 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) goto prop_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win)) goto prop_msg_full; if (media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) @@ -1102,14 +1263,14 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, - info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); if (err) return err; @@ -1128,6 +1289,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); media = tipc_media_find(name); if (!media) { + NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } @@ -1150,23 +1312,24 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) int err; char *name; struct tipc_media *m; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, - info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); - if (!m) + if (!m) { + NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; - + } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -1180,14 +1343,20 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_PRIO]) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) - m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (m->type_id != TIPC_MEDIA_TYPE_UDP) + if (m->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 7f4c569594a5..41eac1ee0c09 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -93,7 +93,8 @@ struct tipc_bearer; * @raw2addr: convert from raw addr format to media addr format * 
@priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure - * @window: default window (in packets) before declaring link congestion + * @min_win: minimum window (in packets) before declaring link congestion + * @max_win: maximum window (in packets) before declaring link congestion * @mtu: max packet size bearer can support for media type not dependent on * underlying device MTU * @type_id: TIPC media identifier @@ -116,10 +117,11 @@ struct tipc_media { char *msg); int (*raw2addr)(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *raw); + const char *raw); u32 priority; u32 tolerance; - u32 window; + u32 min_win; + u32 max_win; u32 mtu; u32 type_id; u32 hwaddr_len; @@ -137,34 +139,41 @@ struct tipc_media { * @pt: packet type for bearer * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer - * @window: default window size for bearer + * @min_win: minimum window (in packets) before declaring link congestion + * @max_win: maximum window (in packets) before declaring link congestion * @tolerance: default link tolerance for bearer * @domain: network domain to which links can be established * @identity: array index of this bearer within TIPC bearer array - * @link_req: ptr to (optional) structure making periodic link setup requests + * @disc: ptr to link setup request * @net_plane: network plane ('A' through 'H') currently associated with bearer + * @encap_hlen: encap headers length + * @up: bearer up flag (bit 0) + * @refcnt: tipc_bearer reference counter * * Note: media-specific code is responsible for initialization of the fields * indicated below when a bearer is enabled; TIPC's generic bearer code takes * care of initializing all other fields. */ struct tipc_bearer { - void __rcu *media_ptr; /* initalized by media */ - u32 mtu; /* initalized by media */ - struct tipc_media_addr addr; /* initalized by media */ + void __rcu *media_ptr; /* initialized by media */ + u32 mtu; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; struct packet_type pt; struct rcu_head rcu; u32 priority; - u32 window; + u32 min_win; + u32 max_win; u32 tolerance; u32 domain; u32 identity; struct tipc_discoverer *disc; char net_plane; + u16 encap_hlen; unsigned long up; + refcount_t refcnt; }; struct tipc_bearer_names { @@ -205,11 +214,11 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); -int tipc_media_set_priority(const char *name, u32 new_value); -int tipc_media_set_window(const char *name, u32 new_value); int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, struct nlattr *attrs[]); +bool tipc_bearer_hold(struct tipc_bearer *b); +void tipc_bearer_put(struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); @@ -223,20 +232,32 @@ int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); int tipc_bearer_mtu(struct net *net, u32 bearer_id); +int tipc_bearer_min_mtu(struct net *net, u32 bearer_id); bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id); void tipc_bearer_xmit_skb(struct 
net *net, u32 bearer_id, struct sk_buff *skb, struct tipc_media_addr *dest); void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, - struct tipc_media_addr *dst); + struct tipc_media_addr *dst, + struct tipc_node *__dnode); void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts); +int tipc_attach_loopback(struct net *net); +void tipc_detach_loopback(struct net *net); + +static inline void tipc_loopback_trace(struct net *net, + struct sk_buff_head *pkts) +{ + if (unlikely(dev_nit_active(net->loopback_dev))) + tipc_clone_to_loopback(net, pkts); +} /* check if device MTU is too low for tipc headers */ -static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) +static inline bool tipc_mtu_bad(struct net_device *dev) { - if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve) + if (dev->mtu >= TIPC_MIN_BEARER_MTU) return false; netdev_warn(dev, "MTU too low for tipc bearer\n"); return true; diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..434e70eabe08 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,8 +34,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include "core.h" #include "name_table.h" #include "subscr.h" @@ -43,6 +41,8 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" +#include "crypto.h" #include <linux/module.h> @@ -59,6 +59,8 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; + INIT_WORK(&tn->work, tipc_net_finalize_work); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -66,6 +68,11 @@ static int __net_init tipc_init_net(struct net *net) INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); +#ifdef CONFIG_TIPC_CRYPTO + err = tipc_crypto_start(&tn->crypto_tx, net, NULL); + if (err) + goto out_crypto; +#endif err = tipc_sk_rht_init(net); if (err) goto out_sk_rht; @@ -74,36 +81,56 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; - INIT_LIST_HEAD(&tn->dist_queue); - err = tipc_topsrv_start(net); + err = tipc_bcast_init(net); if (err) - goto out_subscr; + goto out_bclink; - err = tipc_bcast_init(net); + err = tipc_attach_loopback(net); if (err) goto out_bclink; return 0; out_bclink: - tipc_bcast_stop(net); -out_subscr: tipc_nametbl_stop(net); out_nametbl: tipc_sk_rht_destroy(net); out_sk_rht: + +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&tn->crypto_tx); +out_crypto: +#endif return err; } static void __net_exit tipc_exit_net(struct net *net) { - tipc_topsrv_stop(net); + struct tipc_net *tn = tipc_net(net); + + tipc_detach_loopback(net); tipc_net_stop(net); + /* Make sure the tipc_net_finalize_work() finished */ + cancel_work_sync(&tn->work); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&tipc_net(net)->crypto_tx); +#endif + while (atomic_read(&tn->wq_count)) + cond_resched(); +} + +static void __net_exit tipc_pernet_pre_exit(struct net *net) +{ + tipc_node_pre_cleanup_net(net); } +static struct pernet_operations tipc_pernet_pre_exit_ops = { + .pre_exit = tipc_pernet_pre_exit, +}; + static struct pernet_operations tipc_net_ops = { .init = tipc_init_net, .exit = tipc_exit_net, @@ -111,6 +138,11 @@ static struct 
pernet_operations tipc_net_ops = { .size = sizeof(struct tipc_net), }; +static struct pernet_operations tipc_topsrv_net_ops = { + .init = tipc_topsrv_init_net, + .exit = tipc_topsrv_exit_net, +}; + static int __init tipc_init(void) { int err; @@ -121,54 +153,69 @@ static int __init tipc_init(void) sysctl_tipc_rmem[1] = RCVBUF_DEF; sysctl_tipc_rmem[2] = RCVBUF_MAX; - err = tipc_netlink_start(); + err = tipc_register_sysctl(); if (err) - goto out_netlink; + goto out_sysctl; - err = tipc_netlink_compat_start(); + err = register_pernet_device(&tipc_net_ops); if (err) - goto out_netlink_compat; + goto out_pernet; err = tipc_socket_init(); if (err) goto out_socket; - err = tipc_register_sysctl(); + err = register_pernet_device(&tipc_topsrv_net_ops); if (err) - goto out_sysctl; + goto out_pernet_topsrv; - err = register_pernet_subsys(&tipc_net_ops); + err = register_pernet_subsys(&tipc_pernet_pre_exit_ops); if (err) - goto out_pernet; + goto out_register_pernet_subsys; err = tipc_bearer_setup(); if (err) goto out_bearer; + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + pr_info("Started in single node mode\n"); return 0; + +out_netlink_compat: + tipc_netlink_stop(); +out_netlink: + tipc_bearer_cleanup(); out_bearer: - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_subsys(&tipc_pernet_pre_exit_ops); +out_register_pernet_subsys: + unregister_pernet_device(&tipc_topsrv_net_ops); +out_pernet_topsrv: + tipc_socket_stop(); +out_socket: + unregister_pernet_device(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: - tipc_socket_stop(); -out_socket: - tipc_netlink_compat_stop(); -out_netlink_compat: - tipc_netlink_stop(); -out_netlink: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { - tipc_bearer_cleanup(); - unregister_pernet_subsys(&tipc_net_ops); - tipc_netlink_stop(); tipc_netlink_compat_stop(); + tipc_netlink_stop(); + tipc_bearer_cleanup(); + unregister_pernet_subsys(&tipc_pernet_pre_exit_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); + unregister_pernet_device(&tipc_net_ops); tipc_unregister_sysctl(); pr_info("Deactivated\n"); diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7f3fe3401c45 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -3,6 +3,7 @@ * * Copyright (c) 2005-2006, 2013-2018 Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -59,14 +60,23 @@ #include <net/netns/generic.h> #include <linux/rhashtable.h> #include <net/genetlink.h> +#include <net/netns/hash.h> + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt struct tipc_node; struct tipc_bearer; struct tipc_bc_base; struct tipc_link; -struct tipc_name_table; struct tipc_topsrv; struct tipc_monitor; +#ifdef CONFIG_TIPC_CRYPTO +struct tipc_crypto; +#endif #define TIPC_MOD_VER "2.0.0" @@ -116,12 +126,24 @@ struct tipc_net { spinlock_t nametbl_lock; struct name_table *nametbl; - /* Name dist queue */ - struct list_head dist_queue; - /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; + + /* Tracing of node internal messages */ + struct packet_type loopback_pt; + +#ifdef CONFIG_TIPC_CRYPTO + /* TX crypto handler */ + struct tipc_crypto *crypto_tx; +#endif + /* Work item for net finalize */ + struct work_struct work; + /* The numbers of work queues in schedule */ + atomic_t wq_count; }; static inline struct tipc_net *tipc_net(struct net *net) @@ -174,11 +196,27 @@ static inline int less(u16 left, u16 right) return less_eq(left, right) && (mod(right) != mod(left)); } -static inline int in_range(u16 val, u16 min, u16 max) +static inline int tipc_in_range(u16 val, u16 min, u16 max) { return !less(val, min) && !more(val, max); } +static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand) +{ + return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand; +} + +static inline u32 hash128to32(char *bytes) +{ + __be32 *tmp = (__be32 *)bytes; + u32 res; + + res = ntohl(tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]); + if (likely(res)) + return res; + return ntohl(tmp[0] | tmp[1] | tmp[2] | tmp[3]); +} + #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); void tipc_unregister_sysctl(void); diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c new file mode 100644 index 000000000000..751904f10aab --- /dev/null +++ b/net/tipc/crypto.c @@ -0,0 +1,2484 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption + * + * Copyright (c) 2019, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
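The hash128to32() helper above replaces the open-coded XOR previously done in tipc_set_node_id(). A user-space sketch of the same fold; fold128to32 and the main() harness are illustrative names only:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t fold128to32(const uint8_t *id)
{
        uint32_t w[4], res;

        memcpy(w, id, sizeof(w));
        res = ntohl(w[0] ^ w[1] ^ w[2] ^ w[3]);
        if (res)
                return res;
        /* XOR cancelled everything out: fall back to OR so that a
         * non-zero id can never yield the invalid node number 0 */
        return ntohl(w[0] | w[1] | w[2] | w[3]);
}

int main(void)
{
        uint8_t id[16] = "1.1.1.1";     /* hypothetical 128-bit node id */

        printf("trial address: %u\n", fold128to32(id));
        return 0;
}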
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <crypto/aead.h> +#include <crypto/aes.h> +#include <crypto/rng.h> +#include "crypto.h" +#include "msg.h" +#include "bcast.h" + +#define TIPC_TX_GRACE_PERIOD msecs_to_jiffies(5000) /* 5s */ +#define TIPC_TX_LASTING_TIME msecs_to_jiffies(10000) /* 10s */ +#define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */ +#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(15000) /* 15s */ + +#define TIPC_MAX_TFMS_DEF 10 +#define TIPC_MAX_TFMS_LIM 1000 + +#define TIPC_REKEYING_INTV_DEF (60 * 24) /* default: 1 day */ + +/* + * TIPC Key ids + */ +enum { + KEY_MASTER = 0, + KEY_MIN = KEY_MASTER, + KEY_1 = 1, + KEY_2, + KEY_3, + KEY_MAX = KEY_3, +}; + +/* + * TIPC Crypto statistics + */ +enum { + STAT_OK, + STAT_NOK, + STAT_ASYNC, + STAT_ASYNC_OK, + STAT_ASYNC_NOK, + STAT_BADKEYS, /* tx only */ + STAT_BADMSGS = STAT_BADKEYS, /* rx only */ + STAT_NOKEYS, + STAT_SWITCHES, + + MAX_STATS, +}; + +/* TIPC crypto statistics' header */ +static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok", + "async_nok", "badmsgs", "nokeys", + "switches"}; + +/* Max TFMs number per key */ +int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF; +/* Key exchange switch, default: on */ +int sysctl_tipc_key_exchange_enabled __read_mostly = 1; + +/* + * struct tipc_key - TIPC keys' status indicator + * + * 7 6 5 4 3 2 1 0 + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * key: | (reserved)|passive idx| active idx|pending idx| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + */ +struct tipc_key { +#define KEY_BITS (2) +#define KEY_MASK ((1 << KEY_BITS) - 1) + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 pending:2, + active:2, + passive:2, /* rx only */ + reserved:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:2, + passive:2, /* rx only */ + active:2, + pending:2; +#else +#error "Please fix <asm/byteorder.h>" +#endif + } __packed; + u8 keys; + }; +}; + +/** + * struct tipc_tfm - TIPC TFM structure to form a list of TFMs + * @tfm: cipher handle/key + * @list: linked list of TFMs + */ +struct tipc_tfm { + struct crypto_aead *tfm; + struct list_head list; +}; + +/** + * struct tipc_aead - TIPC AEAD key structure + * @tfm_entry: per-cpu pointer to one entry in TFM list + * @crypto: TIPC crypto owns this key + * @cloned: reference to the source key in case cloning + * @users: the number of the key users (TX/RX) + * @salt: the key's SALT value + * @authsize: authentication tag size (max = 16) + * @mode: crypto mode is applied to the key + * @hint: a hint for user key + * @rcu: struct rcu_head + * @key: the aead key + * @gen: the key's generation + * @seqno: the key seqno (cluster scope) + * @refcnt: the key reference counter + */ +struct tipc_aead { +#define TIPC_AEAD_HINT_LEN (5) + struct tipc_tfm * __percpu *tfm_entry; + struct tipc_crypto *crypto; + struct tipc_aead *cloned; + atomic_t users; + u32 salt; + u8 authsize; + u8 mode; + char hint[2 * TIPC_AEAD_HINT_LEN + 1]; + struct rcu_head rcu; + struct tipc_aead_key *key; + u16 gen; 
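The two-bit slot indices in the "keys" byte drawn above can be packed and unpacked with plain shifts; a small stand-alone sketch, where key_pack is a made-up helper mirroring tipc_crypto_key_set_state() further down (bits 0-1 pending, 2-3 active, 4-5 passive, 6-7 reserved):

#include <stdint.h>
#include <stdio.h>

#define KEY_BITS 2
#define KEY_MASK ((1 << KEY_BITS) - 1)

static uint8_t key_pack(uint8_t passive, uint8_t active, uint8_t pending)
{
        return ((passive & KEY_MASK) << (KEY_BITS * 2)) |
               ((active & KEY_MASK) << KEY_BITS) |
                (pending & KEY_MASK);
}

int main(void)
{
        uint8_t keys = key_pack(3, 1, 2);

        printf("passive=%d active=%d pending=%d\n",
               (keys >> 4) & KEY_MASK, (keys >> 2) & KEY_MASK,
               keys & KEY_MASK);
        return 0;
}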
+ + atomic64_t seqno ____cacheline_aligned; + refcount_t refcnt ____cacheline_aligned; + +} ____cacheline_aligned; + +/** + * struct tipc_crypto_stats - TIPC Crypto statistics + * @stat: array of crypto statistics + */ +struct tipc_crypto_stats { + unsigned int stat[MAX_STATS]; +}; + +/** + * struct tipc_crypto - TIPC TX/RX crypto structure + * @net: struct net + * @node: TIPC node (RX) + * @aead: array of pointers to AEAD keys for encryption/decryption + * @peer_rx_active: replicated peer RX active key index + * @key_gen: TX/RX key generation + * @key: the key states + * @skey_mode: session key's mode + * @skey: received session key + * @wq: common workqueue on TX crypto + * @work: delayed work sched for TX/RX + * @key_distr: key distributing state + * @rekeying_intv: rekeying interval (in minutes) + * @stats: the crypto statistics + * @name: the crypto name + * @sndnxt: the per-peer sndnxt (TX) + * @timer1: general timer 1 (jiffies) + * @timer2: general timer 2 (jiffies) + * @working: the crypto is working or not + * @key_master: flag indicates if master key exists + * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.) + * @nokey: no key indication + * @flags: combined flags field + * @lock: tipc_key lock + */ +struct tipc_crypto { + struct net *net; + struct tipc_node *node; + struct tipc_aead __rcu *aead[KEY_MAX + 1]; + atomic_t peer_rx_active; + u16 key_gen; + struct tipc_key key; + u8 skey_mode; + struct tipc_aead_key *skey; + struct workqueue_struct *wq; + struct delayed_work work; +#define KEY_DISTR_SCHED 1 +#define KEY_DISTR_COMPL 2 + atomic_t key_distr; + u32 rekeying_intv; + + struct tipc_crypto_stats __percpu *stats; + char name[48]; + + atomic64_t sndnxt ____cacheline_aligned; + unsigned long timer1; + unsigned long timer2; + union { + struct { + u8 working:1; + u8 key_master:1; + u8 legacy_user:1; + u8 nokey: 1; + }; + u8 flags; + }; + spinlock_t lock; /* crypto lock */ + +} ____cacheline_aligned; + +/* struct tipc_crypto_tx_ctx - TX context for callbacks */ +struct tipc_crypto_tx_ctx { + struct tipc_aead *aead; + struct tipc_bearer *bearer; + struct tipc_media_addr dst; +}; + +/* struct tipc_crypto_rx_ctx - RX context for callbacks */ +struct tipc_crypto_rx_ctx { + struct tipc_aead *aead; + struct tipc_bearer *bearer; +}; + +static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead); +static inline void tipc_aead_put(struct tipc_aead *aead); +static void tipc_aead_free(struct rcu_head *rp); +static int tipc_aead_users(struct tipc_aead __rcu *aead); +static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim); +static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim); +static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val); +static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead); +static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, + u8 mode); +static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src); +static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, + unsigned int crypto_ctx_size, + u8 **iv, struct aead_request **req, + struct scatterlist **sg, int nsg); +static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode); +static void tipc_aead_encrypt_done(void *data, int err); +static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, + struct sk_buff *skb, struct tipc_bearer *b); +static void tipc_aead_decrypt_done(void *data, 
int err); +static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr); +static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, + u8 tx_key, struct sk_buff *skb, + struct tipc_crypto *__rx); +static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, + u8 new_passive, + u8 new_active, + u8 new_pending); +static int tipc_crypto_key_attach(struct tipc_crypto *c, + struct tipc_aead *aead, u8 pos, + bool master_key); +static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending); +static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, + struct tipc_crypto *rx, + struct sk_buff *skb, + u8 tx_key); +static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb); +static int tipc_crypto_key_revoke(struct net *net, u8 tx_key); +static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode, u8 type); +static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, + struct tipc_bearer *b, + struct sk_buff **skb, int err); +static void tipc_crypto_do_cmd(struct net *net, int cmd); +static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf); +static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, + char *buf); +static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey, + u16 gen, u8 mode, u32 dnode); +static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr); +static void tipc_crypto_work_tx(struct work_struct *work); +static void tipc_crypto_work_rx(struct work_struct *work); +static int tipc_aead_key_generate(struct tipc_aead_key *skey); + +#define is_tx(crypto) (!(crypto)->node) +#define is_rx(crypto) (!is_tx(crypto)) + +#define key_next(cur) ((cur) % KEY_MAX + 1) + +#define tipc_aead_rcu_ptr(rcu_ptr, lock) \ + rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock)) + +#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \ +do { \ + struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \ + lockdep_is_held(lock)); \ + rcu_assign_pointer((rcu_ptr), (ptr)); \ + tipc_aead_put(__tmp); \ +} while (0) + +#define tipc_crypto_key_detach(rcu_ptr, lock) \ + tipc_aead_rcu_replace((rcu_ptr), NULL, lock) + +/** + * tipc_aead_key_validate - Validate a AEAD user key + * @ukey: pointer to user key data + * @info: netlink info pointer + */ +int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info) +{ + int keylen; + + /* Check if algorithm exists */ + if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) { + GENL_SET_ERR_MSG(info, "unable to load the algorithm (module existed?)"); + return -ENODEV; + } + + /* Currently, we only support the "gcm(aes)" cipher algorithm */ + if (strcmp(ukey->alg_name, "gcm(aes)")) { + GENL_SET_ERR_MSG(info, "not supported yet the algorithm"); + return -ENOTSUPP; + } + + /* Check if key size is correct */ + keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; + if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 && + keylen != TIPC_AES_GCM_KEY_SIZE_192 && + keylen != TIPC_AES_GCM_KEY_SIZE_256)) { + GENL_SET_ERR_MSG(info, "incorrect key length (20, 28 or 36 octets?)"); + return -EKEYREJECTED; + } + + return 0; +} + +/** + * tipc_aead_key_generate - Generate new session key + * @skey: input/output key with new content + * + * Return: 0 in case of success, otherwise < 0 + */ +static int tipc_aead_key_generate(struct tipc_aead_key *skey) +{ + int rc = 0; + + /* Fill the key's content with a random value via RNG cipher 
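The sizes accepted by tipc_aead_key_validate() above are the AES key lengths 16/24/32 plus the 4-octet GCM salt, hence the "20, 28 or 36 octets" in the error message. A tiny stand-alone check, with keylen_ok as an illustrative name:

#include <stdio.h>

#define GCM_SALT 4

static int keylen_ok(int keylen)
{
        int aes = keylen - GCM_SALT;    /* AES-128/192/256 key part */

        return aes == 16 || aes == 24 || aes == 32;
}

int main(void)
{
        for (int n = 16; n <= 36; n += 4)
                printf("%d octets: %s\n", n, keylen_ok(n) ? "ok" : "rejected");
        return 0;
}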
*/ + rc = crypto_get_default_rng(); + if (likely(!rc)) { + rc = crypto_rng_get_bytes(crypto_default_rng, skey->key, + skey->keylen); + crypto_put_default_rng(); + } + + return rc; +} + +static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (unlikely(!tmp || !refcount_inc_not_zero(&tmp->refcnt))) + tmp = NULL; + rcu_read_unlock(); + + return tmp; +} + +static inline void tipc_aead_put(struct tipc_aead *aead) +{ + if (aead && refcount_dec_and_test(&aead->refcnt)) + call_rcu(&aead->rcu, tipc_aead_free); +} + +/** + * tipc_aead_free - Release AEAD key incl. all the TFMs in the list + * @rp: rcu head pointer + */ +static void tipc_aead_free(struct rcu_head *rp) +{ + struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu); + struct tipc_tfm *tfm_entry, *head, *tmp; + + if (aead->cloned) { + tipc_aead_put(aead->cloned); + } else { + head = *get_cpu_ptr(aead->tfm_entry); + put_cpu_ptr(aead->tfm_entry); + list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) { + crypto_free_aead(tfm_entry->tfm); + list_del(&tfm_entry->list); + kfree(tfm_entry); + } + /* Free the head */ + crypto_free_aead(head->tfm); + list_del(&head->list); + kfree(head); + } + free_percpu(aead->tfm_entry); + kfree_sensitive(aead->key); + kfree_sensitive(aead); +} + +static int tipc_aead_users(struct tipc_aead __rcu *aead) +{ + struct tipc_aead *tmp; + int users = 0; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + users = atomic_read(&tmp->users); + rcu_read_unlock(); + + return users; +} + +static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + atomic_add_unless(&tmp->users, 1, lim); + rcu_read_unlock(); +} + +static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + atomic_add_unless(&rcu_dereference(aead)->users, -1, lim); + rcu_read_unlock(); +} + +static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val) +{ + struct tipc_aead *tmp; + int cur; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) { + do { + cur = atomic_read(&tmp->users); + if (cur == val) + break; + } while (atomic_cmpxchg(&tmp->users, cur, val) != cur); + } + rcu_read_unlock(); +} + +/** + * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it + * @aead: the AEAD key pointer + */ +static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead) +{ + struct tipc_tfm **tfm_entry; + struct crypto_aead *tfm; + + tfm_entry = get_cpu_ptr(aead->tfm_entry); + *tfm_entry = list_next_entry(*tfm_entry, list); + tfm = (*tfm_entry)->tfm; + put_cpu_ptr(tfm_entry); + + return tfm; +} + +/** + * tipc_aead_init - Initiate TIPC AEAD + * @aead: returned new TIPC AEAD key handle pointer + * @ukey: pointer to user key data + * @mode: the key mode + * + * Allocate a (list of) new cipher transformation (TFM) with the specific user + * key data if valid. The number of the allocated TFMs can be set via the sysfs + * "net/tipc/max_tfms" first. + * Also, all the other AEAD data are also initialized. 
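tipc_aead_get()/tipc_aead_put() above follow the usual RCU-plus-refcount lookup idiom: a reader may only take a reference it can prove is still non-zero. A minimal kernel-style sketch with an invented demo_obj type:

#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_obj {
        refcount_t refcnt;
        struct rcu_head rcu;
};

static struct demo_obj *demo_get(struct demo_obj __rcu **slot)
{
        struct demo_obj *obj;

        rcu_read_lock();
        obj = rcu_dereference(*slot);
        /* never resurrect an object whose last reference is gone */
        if (obj && !refcount_inc_not_zero(&obj->refcnt))
                obj = NULL;
        rcu_read_unlock();
        return obj;
}

static void demo_put(struct demo_obj *obj)
{
        if (obj && refcount_dec_and_test(&obj->refcnt))
                kfree_rcu(obj, rcu);    /* ~ call_rcu() + free above */
}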
+ * + * Return: 0 if the initiation is successful, otherwise: < 0 + */ +static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, + u8 mode) +{ + struct tipc_tfm *tfm_entry, *head; + struct crypto_aead *tfm; + struct tipc_aead *tmp; + int keylen, err, cpu; + int tfm_cnt = 0; + + if (unlikely(*aead)) + return -EEXIST; + + /* Allocate a new AEAD */ + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); + if (unlikely(!tmp)) + return -ENOMEM; + + /* The key consists of two parts: [AES-KEY][SALT] */ + keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; + + /* Allocate per-cpu TFM entry pointer */ + tmp->tfm_entry = alloc_percpu(struct tipc_tfm *); + if (!tmp->tfm_entry) { + kfree_sensitive(tmp); + return -ENOMEM; + } + + /* Make a list of TFMs with the user key data */ + do { + tfm = crypto_alloc_aead(ukey->alg_name, 0, 0); + if (IS_ERR(tfm)) { + err = PTR_ERR(tfm); + break; + } + + if (unlikely(!tfm_cnt && + crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) { + crypto_free_aead(tfm); + err = -ENOTSUPP; + break; + } + + err = crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE); + err |= crypto_aead_setkey(tfm, ukey->key, keylen); + if (unlikely(err)) { + crypto_free_aead(tfm); + break; + } + + tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL); + if (unlikely(!tfm_entry)) { + crypto_free_aead(tfm); + err = -ENOMEM; + break; + } + INIT_LIST_HEAD(&tfm_entry->list); + tfm_entry->tfm = tfm; + + /* First entry? */ + if (!tfm_cnt) { + head = tfm_entry; + for_each_possible_cpu(cpu) { + *per_cpu_ptr(tmp->tfm_entry, cpu) = head; + } + } else { + list_add_tail(&tfm_entry->list, &head->list); + } + + } while (++tfm_cnt < sysctl_tipc_max_tfms); + + /* Not any TFM is allocated? */ + if (!tfm_cnt) { + free_percpu(tmp->tfm_entry); + kfree_sensitive(tmp); + return err; + } + + /* Form a hex string of some last bytes as the key's hint */ + bin2hex(tmp->hint, ukey->key + keylen - TIPC_AEAD_HINT_LEN, + TIPC_AEAD_HINT_LEN); + + /* Initialize the other data */ + tmp->mode = mode; + tmp->cloned = NULL; + tmp->authsize = TIPC_AES_GCM_TAG_SIZE; + tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL); + if (!tmp->key) { + tipc_aead_free(&tmp->rcu); + return -ENOMEM; + } + memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE); + atomic_set(&tmp->users, 0); + atomic64_set(&tmp->seqno, 0); + refcount_set(&tmp->refcnt, 1); + + *aead = tmp; + return 0; +} + +/** + * tipc_aead_clone - Clone a TIPC AEAD key + * @dst: dest key for the cloning + * @src: source key to clone from + * + * Make a "copy" of the source AEAD key data to the dest, the TFMs list is + * common for the keys. + * A reference to the source is hold in the "cloned" pointer for the later + * freeing purposes. + * + * Note: this must be done in cluster-key mode only! 
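The per-TFM setup inside the allocation loop above, isolated into a sketch; demo_gcm_alloc is a made-up wrapper around the same crypto API calls, with the 16-byte tag size of TIPC_AES_GCM_TAG_SIZE assumed:

#include <crypto/aead.h>
#include <linux/err.h>
#include <linux/types.h>

static struct crypto_aead *demo_gcm_alloc(const u8 *key, unsigned int keylen)
{
        struct crypto_aead *tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
        int err;

        if (IS_ERR(tfm))
                return tfm;

        err = crypto_aead_setauthsize(tfm, 16); /* auth tag / ICV size */
        if (!err)
                err = crypto_aead_setkey(tfm, key, keylen);
        if (err) {
                crypto_free_aead(tfm);
                return ERR_PTR(err);
        }
        return tfm;
}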
+ * Return: 0 in case of success, otherwise < 0 + */ +static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src) +{ + struct tipc_aead *aead; + int cpu; + + if (!src) + return -ENOKEY; + + if (src->mode != CLUSTER_KEY) + return -EINVAL; + + if (unlikely(*dst)) + return -EEXIST; + + aead = kzalloc(sizeof(*aead), GFP_ATOMIC); + if (unlikely(!aead)) + return -ENOMEM; + + aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC); + if (unlikely(!aead->tfm_entry)) { + kfree_sensitive(aead); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) { + *per_cpu_ptr(aead->tfm_entry, cpu) = + *per_cpu_ptr(src->tfm_entry, cpu); + } + + memcpy(aead->hint, src->hint, sizeof(src->hint)); + aead->mode = src->mode; + aead->salt = src->salt; + aead->authsize = src->authsize; + atomic_set(&aead->users, 0); + atomic64_set(&aead->seqno, 0); + refcount_set(&aead->refcnt, 1); + + WARN_ON(!refcount_inc_not_zero(&src->refcnt)); + aead->cloned = src; + + *dst = aead; + return 0; +} + +/** + * tipc_aead_mem_alloc - Allocate memory for AEAD request operations + * @tfm: cipher handle to be registered with the request + * @crypto_ctx_size: size of crypto context for callback + * @iv: returned pointer to IV data + * @req: returned pointer to AEAD request data + * @sg: returned pointer to SG lists + * @nsg: number of SG lists to be allocated + * + * Allocate memory to store the crypto context data, AEAD request, IV and SG + * lists, the memory layout is as follows: + * crypto_ctx || iv || aead_req || sg[] + * + * Return: the pointer to the memory areas in case of success, otherwise NULL + */ +static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, + unsigned int crypto_ctx_size, + u8 **iv, struct aead_request **req, + struct scatterlist **sg, int nsg) +{ + unsigned int iv_size, req_size; + unsigned int len; + u8 *mem; + + iv_size = crypto_aead_ivsize(tfm); + req_size = sizeof(**req) + crypto_aead_reqsize(tfm); + + len = crypto_ctx_size; + len += iv_size; + len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + len += req_size; + len = ALIGN(len, __alignof__(struct scatterlist)); + len += nsg * sizeof(**sg); + + mem = kmalloc(len, GFP_ATOMIC); + if (!mem) + return NULL; + + *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size, + crypto_aead_alignmask(tfm) + 1); + *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, + crypto_tfm_ctx_alignment()); + *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, + __alignof__(struct scatterlist)); + + return (void *)mem; +} + +/** + * tipc_aead_encrypt - Encrypt a message + * @aead: TIPC AEAD key for the message encryption + * @skb: the input/output skb + * @b: TIPC bearer where the message will be delivered after the encryption + * @dst: the destination media address + * @__dnode: TIPC dest node if "known" + * + * Return: + * * 0 : if the encryption has completed + * * -EINPROGRESS/-EBUSY : if a callback will be performed + * * < 0 : the encryption has failed + */ +static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode) +{ + struct crypto_aead *tfm = tipc_aead_tfm_next(aead); + struct tipc_crypto_tx_ctx *tx_ctx; + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + struct tipc_ehdr *ehdr; + int ehsz, len, tailen, nsg, rc; + void *ctx; + u32 salt; + u8 *iv; + + /* Make sure message len at least 4-byte aligned */ + len = ALIGN(skb->len, 4); + tailen = len - skb->len + 
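The single-allocation layout produced by tipc_aead_mem_alloc() above, crypto_ctx || iv || aead_req || sg[], can be visualized with plain offset arithmetic; the sizes below are invented placeholders, only the layout math matters:

#include <stdio.h>
#include <stddef.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
        size_t ctx = 24, iv = 12, req = 72, sg = 5 * 16;
        size_t off_iv = ctx;
        size_t off_req = ALIGN_UP(off_iv + iv, 8);
        size_t off_sg = ALIGN_UP(off_req + req, 8);

        printf("total %zu octets (iv@%zu req@%zu sg@%zu)\n",
               off_sg + sg, off_iv, off_req, off_sg);
        return 0;
}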
aead->authsize; + + /* Expand skb tail for authentication tag: + * As for simplicity, we'd have made sure skb having enough tailroom + * for authentication tag @skb allocation. Even when skb is nonlinear + * but there is no frag_list, it should be still fine! + * Otherwise, we must cow it to be a writable buffer with the tailroom. + */ + SKB_LINEAR_ASSERT(skb); + if (tailen > skb_tailroom(skb)) { + pr_debug("TX(): skb tailroom is not enough: %d, requires: %d\n", + skb_tailroom(skb), tailen); + } + + nsg = skb_cow_data(skb, tailen, &trailer); + if (unlikely(nsg < 0)) { + pr_err("TX: skb_cow_data() returned %d\n", nsg); + return nsg; + } + + pskb_put(skb, trailer, tailen); + + /* Allocate memory for the AEAD operation */ + ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg); + if (unlikely(!ctx)) + return -ENOMEM; + TIPC_SKB_CB(skb)->crypto_ctx = ctx; + + /* Map skb to the sg lists */ + sg_init_table(sg, nsg); + rc = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(rc < 0)) { + pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg); + goto exit; + } + + /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)] + * In case we're in cluster-key mode, SALT is varied by xor-ing with + * the source address (or w0 of id), otherwise with the dest address + * if dest is known. + */ + ehdr = (struct tipc_ehdr *)skb->data; + salt = aead->salt; + if (aead->mode == CLUSTER_KEY) + salt ^= __be32_to_cpu(ehdr->addr); + else if (__dnode) + salt ^= tipc_node_get_addr(__dnode); + memcpy(iv, &salt, 4); + memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); + + /* Prepare request */ + ehsz = tipc_ehdr_size(ehdr); + aead_request_set_tfm(req, tfm); + aead_request_set_ad(req, ehsz); + aead_request_set_crypt(req, sg, sg, len - ehsz, iv); + + /* Set callback function & data */ + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tipc_aead_encrypt_done, skb); + tx_ctx = (struct tipc_crypto_tx_ctx *)ctx; + tx_ctx->aead = aead; + tx_ctx->bearer = b; + memcpy(&tx_ctx->dst, dst, sizeof(*dst)); + + /* Hold bearer */ + if (unlikely(!tipc_bearer_hold(b))) { + rc = -ENODEV; + goto exit; + } + + /* Get net to avoid freed tipc_crypto when delete namespace */ + if (!maybe_get_net(aead->crypto->net)) { + tipc_bearer_put(b); + rc = -ENODEV; + goto exit; + } + + /* Now, do encrypt */ + rc = crypto_aead_encrypt(req); + if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); + put_net(aead->crypto->net); + +exit: + kfree(ctx); + TIPC_SKB_CB(skb)->crypto_ctx = NULL; + return rc; +} + +static void tipc_aead_encrypt_done(void *data, int err) +{ + struct sk_buff *skb = data; + struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; + struct tipc_bearer *b = tx_ctx->bearer; + struct tipc_aead *aead = tx_ctx->aead; + struct tipc_crypto *tx = aead->crypto; + struct net *net = tx->net; + + switch (err) { + case 0: + this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]); + rcu_read_lock(); + if (likely(test_bit(0, &b->up))) + b->media->send_msg(net, skb, b, &tx_ctx->dst); + else + kfree_skb(skb); + rcu_read_unlock(); + break; + case -EINPROGRESS: + return; + default: + this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]); + kfree_skb(skb); + break; + } + + kfree(tx_ctx); + tipc_bearer_put(b); + tipc_aead_put(aead); + put_net(net); +} + +/** + * tipc_aead_decrypt - Decrypt an encrypted message + * @net: struct net + * @aead: TIPC AEAD for the message decryption + * @skb: the input/output skb + * @b: TIPC bearer where the message has been received + * + * Return: + * * 0 : if the decryption has completed + * * 
-EINPROGRESS/-EBUSY : if a callback will be performed + * * < 0 : the decryption has failed + */ +static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, + struct sk_buff *skb, struct tipc_bearer *b) +{ + struct tipc_crypto_rx_ctx *rx_ctx; + struct aead_request *req; + struct crypto_aead *tfm; + struct sk_buff *unused; + struct scatterlist *sg; + struct tipc_ehdr *ehdr; + int ehsz, nsg, rc; + void *ctx; + u32 salt; + u8 *iv; + + if (unlikely(!aead)) + return -ENOKEY; + + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; + } + + /* Allocate memory for the AEAD operation */ + tfm = tipc_aead_tfm_next(aead); + ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg); + if (unlikely(!ctx)) + return -ENOMEM; + TIPC_SKB_CB(skb)->crypto_ctx = ctx; + + /* Map skb to the sg lists */ + sg_init_table(sg, nsg); + rc = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(rc < 0)) { + pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg); + goto exit; + } + + /* Reconstruct IV: */ + ehdr = (struct tipc_ehdr *)skb->data; + salt = aead->salt; + if (aead->mode == CLUSTER_KEY) + salt ^= __be32_to_cpu(ehdr->addr); + else if (ehdr->destined) + salt ^= tipc_own_addr(net); + memcpy(iv, &salt, 4); + memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); + + /* Prepare request */ + ehsz = tipc_ehdr_size(ehdr); + aead_request_set_tfm(req, tfm); + aead_request_set_ad(req, ehsz); + aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv); + + /* Set callback function & data */ + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tipc_aead_decrypt_done, skb); + rx_ctx = (struct tipc_crypto_rx_ctx *)ctx; + rx_ctx->aead = aead; + rx_ctx->bearer = b; + + /* Hold bearer */ + if (unlikely(!tipc_bearer_hold(b))) { + rc = -ENODEV; + goto exit; + } + + /* Now, do decrypt */ + rc = crypto_aead_decrypt(req); + if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); + +exit: + kfree(ctx); + TIPC_SKB_CB(skb)->crypto_ctx = NULL; + return rc; +} + +static void tipc_aead_decrypt_done(void *data, int err) +{ + struct sk_buff *skb = data; + struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; + struct tipc_bearer *b = rx_ctx->bearer; + struct tipc_aead *aead = rx_ctx->aead; + struct tipc_crypto_stats __percpu *stats = aead->crypto->stats; + struct net *net = aead->crypto->net; + + switch (err) { + case 0: + this_cpu_inc(stats->stat[STAT_ASYNC_OK]); + break; + case -EINPROGRESS: + return; + default: + this_cpu_inc(stats->stat[STAT_ASYNC_NOK]); + break; + } + + kfree(rx_ctx); + tipc_crypto_rcv_complete(net, aead, b, &skb, err); + if (likely(skb)) { + if (likely(test_bit(0, &b->up))) + tipc_rcv(net, skb, b); + else + kfree_skb(skb); + } + + tipc_bearer_put(b); +} + +static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr) +{ + return (ehdr->user != LINK_CONFIG) ? 
EHDR_SIZE : EHDR_CFG_SIZE; +} + +/** + * tipc_ehdr_validate - Validate an encryption message + * @skb: the message buffer + * + * Return: "true" if this is a valid encryption message, otherwise "false" + */ +bool tipc_ehdr_validate(struct sk_buff *skb) +{ + struct tipc_ehdr *ehdr; + int ehsz; + + if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE))) + return false; + + ehdr = (struct tipc_ehdr *)skb->data; + if (unlikely(ehdr->version != TIPC_EVERSION)) + return false; + ehsz = tipc_ehdr_size(ehdr); + if (unlikely(!pskb_may_pull(skb, ehsz))) + return false; + if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE)) + return false; + + return true; +} + +/** + * tipc_ehdr_build - Build TIPC encryption message header + * @net: struct net + * @aead: TX AEAD key to be used for the message encryption + * @tx_key: key id used for the message encryption + * @skb: input/output message skb + * @__rx: RX crypto handle if dest is "known" + * + * Return: the header size if the building is successful, otherwise < 0 + */ +static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, + u8 tx_key, struct sk_buff *skb, + struct tipc_crypto *__rx) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_ehdr *ehdr; + u32 user = msg_user(hdr); + u64 seqno; + int ehsz; + + /* Make room for encryption header */ + ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE; + WARN_ON(skb_headroom(skb) < ehsz); + ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz); + + /* Obtain a seqno first: + * Use the key seqno (= cluster wise) if dest is unknown or we're in + * cluster key mode, otherwise it's better for a per-peer seqno! + */ + if (!__rx || aead->mode == CLUSTER_KEY) + seqno = atomic64_inc_return(&aead->seqno); + else + seqno = atomic64_inc_return(&__rx->sndnxt); + + /* Revoke the key if seqno is wrapped around */ + if (unlikely(!seqno)) + return tipc_crypto_key_revoke(net, tx_key); + + /* Word 1-2 */ + ehdr->seqno = cpu_to_be64(seqno); + + /* Words 0, 3- */ + ehdr->version = TIPC_EVERSION; + ehdr->user = 0; + ehdr->keepalive = 0; + ehdr->tx_key = tx_key; + ehdr->destined = (__rx) ? 1 : 0; + ehdr->rx_key_active = (__rx) ? __rx->key.active : 0; + ehdr->rx_nokey = (__rx) ? __rx->nokey : 0; + ehdr->master_key = aead->crypto->key_master; + ehdr->reserved_1 = 0; + ehdr->reserved_2 = 0; + + switch (user) { + case LINK_CONFIG: + ehdr->user = LINK_CONFIG; + memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN); + break; + default: + if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) { + ehdr->user = LINK_PROTOCOL; + ehdr->keepalive = msg_is_keepalive(hdr); + } + ehdr->addr = hdr->hdr[3]; + break; + } + + return ehsz; +} + +static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, + u8 new_passive, + u8 new_active, + u8 new_pending) +{ + struct tipc_key old = c->key; + char buf[32]; + + c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) | + ((new_active & KEY_MASK) << (KEY_BITS)) | + ((new_pending & KEY_MASK)); + + pr_debug("%s: key changing %s ::%pS\n", c->name, + tipc_key_change_dump(old, c->key, buf), + __builtin_return_address(0)); +} + +/** + * tipc_crypto_key_init - Initiate a new user / AEAD key + * @c: TIPC crypto to which new key is attached + * @ukey: the user key + * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY) + * @master_key: specify this is a cluster master key + * + * A new TIPC AEAD key will be allocated and initiated with the specified user + * key, then attached to the TIPC crypto. 
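The wraparound check in tipc_ehdr_build() above relies on the seqno being pre-incremented, so reading back 0 implies a full 2^64 wrap, at which point the key must be revoked rather than ever reusing an IV. A toy illustration:

#include <stdint.h>
#include <stdio.h>

static int next_seqno(uint64_t *seqno, uint64_t *out)
{
        *out = ++(*seqno);
        return *out ? 0 : -1;   /* -1 stands for the -EKEYREVOKED path */
}

int main(void)
{
        uint64_t s = UINT64_MAX, v;

        printf("wrapped: %s\n", next_seqno(&s, &v) ? "yes" : "no");
        return 0;
}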
+ * + * Return: new key id in case of success, otherwise: < 0 + */ +int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey, + u8 mode, bool master_key) +{ + struct tipc_aead *aead = NULL; + int rc = 0; + + /* Initiate with the new user key */ + rc = tipc_aead_init(&aead, ukey, mode); + + /* Attach it to the crypto */ + if (likely(!rc)) { + rc = tipc_crypto_key_attach(c, aead, 0, master_key); + if (rc < 0) + tipc_aead_free(&aead->rcu); + } + + return rc; +} + +/** + * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto + * @c: TIPC crypto to which the new AEAD key is attached + * @aead: the new AEAD key pointer + * @pos: desired slot in the crypto key array, = 0 if any! + * @master_key: specify this is a cluster master key + * + * Return: new key id in case of success, otherwise: -EBUSY + */ +static int tipc_crypto_key_attach(struct tipc_crypto *c, + struct tipc_aead *aead, u8 pos, + bool master_key) +{ + struct tipc_key key; + int rc = -EBUSY; + u8 new_key; + + spin_lock_bh(&c->lock); + key = c->key; + if (master_key) { + new_key = KEY_MASTER; + goto attach; + } + if (key.active && key.passive) + goto exit; + if (key.pending) { + if (tipc_aead_users(c->aead[key.pending]) > 0) + goto exit; + /* if (pos): ok with replacing, will be aligned when needed */ + /* Replace it */ + new_key = key.pending; + } else { + if (pos) { + if (key.active && pos != key_next(key.active)) { + key.passive = pos; + new_key = pos; + goto attach; + } else if (!key.active && !key.passive) { + key.pending = pos; + new_key = pos; + goto attach; + } + } + key.pending = key_next(key.active ?: key.passive); + new_key = key.pending; + } + +attach: + aead->crypto = c; + aead->gen = (is_tx(c)) ? ++c->key_gen : c->key_gen; + tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock); + if (likely(c->key.keys != key.keys)) + tipc_crypto_key_set_state(c, key.passive, key.active, + key.pending); + c->working = 1; + c->nokey = 0; + c->key_master |= master_key; + rc = new_key; + +exit: + spin_unlock_bh(&c->lock); + return rc; +} + +void tipc_crypto_key_flush(struct tipc_crypto *c) +{ + struct tipc_crypto *tx, *rx; + int k; + + spin_lock_bh(&c->lock); + if (is_rx(c)) { + /* Try to cancel pending work */ + rx = c; + tx = tipc_net(rx->net)->crypto_tx; + if (cancel_delayed_work(&rx->work)) { + kfree(rx->skey); + rx->skey = NULL; + atomic_xchg(&rx->key_distr, 0); + tipc_node_put(rx->node); + } + /* RX stopping => decrease TX key users if any */ + k = atomic_xchg(&rx->peer_rx_active, 0); + if (k) { + tipc_aead_users_dec(tx->aead[k], 0); + /* Mark the point TX key users changed */ + tx->timer1 = jiffies; + } + } + + c->flags = 0; + tipc_crypto_key_set_state(c, 0, 0, 0); + for (k = KEY_MIN; k <= KEY_MAX; k++) + tipc_crypto_key_detach(c->aead[k], &c->lock); + atomic64_set(&c->sndnxt, 0); + spin_unlock_bh(&c->lock); +} + +/** + * tipc_crypto_key_try_align - Align RX keys if possible + * @rx: RX crypto handle + * @new_pending: new pending slot if aligned (= TX key from peer) + * + * Peer has used an unknown key slot; this only happens when the peer has left + * and rejoined, or we are a newcomer. + * That means there must be no active key but a pending key at an unaligned + * slot. + * If so, we try to move the pending key to the new slot. + * Note: A potential passive key can exist, it will be shifted correspondingly!
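The key_next() macro used by the slot selection in tipc_crypto_key_attach() above cycles through the three normal slots while skipping slot 0, which is reserved for the master key. A runnable illustration:

#include <stdio.h>

#define KEY_MAX 3
#define key_next(cur) ((cur) % KEY_MAX + 1)

int main(void)
{
        for (int k = 1, i = 0; i < 5; i++, k = key_next(k))
                printf("%d ", k);       /* prints: 1 2 3 1 2 */
        printf("\n");
        return 0;
}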
+ * + * Return: "true" if key is successfully aligned, otherwise "false" + */ +static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending) +{ + struct tipc_aead *tmp1, *tmp2 = NULL; + struct tipc_key key; + bool aligned = false; + u8 new_passive = 0; + int x; + + spin_lock(&rx->lock); + key = rx->key; + if (key.pending == new_pending) { + aligned = true; + goto exit; + } + if (key.active) + goto exit; + if (!key.pending) + goto exit; + if (tipc_aead_users(rx->aead[key.pending]) > 0) + goto exit; + + /* Try to "isolate" this pending key first */ + tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock); + if (!refcount_dec_if_one(&tmp1->refcnt)) + goto exit; + rcu_assign_pointer(rx->aead[key.pending], NULL); + + /* Move passive key if any */ + if (key.passive) { + tmp2 = rcu_replace_pointer(rx->aead[key.passive], tmp2, lockdep_is_held(&rx->lock)); + x = (key.passive - key.pending + new_pending) % KEY_MAX; + new_passive = (x <= 0) ? x + KEY_MAX : x; + } + + /* Re-allocate the key(s) */ + tipc_crypto_key_set_state(rx, new_passive, 0, new_pending); + rcu_assign_pointer(rx->aead[new_pending], tmp1); + if (new_passive) + rcu_assign_pointer(rx->aead[new_passive], tmp2); + refcount_set(&tmp1->refcnt, 1); + aligned = true; + pr_info_ratelimited("%s: key[%d] -> key[%d]\n", rx->name, key.pending, + new_pending); + +exit: + spin_unlock(&rx->lock); + return aligned; +} + +/** + * tipc_crypto_key_pick_tx - Pick one TX key for message decryption + * @tx: TX crypto handle + * @rx: RX crypto handle (can be NULL) + * @skb: the message skb which will be decrypted later + * @tx_key: peer TX key id + * + * This function looks up the existing TX keys and pick one which is suitable + * for the message decryption, that must be a cluster key and not used before + * on the same message (i.e. recursive). + * + * Return: the TX AEAD key handle in case of success, otherwise NULL + */ +static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, + struct tipc_crypto *rx, + struct sk_buff *skb, + u8 tx_key) +{ + struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb); + struct tipc_aead *aead = NULL; + struct tipc_key key = tx->key; + u8 k, i = 0; + + /* Initialize data if not yet */ + if (!skb_cb->tx_clone_deferred) { + skb_cb->tx_clone_deferred = 1; + memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx)); + } + + skb_cb->tx_clone_ctx.rx = rx; + if (++skb_cb->tx_clone_ctx.recurs > 2) + return NULL; + + /* Pick one TX key */ + spin_lock(&tx->lock); + if (tx_key == KEY_MASTER) { + aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock); + goto done; + } + do { + k = (i == 0) ? key.pending : + ((i == 1) ? key.active : key.passive); + if (!k) + continue; + aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock); + if (!aead) + continue; + if (aead->mode != CLUSTER_KEY || + aead == skb_cb->tx_clone_ctx.last) { + aead = NULL; + continue; + } + /* Ok, found one cluster key */ + skb_cb->tx_clone_ctx.last = aead; + WARN_ON(skb->next); + skb->next = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb->next)) + pr_warn("Failed to clone skb for next round if any\n"); + break; + } while (++i < 3); + +done: + if (likely(aead)) + WARN_ON(!refcount_inc_not_zero(&aead->refcnt)); + spin_unlock(&tx->lock); + + return aead; +} + +/** + * tipc_crypto_key_synch: Synch own key data according to peer key status + * @rx: RX crypto handle + * @skb: TIPCv2 message buffer (incl. 
the ehdr from peer) + * + * This function updates the peer node related data as the peer RX active key + * has changed, so the number of TX keys' users on this node are increased and + * decreased correspondingly. + * + * It also considers if peer has no key, then we need to make own master key + * (if any) taking over i.e. starting grace period and also trigger key + * distributing process. + * + * The "per-peer" sndnxt is also reset when the peer key has switched. + */ +static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb) +{ + struct tipc_ehdr *ehdr = (struct tipc_ehdr *)skb_network_header(skb); + struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; + struct tipc_msg *hdr = buf_msg(skb); + u32 self = tipc_own_addr(rx->net); + u8 cur, new; + unsigned long delay; + + /* Update RX 'key_master' flag according to peer, also mark "legacy" if + * a peer has no master key. + */ + rx->key_master = ehdr->master_key; + if (!rx->key_master) + tx->legacy_user = 1; + + /* For later cases, apply only if message is destined to this node */ + if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self) + return; + + /* Case 1: Peer has no keys, let's make master key take over */ + if (ehdr->rx_nokey) { + /* Set or extend grace period */ + tx->timer2 = jiffies; + /* Schedule key distributing for the peer if not yet */ + if (tx->key.keys && + !atomic_cmpxchg(&rx->key_distr, 0, KEY_DISTR_SCHED)) { + get_random_bytes(&delay, 2); + delay %= 5; + delay = msecs_to_jiffies(500 * ++delay); + if (queue_delayed_work(tx->wq, &rx->work, delay)) + tipc_node_get(rx->node); + } + } else { + /* Cancel a pending key distributing if any */ + atomic_xchg(&rx->key_distr, 0); + } + + /* Case 2: Peer RX active key has changed, let's update own TX users */ + cur = atomic_read(&rx->peer_rx_active); + new = ehdr->rx_key_active; + if (tx->key.keys && + cur != new && + atomic_cmpxchg(&rx->peer_rx_active, cur, new) == cur) { + if (new) + tipc_aead_users_inc(tx->aead[new], INT_MAX); + if (cur) + tipc_aead_users_dec(tx->aead[cur], 0); + + atomic64_set(&rx->sndnxt, 0); + /* Mark the point TX key users changed */ + tx->timer1 = jiffies; + + pr_debug("%s: key users changed %d-- %d++, peer %s\n", + tx->name, cur, new, rx->name); + } +} + +static int tipc_crypto_key_revoke(struct net *net, u8 tx_key) +{ + struct tipc_crypto *tx = tipc_net(net)->crypto_tx; + struct tipc_key key; + + spin_lock_bh(&tx->lock); + key = tx->key; + WARN_ON(!key.active || tx_key != key.active); + + /* Free the active key */ + tipc_crypto_key_set_state(tx, key.passive, 0, key.pending); + tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); + spin_unlock_bh(&tx->lock); + + pr_warn("%s: key is revoked\n", tx->name); + return -EKEYREVOKED; +} + +int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, + struct tipc_node *node) +{ + struct tipc_crypto *c; + + if (*crypto) + return -EEXIST; + + /* Allocate crypto */ + c = kzalloc(sizeof(*c), GFP_ATOMIC); + if (!c) + return -ENOMEM; + + /* Allocate workqueue on TX */ + if (!node) { + c->wq = alloc_ordered_workqueue("tipc_crypto", 0); + if (!c->wq) { + kfree(c); + return -ENOMEM; + } + } + + /* Allocate statistic structure */ + c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); + if (!c->stats) { + if (c->wq) + destroy_workqueue(c->wq); + kfree_sensitive(c); + return -ENOMEM; + } + + c->flags = 0; + c->net = net; + c->node = node; + get_random_bytes(&c->key_gen, 2); + tipc_crypto_key_set_state(c, 0, 0, 0); + atomic_set(&c->key_distr, 0); + 
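The randomized back-off used above when scheduling key distribution spreads MSG_CRYPTO bursts from many peers over 0.5 to 2.5 seconds. A user-space sketch of the same arithmetic, with rand() standing in for get_random_bytes():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static unsigned int distr_delay_ms(void)
{
        uint16_t r = (uint16_t)rand();  /* two random octets */

        return 500 * (r % 5 + 1);       /* 500, 1000, ... or 2500 ms */
}

int main(void)
{
        srand((unsigned int)time(NULL));
        for (int i = 0; i < 4; i++)
                printf("%u ms\n", distr_delay_ms());
        return 0;
}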
atomic_set(&c->peer_rx_active, 0); + atomic64_set(&c->sndnxt, 0); + c->timer1 = jiffies; + c->timer2 = jiffies; + c->rekeying_intv = TIPC_REKEYING_INTV_DEF; + spin_lock_init(&c->lock); + scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX", + (is_rx(c)) ? tipc_node_get_id_str(c->node) : + tipc_own_id_string(c->net)); + + if (is_rx(c)) + INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx); + else + INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx); + + *crypto = c; + return 0; +} + +void tipc_crypto_stop(struct tipc_crypto **crypto) +{ + struct tipc_crypto *c = *crypto; + u8 k; + + if (!c) + return; + + /* Flush any queued works & destroy wq */ + if (is_tx(c)) { + c->rekeying_intv = 0; + cancel_delayed_work_sync(&c->work); + destroy_workqueue(c->wq); + } + + /* Release AEAD keys */ + rcu_read_lock(); + for (k = KEY_MIN; k <= KEY_MAX; k++) + tipc_aead_put(rcu_dereference(c->aead[k])); + rcu_read_unlock(); + pr_debug("%s: has been stopped\n", c->name); + + /* Free this crypto statistics */ + free_percpu(c->stats); + + *crypto = NULL; + kfree_sensitive(c); +} + +void tipc_crypto_timeout(struct tipc_crypto *rx) +{ + struct tipc_net *tn = tipc_net(rx->net); + struct tipc_crypto *tx = tn->crypto_tx; + struct tipc_key key; + int cmd; + + /* TX pending: taking all users & stable -> active */ + spin_lock(&tx->lock); + key = tx->key; + if (key.active && tipc_aead_users(tx->aead[key.active]) > 0) + goto s1; + if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0) + goto s1; + if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_TIME)) + goto s1; + + tipc_crypto_key_set_state(tx, key.passive, key.pending, 0); + if (key.active) + tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); + this_cpu_inc(tx->stats->stat[STAT_SWITCHES]); + pr_info("%s: key[%d] is activated\n", tx->name, key.pending); + +s1: + spin_unlock(&tx->lock); + + /* RX pending: having user -> active */ + spin_lock(&rx->lock); + key = rx->key; + if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0) + goto s2; + + if (key.active) + key.passive = key.active; + key.active = key.pending; + rx->timer2 = jiffies; + tipc_crypto_key_set_state(rx, key.passive, key.active, 0); + this_cpu_inc(rx->stats->stat[STAT_SWITCHES]); + pr_info("%s: key[%d] is activated\n", rx->name, key.pending); + goto s5; + +s2: + /* RX pending: not working -> remove */ + if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -10) + goto s3; + + tipc_crypto_key_set_state(rx, key.passive, key.active, 0); + tipc_crypto_key_detach(rx->aead[key.pending], &rx->lock); + pr_debug("%s: key[%d] is removed\n", rx->name, key.pending); + goto s5; + +s3: + /* RX active: timed out or no user -> pending */ + if (!key.active) + goto s4; + if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM) && + tipc_aead_users(rx->aead[key.active]) > 0) + goto s4; + + if (key.pending) + key.passive = key.active; + else + key.pending = key.active; + rx->timer2 = jiffies; + tipc_crypto_key_set_state(rx, key.passive, 0, key.pending); + tipc_aead_users_set(rx->aead[key.pending], 0); + pr_debug("%s: key[%d] is deactivated\n", rx->name, key.active); + goto s5; + +s4: + /* RX passive: outdated or not working -> free */ + if (!key.passive) + goto s5; + if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM) && + tipc_aead_users(rx->aead[key.passive]) > -10) + goto s5; + + tipc_crypto_key_set_state(rx, 0, key.active, key.pending); + tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock); + pr_debug("%s: key[%d] is freed\n", rx->name, key.passive); + +s5: + 
spin_unlock(&rx->lock); + + /* Relax it here, the flag will be set again if it really is, but only + * when we are not in grace period for safety! + */ + if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) + tx->legacy_user = 0; + + /* Limit max_tfms & do debug commands if needed */ + if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM)) + return; + + cmd = sysctl_tipc_max_tfms; + sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF; + tipc_crypto_do_cmd(rx->net, cmd); +} + +static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode, u8 type) +{ + struct sk_buff *skb; + + skb = skb_clone(_skb, GFP_ATOMIC); + if (skb) { + TIPC_SKB_CB(skb)->xmit_type = type; + tipc_crypto_xmit(net, &skb, b, dst, __dnode); + if (skb) + b->media->send_msg(net, skb, b, dst); + } +} + +/** + * tipc_crypto_xmit - Build & encrypt TIPC message for xmit + * @net: struct net + * @skb: input/output message skb pointer + * @b: bearer used for xmit later + * @dst: destination media address + * @__dnode: destination node for reference if any + * + * First, build an encryption message header on the top of the message, then + * encrypt the original TIPC message by using the pending, master or active + * key with this preference order. + * If the encryption is successful, the encrypted skb is returned directly or + * via the callback. + * Otherwise, the skb is freed! + * + * Return: + * * 0 : the encryption has succeeded (or no encryption) + * * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made + * * -ENOKEK : the encryption has failed due to no key + * * -EKEYREVOKED : the encryption has failed due to key revoked + * * -ENOMEM : the encryption has failed due to no memory + * * < 0 : the encryption has failed due to other reasons + */ +int tipc_crypto_xmit(struct net *net, struct sk_buff **skb, + struct tipc_bearer *b, struct tipc_media_addr *dst, + struct tipc_node *__dnode) +{ + struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode); + struct tipc_crypto *tx = tipc_net(net)->crypto_tx; + struct tipc_crypto_stats __percpu *stats = tx->stats; + struct tipc_msg *hdr = buf_msg(*skb); + struct tipc_key key = tx->key; + struct tipc_aead *aead = NULL; + u32 user = msg_user(hdr); + u32 type = msg_type(hdr); + int rc = -ENOKEY; + u8 tx_key = 0; + + /* No encryption? 
*/ + if (!tx->working) + return 0; + + /* Pending key if peer has active on it or probing time */ + if (unlikely(key.pending)) { + tx_key = key.pending; + if (!tx->key_master && !key.active) + goto encrypt; + if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key) + goto encrypt; + if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) { + pr_debug("%s: probing for key[%d]\n", tx->name, + key.pending); + goto encrypt; + } + if (user == LINK_CONFIG || user == LINK_PROTOCOL) + tipc_crypto_clone_msg(net, *skb, b, dst, __dnode, + SKB_PROBING); + } + + /* Master key if this is a *vital* message or in grace period */ + if (tx->key_master) { + tx_key = KEY_MASTER; + if (!key.active) + goto encrypt; + if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) { + pr_debug("%s: gracing for msg (%d %d)\n", tx->name, + user, type); + goto encrypt; + } + if (user == LINK_CONFIG || + (user == LINK_PROTOCOL && type == RESET_MSG) || + (user == MSG_CRYPTO && type == KEY_DISTR_MSG) || + time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) { + if (__rx && __rx->key_master && + !atomic_read(&__rx->peer_rx_active)) + goto encrypt; + if (!__rx) { + if (likely(!tx->legacy_user)) + goto encrypt; + tipc_crypto_clone_msg(net, *skb, b, dst, + __dnode, SKB_GRACING); + } + } + } + + /* Else, use the active key if any */ + if (likely(key.active)) { + tx_key = key.active; + goto encrypt; + } + + goto exit; + +encrypt: + aead = tipc_aead_get(tx->aead[tx_key]); + if (unlikely(!aead)) + goto exit; + rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx); + if (likely(rc > 0)) + rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode); + +exit: + switch (rc) { + case 0: + this_cpu_inc(stats->stat[STAT_OK]); + break; + case -EINPROGRESS: + case -EBUSY: + this_cpu_inc(stats->stat[STAT_ASYNC]); + *skb = NULL; + return rc; + default: + this_cpu_inc(stats->stat[STAT_NOK]); + if (rc == -ENOKEY) + this_cpu_inc(stats->stat[STAT_NOKEYS]); + else if (rc == -EKEYREVOKED) + this_cpu_inc(stats->stat[STAT_BADKEYS]); + kfree_skb(*skb); + *skb = NULL; + break; + } + + tipc_aead_put(aead); + return rc; +} + +/** + * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer + * @net: struct net + * @rx: RX crypto handle + * @skb: input/output message skb pointer + * @b: bearer where the message has been received + * + * If the decryption is successful, the decrypted skb is returned directly or + * as the callback, the encryption header and auth tag will be trimmed out + * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete(). + * Otherwise, the skb will be freed! + * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX + * cluster key(s) can be taken for decryption (- recursive). + * + * Return: + * * 0 : the decryption has successfully completed + * * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made + * * -ENOKEY : the decryption has failed due to no key + * * -EBADMSG : the decryption has failed due to bad message + * * -ENOMEM : the decryption has failed due to no memory + * * < 0 : the decryption has failed due to other reasons + */ +int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx, + struct sk_buff **skb, struct tipc_bearer *b) +{ + struct tipc_crypto *tx = tipc_net(net)->crypto_tx; + struct tipc_crypto_stats __percpu *stats; + struct tipc_aead *aead = NULL; + struct tipc_key key; + int rc = -ENOKEY; + u8 tx_key, n; + + tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key; + + /* New peer? + * Let's try with TX key (i.e. cluster mode) & verify the skb first! 
+ */ + if (unlikely(!rx || tx_key == KEY_MASTER)) + goto pick_tx; + + /* Pick RX key according to TX key if any */ + key = rx->key; + if (tx_key == key.active || tx_key == key.pending || + tx_key == key.passive) + goto decrypt; + + /* Unknown key, let's try to align RX key(s) */ + if (tipc_crypto_key_try_align(rx, tx_key)) + goto decrypt; + +pick_tx: + /* No key suitable? Try to pick one from TX... */ + aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key); + if (aead) + goto decrypt; + goto exit; + +decrypt: + rcu_read_lock(); + if (!aead) + aead = tipc_aead_get(rx->aead[tx_key]); + rc = tipc_aead_decrypt(net, aead, *skb, b); + rcu_read_unlock(); + +exit: + stats = ((rx) ?: tx)->stats; + switch (rc) { + case 0: + this_cpu_inc(stats->stat[STAT_OK]); + break; + case -EINPROGRESS: + case -EBUSY: + this_cpu_inc(stats->stat[STAT_ASYNC]); + *skb = NULL; + return rc; + default: + this_cpu_inc(stats->stat[STAT_NOK]); + if (rc == -ENOKEY) { + kfree_skb(*skb); + *skb = NULL; + if (rx) { + /* Mark rx->nokey only if we dont have a + * pending received session key, nor a newer + * one i.e. in the next slot. + */ + n = key_next(tx_key); + rx->nokey = !(rx->skey || + rcu_access_pointer(rx->aead[n])); + pr_debug_ratelimited("%s: nokey %d, key %d/%x\n", + rx->name, rx->nokey, + tx_key, rx->key.keys); + tipc_node_put(rx->node); + } + this_cpu_inc(stats->stat[STAT_NOKEYS]); + return rc; + } else if (rc == -EBADMSG) { + this_cpu_inc(stats->stat[STAT_BADMSGS]); + } + break; + } + + tipc_crypto_rcv_complete(net, aead, b, skb, rc); + return rc; +} + +static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, + struct tipc_bearer *b, + struct sk_buff **skb, int err) +{ + struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb); + struct tipc_crypto *rx = aead->crypto; + struct tipc_aead *tmp = NULL; + struct tipc_ehdr *ehdr; + struct tipc_node *n; + + /* Is this completed by TX? */ + if (unlikely(is_tx(aead->crypto))) { + rx = skb_cb->tx_clone_ctx.rx; + pr_debug("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n", + (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead, + (*skb)->next, skb_cb->flags); + pr_debug("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n", + skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last, + aead->crypto->aead[1], aead->crypto->aead[2], + aead->crypto->aead[3]); + if (unlikely(err)) { + if (err == -EBADMSG && (*skb)->next) + tipc_rcv(net, (*skb)->next, b); + goto free_skb; + } + + if (likely((*skb)->next)) { + kfree_skb((*skb)->next); + (*skb)->next = NULL; + } + ehdr = (struct tipc_ehdr *)(*skb)->data; + if (!rx) { + WARN_ON(ehdr->user != LINK_CONFIG); + n = tipc_node_create(net, 0, ehdr->id, 0xffffu, 0, + true); + rx = tipc_node_crypto_rx(n); + if (unlikely(!rx)) + goto free_skb; + } + + /* Ignore cloning if it was TX master key */ + if (ehdr->tx_key == KEY_MASTER) + goto rcv; + if (tipc_aead_clone(&tmp, aead) < 0) + goto rcv; + WARN_ON(!refcount_inc_not_zero(&tmp->refcnt)); + if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) { + tipc_aead_free(&tmp->rcu); + goto rcv; + } + tipc_aead_put(aead); + aead = tmp; + } + + if (unlikely(err)) { + tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN); + goto free_skb; + } + + /* Set the RX key's user */ + tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1); + + /* Mark this point, RX works */ + rx->timer1 = jiffies; + +rcv: + /* Remove ehdr & auth. 
tag prior to tipc_rcv() */ + ehdr = (struct tipc_ehdr *)(*skb)->data; + + /* Mark this point, RX passive still works */ + if (rx->key.passive && ehdr->tx_key == rx->key.passive) + rx->timer2 = jiffies; + + skb_reset_network_header(*skb); + skb_pull(*skb, tipc_ehdr_size(ehdr)); + if (pskb_trim(*skb, (*skb)->len - aead->authsize)) + goto free_skb; + + /* Validate TIPCv2 message */ + if (unlikely(!tipc_msg_validate(skb))) { + pr_err_ratelimited("Packet dropped after decryption!\n"); + goto free_skb; + } + + /* Ok, everything's fine, try to synch own keys according to peers' */ + tipc_crypto_key_synch(rx, *skb); + + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + + /* Mark skb decrypted */ + skb_cb->decrypted = 1; + + /* Clear clone cxt if any */ + if (likely(!skb_cb->tx_clone_deferred)) + goto exit; + skb_cb->tx_clone_deferred = 0; + memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx)); + goto exit; + +free_skb: + kfree_skb(*skb); + *skb = NULL; + +exit: + tipc_aead_put(aead); + if (rx) + tipc_node_put(rx->node); +} + +static void tipc_crypto_do_cmd(struct net *net, int cmd) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_crypto *tx = tn->crypto_tx, *rx; + struct list_head *p; + unsigned int stat; + int i, j, cpu; + char buf[200]; + + /* Currently only one command is supported */ + switch (cmd) { + case 0xfff1: + goto print_stats; + default: + return; + } + +print_stats: + /* Print a header */ + pr_info("\n=============== TIPC Crypto Statistics ===============\n\n"); + + /* Print key status */ + pr_info("Key status:\n"); + pr_info("TX(%7.7s)\n%s", tipc_own_id_string(net), + tipc_crypto_key_dump(tx, buf)); + + rcu_read_lock(); + for (p = tn->node_list.next; p != &tn->node_list; p = p->next) { + rx = tipc_node_crypto_rx_by_list(p); + pr_info("RX(%7.7s)\n%s", tipc_node_get_id_str(rx->node), + tipc_crypto_key_dump(rx, buf)); + } + rcu_read_unlock(); + + /* Print crypto statistics */ + for (i = 0, j = 0; i < MAX_STATS; i++) + j += scnprintf(buf + j, 200 - j, "|%11s ", hstats[i]); + pr_info("Counter %s", buf); + + memset(buf, '-', 115); + buf[115] = '\0'; + pr_info("%s\n", buf); + + j = scnprintf(buf, 200, "TX(%7.7s) ", tipc_own_id_string(net)); + for_each_possible_cpu(cpu) { + for (i = 0; i < MAX_STATS; i++) { + stat = per_cpu_ptr(tx->stats, cpu)->stat[i]; + j += scnprintf(buf + j, 200 - j, "|%11d ", stat); + } + pr_info("%s", buf); + j = scnprintf(buf, 200, "%12s", " "); + } + + rcu_read_lock(); + for (p = tn->node_list.next; p != &tn->node_list; p = p->next) { + rx = tipc_node_crypto_rx_by_list(p); + j = scnprintf(buf, 200, "RX(%7.7s) ", + tipc_node_get_id_str(rx->node)); + for_each_possible_cpu(cpu) { + for (i = 0; i < MAX_STATS; i++) { + stat = per_cpu_ptr(rx->stats, cpu)->stat[i]; + j += scnprintf(buf + j, 200 - j, "|%11d ", + stat); + } + pr_info("%s", buf); + j = scnprintf(buf, 200, "%12s", " "); + } + } + rcu_read_unlock(); + + pr_info("\n======================== Done ========================\n"); +} + +static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf) +{ + struct tipc_key key = c->key; + struct tipc_aead *aead; + int k, i = 0; + char *s; + + for (k = KEY_MIN; k <= KEY_MAX; k++) { + if (k == KEY_MASTER) { + if (is_rx(c)) + continue; + if (time_before(jiffies, + c->timer2 + TIPC_TX_GRACE_PERIOD)) + s = "ACT"; + else + s = "PAS"; + } else { + if (k == key.passive) + s = "PAS"; + else if (k == key.active) + s = "ACT"; + else if (k == key.pending) + s = "PEN"; + else + s = "-"; + } + i += scnprintf(buf + i, 
200 - i, "\tKey%d: %s", k, s); + + rcu_read_lock(); + aead = rcu_dereference(c->aead[k]); + if (aead) + i += scnprintf(buf + i, 200 - i, + "{\"0x...%s\", \"%s\"}/%d:%d", + aead->hint, + (aead->mode == CLUSTER_KEY) ? "c" : "p", + atomic_read(&aead->users), + refcount_read(&aead->refcnt)); + rcu_read_unlock(); + i += scnprintf(buf + i, 200 - i, "\n"); + } + + if (is_rx(c)) + i += scnprintf(buf + i, 200 - i, "\tPeer RX active: %d\n", + atomic_read(&c->peer_rx_active)); + + return buf; +} + +static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, + char *buf) +{ + struct tipc_key *key = &old; + int k, i = 0; + char *s; + + /* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */ +again: + i += scnprintf(buf + i, 32 - i, "["); + for (k = KEY_1; k <= KEY_3; k++) { + if (k == key->passive) + s = "pas"; + else if (k == key->active) + s = "act"; + else if (k == key->pending) + s = "pen"; + else + s = "-"; + i += scnprintf(buf + i, 32 - i, + (k != KEY_3) ? "%s " : "%s", s); + } + if (key != &new) { + i += scnprintf(buf + i, 32 - i, "] -> "); + key = &new; + goto again; + } + i += scnprintf(buf + i, 32 - i, "]"); + return buf; +} + +/** + * tipc_crypto_msg_rcv - Common 'MSG_CRYPTO' processing point + * @net: the struct net + * @skb: the receiving message buffer + */ +void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb) +{ + struct tipc_crypto *rx; + struct tipc_msg *hdr; + + if (unlikely(skb_linearize(skb))) + goto exit; + + hdr = buf_msg(skb); + rx = tipc_node_crypto_rx_by_addr(net, msg_prevnode(hdr)); + if (unlikely(!rx)) + goto exit; + + switch (msg_type(hdr)) { + case KEY_DISTR_MSG: + if (tipc_crypto_key_rcv(rx, hdr)) + goto exit; + break; + default: + break; + } + + tipc_node_put(rx->node); + +exit: + kfree_skb(skb); +} + +/** + * tipc_crypto_key_distr - Distribute a TX key + * @tx: the TX crypto + * @key: the key's index + * @dest: the destination tipc node, = NULL if distributing to all nodes + * + * Return: 0 in case of success, otherwise < 0 + */ +int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key, + struct tipc_node *dest) +{ + struct tipc_aead *aead; + u32 dnode = tipc_node_get_addr(dest); + int rc = -ENOKEY; + + if (!sysctl_tipc_key_exchange_enabled) + return 0; + + if (key) { + rcu_read_lock(); + aead = tipc_aead_get(tx->aead[key]); + if (likely(aead)) { + rc = tipc_crypto_key_xmit(tx->net, aead->key, + aead->gen, aead->mode, + dnode); + tipc_aead_put(aead); + } + rcu_read_unlock(); + } + + return rc; +} + +/** + * tipc_crypto_key_xmit - Send a session key + * @net: the struct net + * @skey: the session key to be sent + * @gen: the key's generation + * @mode: the key's mode + * @dnode: the destination node address, = 0 if broadcasting to all nodes + * + * The session key 'skey' is packed in a TIPC v2 'MSG_CRYPTO/KEY_DISTR_MSG' + * as its data section, then xmit-ed through the uc/bc link. 
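As a concrete illustration of the packing described above, here is a stand-alone sketch that builds and re-validates a key payload the way tipc_crypto_key_xmit() and tipc_crypto_key_rcv() do in the nearby hunks. The constants (TIPC_AEAD_ALG_NAME = 32, the 36-byte fixed header, the "gcm(aes)" algorithm string, the 36-byte keylen bound) are restated here as assumptions and are not part of this diff:

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TIPC_AEAD_ALG_NAME 32			/* assumed uapi value */
#define AEAD_KEY_HDR (TIPC_AEAD_ALG_NAME + sizeof(uint32_t))

/* Pack [alg_name | __be32 keylen | key bytes], mirroring the layout that
 * tipc_crypto_key_xmit() writes into the message data section. */
static size_t pack_key_distr(uint8_t *buf, const char *alg,
			     const uint8_t *key, uint32_t keylen)
{
	uint32_t be_len = htonl(keylen);

	memset(buf, 0, TIPC_AEAD_ALG_NAME);
	memcpy(buf, alg, strnlen(alg, TIPC_AEAD_ALG_NAME));
	memcpy(buf + TIPC_AEAD_ALG_NAME, &be_len, sizeof(be_len));
	memcpy(buf + AEAD_KEY_HDR, key, keylen);
	return AEAD_KEY_HDR + keylen;
}

/* Mirror of the receive-side sanity checks in tipc_crypto_key_rcv(). */
static bool key_distr_payload_ok(const uint8_t *data, size_t size,
				 uint32_t keylen_max)
{
	uint32_t keylen;

	if (size < AEAD_KEY_HDR + 1)	/* no room for any key at all */
		return false;
	memcpy(&keylen, data + TIPC_AEAD_ALG_NAME, sizeof(keylen));
	keylen = ntohl(keylen);
	return keylen <= keylen_max && size == AEAD_KEY_HDR + keylen;
}

int main(void)
{
	uint8_t buf[128], key[16] = { 0 };
	size_t n = pack_key_distr(buf, "gcm(aes)", key, sizeof(key));

	/* 32 + 4 + 16 = 52 bytes, and the round trip validates */
	printf("%zu %d\n", n, key_distr_payload_ok(buf, n, 36));
	return 0;
}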
+ * + * Return: 0 in case of success, otherwise < 0 + */ +static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey, + u16 gen, u8 mode, u32 dnode) +{ + struct sk_buff_head pkts; + struct tipc_msg *hdr; + struct sk_buff *skb; + u16 size, cong_link_cnt; + u8 *data; + int rc; + + size = tipc_aead_key_size(skey); + skb = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = buf_msg(skb); + tipc_msg_init(tipc_own_addr(net), hdr, MSG_CRYPTO, KEY_DISTR_MSG, + INT_H_SIZE, dnode); + msg_set_size(hdr, INT_H_SIZE + size); + msg_set_key_gen(hdr, gen); + msg_set_key_mode(hdr, mode); + + data = msg_data(hdr); + *((__be32 *)(data + TIPC_AEAD_ALG_NAME)) = htonl(skey->keylen); + memcpy(data, skey->alg_name, TIPC_AEAD_ALG_NAME); + memcpy(data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->key, + skey->keylen); + + __skb_queue_head_init(&pkts); + __skb_queue_tail(&pkts, skb); + if (dnode) + rc = tipc_node_xmit(net, &pkts, dnode, 0); + else + rc = tipc_bcast_xmit(net, &pkts, &cong_link_cnt); + + return rc; +} + +/** + * tipc_crypto_key_rcv - Receive a session key + * @rx: the RX crypto + * @hdr: the TIPC v2 message incl. the receiving session key in its data + * + * This function retrieves the session key in the message from peer, then + * schedules a RX work to attach the key to the corresponding RX crypto. + * + * Return: "true" if the key has been scheduled for attaching, otherwise + * "false". + */ +static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) +{ + struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; + struct tipc_aead_key *skey = NULL; + u16 key_gen = msg_key_gen(hdr); + u32 size = msg_data_sz(hdr); + u8 *data = msg_data(hdr); + unsigned int keylen; + + /* Verify whether the size can exist in the packet */ + if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) { + pr_debug("%s: message data size is too small\n", rx->name); + goto exit; + } + + keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + + /* Verify the supplied size values */ + if (unlikely(keylen > TIPC_AEAD_KEY_SIZE_MAX || + size != keylen + sizeof(struct tipc_aead_key))) { + pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); + goto exit; + } + + spin_lock(&rx->lock); + if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) { + pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name, + rx->skey, key_gen, rx->key_gen); + goto exit_unlock; + } + + /* Allocate memory for the key */ + skey = kmalloc(size, GFP_ATOMIC); + if (unlikely(!skey)) { + pr_err("%s: unable to allocate memory for skey\n", rx->name); + goto exit_unlock; + } + + /* Copy key from msg data */ + skey->keylen = keylen; + memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME); + memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32), + skey->keylen); + + rx->key_gen = key_gen; + rx->skey_mode = msg_key_mode(hdr); + rx->skey = skey; + rx->nokey = 0; + mb(); /* for nokey flag */ + +exit_unlock: + spin_unlock(&rx->lock); + +exit: + /* Schedule the key attaching on this crypto */ + if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0))) + return true; + + return false; +} + +/** + * tipc_crypto_work_rx - Scheduled RX works handler + * @work: the struct RX work + * + * The function processes the previous scheduled works i.e. distributing TX key + * or attaching a received session key on RX crypto. 
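The work handler that follows in this diff advances a tri-state distribution flag with atomic_cmpxchg() so that each scheduled request distributes the TX key exactly once. A minimal user-space rendition of that pattern using C11 atomics; the state names mirror the kernel's KEY_DISTR_* values but are local assumptions here:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { KEY_DISTR_IDLE = 0, KEY_DISTR_SCHED = 1, KEY_DISTR_COMPL = 2 };

static _Atomic int key_distr = KEY_DISTR_SCHED;

/* Returns true if this invocation won the right to distribute the key. */
static bool try_distr(void)
{
	int expected = KEY_DISTR_SCHED;

	if (atomic_compare_exchange_strong(&key_distr, &expected,
					   KEY_DISTR_COMPL))
		return true;	/* SCHED -> COMPL: do the distribution */

	/* Otherwise release a completed handshake back to idle. */
	expected = KEY_DISTR_COMPL;
	atomic_compare_exchange_strong(&key_distr, &expected, KEY_DISTR_IDLE);
	return false;
}

int main(void)
{
	printf("first run distributes: %d\n", try_distr());	/* 1 */
	printf("second run distributes: %d\n", try_distr());	/* 0 */
	return 0;
}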
+ */ +static void tipc_crypto_work_rx(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct tipc_crypto *rx = container_of(dwork, struct tipc_crypto, work); + struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; + unsigned long delay = msecs_to_jiffies(5000); + bool resched = false; + u8 key; + int rc; + + /* Case 1: Distribute TX key to peer if scheduled */ + if (atomic_cmpxchg(&rx->key_distr, + KEY_DISTR_SCHED, + KEY_DISTR_COMPL) == KEY_DISTR_SCHED) { + /* Always pick the newest one for distributing */ + key = tx->key.pending ?: tx->key.active; + rc = tipc_crypto_key_distr(tx, key, rx->node); + if (unlikely(rc)) + pr_warn("%s: unable to distr key[%d] to %s, err %d\n", + tx->name, key, tipc_node_get_id_str(rx->node), + rc); + + /* Sched for key_distr releasing */ + resched = true; + } else { + atomic_cmpxchg(&rx->key_distr, KEY_DISTR_COMPL, 0); + } + + /* Case 2: Attach a pending received session key from peer if any */ + if (rx->skey) { + rc = tipc_crypto_key_init(rx, rx->skey, rx->skey_mode, false); + if (unlikely(rc < 0)) + pr_warn("%s: unable to attach received skey, err %d\n", + rx->name, rc); + switch (rc) { + case -EBUSY: + case -ENOMEM: + /* Resched the key attaching */ + resched = true; + break; + default: + synchronize_rcu(); + kfree(rx->skey); + rx->skey = NULL; + break; + } + } + + if (resched && queue_delayed_work(tx->wq, &rx->work, delay)) + return; + + tipc_node_put(rx->node); +} + +/** + * tipc_crypto_rekeying_sched - (Re)schedule rekeying w/o new interval + * @tx: TX crypto + * @changed: if the rekeying needs to be rescheduled with new interval + * @new_intv: new rekeying interval (when "changed" = true) + */ +void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed, + u32 new_intv) +{ + unsigned long delay; + bool now = false; + + if (changed) { + if (new_intv == TIPC_REKEYING_NOW) + now = true; + else + tx->rekeying_intv = new_intv; + cancel_delayed_work_sync(&tx->work); + } + + if (tx->rekeying_intv || now) { + delay = (now) ? 0 : tx->rekeying_intv * 60 * 1000; + queue_delayed_work(tx->wq, &tx->work, msecs_to_jiffies(delay)); + } +} + +/** + * tipc_crypto_work_tx - Scheduled TX works handler + * @work: the struct TX work + * + * The function processes the previous scheduled work, i.e. key rekeying, by + * generating a new session key based on current one, then attaching it to the + * TX crypto and finally distributing it to peers. It also re-schedules the + * rekeying if needed. 
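tipc_crypto_rekeying_sched() above treats the configured interval as minutes and converts it into a delayed-work timeout. A trivial sketch of that arithmetic; the TIPC_REKEYING_NOW sentinel value is an assumption, not taken from this diff:

#include <stdio.h>

#define TIPC_REKEYING_NOW 0xffffffffU	/* assumed "rekey now" sentinel */

/* Mirror of the delay computation in tipc_crypto_rekeying_sched(). */
static unsigned long rekeying_delay_ms(unsigned int new_intv)
{
	if (new_intv == TIPC_REKEYING_NOW)	/* immediate rekeying */
		return 0;
	return (unsigned long)new_intv * 60 * 1000;	/* minutes -> ms */
}

int main(void)
{
	/* e.g. a 60-minute interval -> 3600000 ms between rekeying works */
	printf("%lu\n", rekeying_delay_ms(60));
	printf("%lu\n", rekeying_delay_ms(TIPC_REKEYING_NOW));
	return 0;
}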
+ */ +static void tipc_crypto_work_tx(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work); + struct tipc_aead_key *skey = NULL; + struct tipc_key key = tx->key; + struct tipc_aead *aead; + int rc = -ENOMEM; + + if (unlikely(key.pending)) + goto resched; + + /* Take current key as a template */ + rcu_read_lock(); + aead = rcu_dereference(tx->aead[key.active ?: KEY_MASTER]); + if (unlikely(!aead)) { + rcu_read_unlock(); + /* At least one key should exist for securing */ + return; + } + + /* Lets duplicate it first */ + skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC); + rcu_read_unlock(); + + /* Now, generate new key, initiate & distribute it */ + if (likely(skey)) { + rc = tipc_aead_key_generate(skey) ?: + tipc_crypto_key_init(tx, skey, PER_NODE_KEY, false); + if (likely(rc > 0)) + rc = tipc_crypto_key_distr(tx, rc, NULL); + kfree_sensitive(skey); + } + + if (unlikely(rc)) + pr_warn_ratelimited("%s: rekeying returns %d\n", tx->name, rc); + +resched: + /* Re-schedule rekeying if any */ + tipc_crypto_rekeying_sched(tx, false, 0); +} diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h new file mode 100644 index 000000000000..ce7d4cc8a9e0 --- /dev/null +++ b/net/tipc/crypto.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * net/tipc/crypto.h: Include file for TIPC crypto + * + * Copyright (c) 2019, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifdef CONFIG_TIPC_CRYPTO +#ifndef _TIPC_CRYPTO_H +#define _TIPC_CRYPTO_H + +#include "core.h" +#include "node.h" +#include "msg.h" +#include "bearer.h" + +#define TIPC_EVERSION 7 + +/* AEAD aes(gcm) */ +#define TIPC_AES_GCM_KEY_SIZE_128 16 +#define TIPC_AES_GCM_KEY_SIZE_192 24 +#define TIPC_AES_GCM_KEY_SIZE_256 32 + +#define TIPC_AES_GCM_SALT_SIZE 4 +#define TIPC_AES_GCM_IV_SIZE 12 +#define TIPC_AES_GCM_TAG_SIZE 16 + +/* + * TIPC crypto modes: + * - CLUSTER_KEY: + * One single key is used for both TX & RX in all nodes in the cluster. + * - PER_NODE_KEY: + * Each nodes in the cluster has one TX key, for RX a node needs to know + * its peers' TX key for the decryption of messages from those nodes. + */ +enum { + CLUSTER_KEY = 1, + PER_NODE_KEY = (1 << 1), +}; + +extern int sysctl_tipc_max_tfms __read_mostly; +extern int sysctl_tipc_key_exchange_enabled __read_mostly; + +/* + * TIPC encryption message format: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 + * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w0:|Ver=7| User |D|TX |RX |K|M|N| Rsvd | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w1:| Seqno | + * w2:| (8 octets) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w3:\ Prevnode \ + * / (4 or 16 octets) / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Encrypted complete TIPC V2 header and user data / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | AuthTag | + * | (16 octets) | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Word0: + * Ver : = 7 i.e. TIPC encryption message version + * User : = 7 (for LINK_PROTOCOL); = 13 (for LINK_CONFIG) or = 0 + * D : The destined bit i.e. the message's destination node is + * "known" or not at the message encryption + * TX : TX key used for the message encryption + * RX : Currently RX active key corresponding to the destination + * node's TX key (when the "D" bit is set) + * K : Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only) + * M : Bit indicates if sender has master key + * N : Bit indicates if sender has no RX keys corresponding to the + * receiver's TX (when the "D" bit is set) + * Rsvd : Reserved bit, field + * Word1-2: + * Seqno : The 64-bit sequence number of the encrypted message, also + * part of the nonce used for the message encryption/decryption + * Word3-: + * Prevnode: The source node address, or ID in case LINK_CONFIG only + * AuthTag : The authentication tag for the message integrity checking + * generated by the message encryption + */ +struct tipc_ehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 destined:1, + user:4, + version:3; + __u8 reserved_1:1, + rx_nokey:1, + master_key:1, + keepalive:1, + rx_key_active:2, + tx_key:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version:3, + user:4, + destined:1; + __u8 tx_key:2, + rx_key_active:2, + keepalive:1, + master_key:1, + rx_nokey:1, + reserved_1:1; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be16 reserved_2; + } __packed; + __be32 w0; + }; + __be64 seqno; + union { + __be32 addr; + __u8 id[NODE_ID_LEN]; /* For a LINK_CONFIG message only! 
*/ + }; +#define EHDR_SIZE (offsetof(struct tipc_ehdr, addr) + sizeof(__be32)) +#define EHDR_CFG_SIZE (sizeof(struct tipc_ehdr)) +#define EHDR_MIN_SIZE (EHDR_SIZE) +#define EHDR_MAX_SIZE (EHDR_CFG_SIZE) +#define EMSG_OVERHEAD (EHDR_SIZE + TIPC_AES_GCM_TAG_SIZE) +} __packed; + +int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, + struct tipc_node *node); +void tipc_crypto_stop(struct tipc_crypto **crypto); +void tipc_crypto_timeout(struct tipc_crypto *rx); +int tipc_crypto_xmit(struct net *net, struct sk_buff **skb, + struct tipc_bearer *b, struct tipc_media_addr *dst, + struct tipc_node *__dnode); +int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx, + struct sk_buff **skb, struct tipc_bearer *b); +int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey, + u8 mode, bool master_key); +void tipc_crypto_key_flush(struct tipc_crypto *c); +int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key, + struct tipc_node *dest); +void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb); +void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed, + u32 new_intv); +int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info); +bool tipc_ehdr_validate(struct sk_buff *skb); + +static inline u32 msg_key_gen(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + +static inline void msg_set_key_gen(struct tipc_msg *m, u32 gen) +{ + msg_set_bits(m, 4, 16, 0xffff, gen); +} + +static inline u32 msg_key_mode(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xf); +} + +static inline void msg_set_key_mode(struct tipc_msg *m, u32 mode) +{ + msg_set_bits(m, 4, 0, 0xf, mode); +} + +#endif /* _TIPC_CRYPTO_H */ +#endif diff --git a/net/tipc/diag.c b/net/tipc/diag.c index 73137f4aeb68..54dde8c4e4d4 100644 --- a/net/tipc/diag.c +++ b/net/tipc/diag.c @@ -95,6 +95,7 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb, } static const struct sock_diag_handler tipc_sock_diag_handler = { + .owner = THIS_MODULE, .family = AF_TIPC, .dump = tipc_sock_diag_handler_dump, }; @@ -113,4 +114,5 @@ module_init(tipc_diag_init); module_exit(tipc_diag_exit); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index c138d68e8a69..775fd4f3f072 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -74,7 +74,8 @@ struct tipc_discoverer { /** * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace - * @type: message type (request or response) + * @skb: buffer containing message + * @mtyp: message type (request or response) * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, @@ -94,6 +95,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, msg_set_dest_domain(hdr, dest_domain); msg_set_bc_netid(hdr, tn->net_id); b->media->addr2msg(msg_media_addr(hdr), &b->addr); + msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random)); msg_set_node_id(hdr, tipc_own_id(net)); } @@ -146,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, { struct net *net = d->net; struct tipc_net *tn = tipc_net(net); - bool trial = time_before(jiffies, tn->addr_trial_end); u32 self = tipc_own_addr(net); + bool trial = time_before(jiffies, tn->addr_trial_end) && !self; if (mtyp == DSC_TRIAL_FAIL_MSG) { if (!trial) @@ -166,7 +168,7 @@ static bool 
tipc_disc_addr_trial_msg(struct tipc_discoverer *d, /* Apply trial address if we just left trial period */ if (!trial && !self) { - tipc_sched_net_finalize(net, tn->trial_addr); + schedule_work(&tn->work); msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); } @@ -193,6 +195,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, { struct tipc_net *tn = tipc_net(net); struct tipc_msg *hdr = buf_msg(skb); + u32 pnet_hash = msg_peer_net_hash(hdr); u16 caps = msg_node_capabilities(hdr); bool legacy = tn->legacy_addr_format; u32 sugg = msg_sugg_node_addr(hdr); @@ -208,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u32 self; int err; - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } hdr = buf_msg(skb); if (caps & TIPC_NODE_ID128) @@ -241,7 +247,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, return; if (!tipc_in_scope(legacy, b->domain, src)) return; - tipc_node_check_dest(net, src, peer_id, b, caps, signature, + tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash, &maddr, &respond, &dupl_addr); if (dupl_addr) disc_dupl_alert(b, src, &maddr); @@ -286,7 +292,7 @@ void tipc_disc_remove_dest(struct tipc_discoverer *d) */ static void tipc_disc_timeout(struct timer_list *t) { - struct tipc_discoverer *d = from_timer(d, t, timer); + struct tipc_discoverer *d = timer_container_of(d, t, timer); struct tipc_net *tn = tipc_net(d->net); struct tipc_media_addr maddr; struct sk_buff *skb = NULL; @@ -305,7 +311,7 @@ static void tipc_disc_timeout(struct timer_list *t) if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) { mod_timer(&d->timer, jiffies + TIPC_DISC_INIT); spin_unlock_bh(&d->lock); - tipc_sched_net_finalize(net, tn->trial_addr); + schedule_work(&tn->work); return; } @@ -337,9 +343,9 @@ exit: * @net: the applicable net namespace * @b: ptr to bearer issuing requests * @dest: destination address for request messages - * @dest_domain: network domain to which links can be established + * @skb: pointer to created frame * - * Returns 0 if successful, otherwise -errno. + * Return: 0 if successful, otherwise -errno. 
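The trial-window test in the hunks above, time_before(jiffies, tn->addr_trial_end) && !self, relies on the kernel's wraparound-safe jiffies comparison. A user-space rendition (renamed to avoid suggesting it is the kernel macro itself) showing why the subtract-and-sign-test form is used instead of a plain '<':

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long jiffies_t;

/* Wraparound-safe "a is earlier than b", like the kernel's time_before(). */
static bool time_before_j(jiffies_t a, jiffies_t b)
{
	return (long)(a - b) < 0;
}

int main(void)
{
	jiffies_t now = (jiffies_t)-10;		/* counter about to wrap */
	jiffies_t trial_end = now + 100;	/* wraps past zero */

	/* Still inside the address trial window despite the wraparound;
	 * a plain 'now < trial_end' would wrongly report the trial over. */
	printf("in trial: %d\n", time_before_j(now, trial_end));
	return 0;
}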
*/ int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest, struct sk_buff **skb) @@ -378,11 +384,11 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, /** * tipc_disc_delete - destroy object sending periodic link setup requests - * @d: ptr to link duest structure + * @d: ptr to link dest structure */ void tipc_disc_delete(struct tipc_discoverer *d) { - del_timer_sync(&d->timer); + timer_shutdown_sync(&d->timer); kfree_skb(d->skb); kfree(d); } @@ -391,7 +397,6 @@ void tipc_disc_delete(struct tipc_discoverer *d) * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace * @b: ptr to bearer issuing requests - * @dest_domain: network domain to which links can be established */ void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index f69a2fde9f4a..cb0d185e06af 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -60,14 +60,12 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw mac address format to media addr format */ static int tipc_eth_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *msg) + const char *msg) { - char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - memset(addr, 0, sizeof(*addr)); ether_addr_copy(addr->value, msg); addr->media_id = TIPC_MEDIA_TYPE_ETH; - addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); + addr->broadcast = is_broadcast_ether_addr(addr->value); return 0; } @@ -92,7 +90,8 @@ struct tipc_media eth_media_info = { .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_MAX_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_ETH, .hwaddr_len = ETH_ALEN, .name = "eth" diff --git a/net/tipc/group.c b/net/tipc/group.c index 06fee142f09f..3e137d8c9d2f 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -2,6 +2,7 @@ * net/tipc/group.c: TIPC group messaging code * * Copyright (c) 2017, Ericsson AB + * Copyright (c) 2020, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -199,7 +200,7 @@ void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) struct tipc_member *m, *tmp; struct sk_buff_head xmitq; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); tipc_group_update_member(m, 0); @@ -218,6 +219,7 @@ void tipc_group_delete(struct net *net, struct tipc_group *grp) rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq); + __skb_queue_purge(&m->deferredq); list_del(&m->list); kfree(m); } @@ -272,8 +274,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, return NULL; } -static void tipc_group_add_to_tree(struct tipc_group *grp, - struct tipc_member *m) +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) { u64 nkey, key = (u64)m->node << 32 | m->port; struct rb_node **n, *parent = NULL; @@ -290,10 +292,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, else if (key > nkey) n = &(*n)->rb_right; else - return; + return -EEXIST; } rb_link_node(&m->tree_node, parent, n); rb_insert_color(&m->tree_node, &grp->members); + return 0; } static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, @@ -301,6 +304,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 instance, int state) { struct tipc_member *m; + int ret; m = kzalloc(sizeof(*m), GFP_ATOMIC); if (!m) @@ -313,8 +317,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->port = port; m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } grp->member_cnt++; - tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); m->state = state; return m; @@ -352,7 +360,7 @@ struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) return &grp->dests; } -void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, +void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq, int *scope) { seq->type = grp->type; @@ -434,7 +442,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, return true; if (state == MBR_PENDING && adv == ADV_IDLE) return true; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); tipc_node_distr_xmit(grp->net, &xmitq); return true; @@ -535,7 +543,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, update = true; deliver = false; } - /* Fall thru */ + fallthrough; case TIPC_GRP_BCAST_MSG: m->bc_rcv_nxt++; ack = msg_grp_bc_ack_req(hdr); @@ -917,7 +925,10 @@ void tipc_group_member_evt(struct tipc_group *grp, int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) { - struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + struct nlattr *group = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_GROUP); + + if (!group) + return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, grp->type) || diff --git a/net/tipc/group.h b/net/tipc/group.h index 76b4e5a7b39d..ea4c3be64c78 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -2,6 +2,7 @@ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions * * Copyright (c) 2017, Ericsson AB + * Copyright (c) 2020, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -50,7 +51,7 @@ void tipc_group_delete(struct net *net, struct tipc_group *grp); void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port, u32 instance); struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); -void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, +void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq, int *scope); u32 tipc_group_exclude(struct tipc_group *grp); void tipc_group_filter_msg(struct tipc_group *grp, diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index e8c16718e3fa..b9ad0434c3cd 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,6 +42,8 @@ #include "core.h" #include "bearer.h" +#define TIPC_MAX_IB_LINK_WIN 500 + /* convert InfiniBand address (media address format) media address to string */ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) @@ -65,7 +67,7 @@ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw InfiniBand address format to media addr format */ static int tipc_ib_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *msg) + const char *msg) { memset(addr, 0, sizeof(*addr)); memcpy(addr->value, msg, INFINIBAND_ALEN); @@ -94,7 +96,8 @@ struct tipc_media ib_media_info = { .raw2addr = tipc_ib_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_MAX_IB_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_IB, .hwaddr_len = INFINIBAND_ALEN, .name = "ib" diff --git a/net/tipc/link.c b/net/tipc/link.c index ac306d17f8ad..931f55f781a1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -44,6 +44,7 @@ #include "netlink.h" #include "monitor.h" #include "trace.h" +#include "crypto.h" #include <linux/pkt_sched.h> @@ -81,10 +82,7 @@ struct tipc_stats { * struct tipc_link - TIPC link data structure * @addr: network address of link's peer node * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link @@ -93,35 +91,48 @@ struct tipc_stats { * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') * @mon_state: cookie with information needed by link monitor - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset * @mtu: current maximum packet size for this link * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages - * @prev_from: sequence number of most previous retransmission request - * @stale_cnt: counter for number of identical 
retransmit attempts - * @stale_limit: time when repeated identical retransmits must force link reset * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer * @inputq: buffer queue for messages to be delivered upwards * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity + * @session: session to be used by link + * @snd_nxt_state: next send seq number + * @rcv_nxt_state: next rcv seq number + * @in_session: have received ACTIVATE_MSG from peer + * @active: link is active + * @if_name: associated interface name + * @rst_cnt: link reset counter + * @drop_point: seq number for failover handling (FIXME) + * @failover_reasm_skb: saved failover msg ptr (FIXME) + * @failover_deferdq: deferred message queue for failover processing (FIXME) + * @transmq: the link's transmit queue + * @backlog: link's backlog by priority (importance) + * @snd_nxt: next sequence number to be used + * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @deferdq: deferred receive queue + * @window: sliding window size for congestion handling + * @min_win: minimal send window to be used by link + * @ssthresh: slow start threshold for congestion handling + * @max_win: maximal send window to be used by link + * @cong_acks: congestion acks for congestion avoidance (FIXME) + * @checkpoint: seq number for congestion window size handling + * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message + * @last_gap: last gap ack blocks for bcast (FIXME) + * @last_ga: ptr to gap ack blocks + * @bc_rcvlink: the peer specific link used for broadcast reception + * @bc_sndlink: the namespace global link used for broadcast sending + * @nack_state: bcast nack state + * @bc_peer_is_up: peer has acked the bcast init msg */ struct tipc_link { u32 addr; @@ -151,6 +162,7 @@ struct tipc_link { /* Failover/synch */ u16 drop_point; struct sk_buff *failover_reasm_skb; + struct sk_buff_head failover_deferdq; /* Max packet negotiation */ u16 mtu; @@ -162,12 +174,9 @@ struct tipc_link { struct { u16 len; u16 limit; + struct sk_buff *target_bskb; } backlog[5]; u16 snd_nxt; - u16 prev_from; - u16 window; - u16 stale_cnt; - unsigned long stale_limit; /* Reception */ u16 rcv_nxt; @@ -178,13 +187,22 @@ struct tipc_link { /* Congestion handling */ struct sk_buff_head wakeupq; + u16 window; + u16 min_win; + u16 ssthresh; + u16 max_win; + u16 cong_acks; + u16 checkpoint; /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + struct sk_buff *reasm_tnlmsg; /* Broadcast */ u16 ackers; u16 acked; + u16 last_gap; + struct tipc_gap_ack_blks *last_ga; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; u8 nack_state; @@ -208,12 +226,8 @@ enum { BC_NACK_SND_SUPPRESS, }; -#define TIPC_BC_RETR_LIM msecs_to_jiffies(10) /* [ms] */ - -/* - * Interval between NACKs when packets arrive out of order - */ 
-#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2) +#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10)) +#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1)) /* Link FSM states: */ @@ -227,13 +241,6 @@ enum { LINK_SYNCHING = 0xc << 24 }; -/* Link FSM state checking routines - */ -static int link_is_up(struct tipc_link *l) -{ - return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); -} - static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, @@ -245,14 +252,22 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); -static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); - +static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, + struct tipc_link *l, u8 start_index); +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr); +static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, + u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + bool *retransmitted, int *rc); +static void tipc_link_update_cwin(struct tipc_link *l, int released, + bool retransmitted); /* * Simple non-static link routines (i.e. referenced outside this file) */ bool tipc_link_is_up(struct tipc_link *l) { - return link_is_up(l); + return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); } bool tipc_link_peer_is_down(struct tipc_link *l) @@ -305,9 +320,14 @@ u32 tipc_link_id(struct tipc_link *l) return l->peer_bearer_id << 16 | l->bearer_id; } -int tipc_link_window(struct tipc_link *l) +int tipc_link_min_win(struct tipc_link *l) { - return l->window; + return l->min_win; +} + +int tipc_link_max_win(struct tipc_link *l) +{ + return l->max_win; } int tipc_link_prio(struct tipc_link *l) @@ -330,6 +350,11 @@ char tipc_link_plane(struct tipc_link *l) return l->net_plane; } +struct net *tipc_link_net(struct tipc_link *l) +{ + return l->net; +} + void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) { l->peer_caps = capabilities; @@ -356,7 +381,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, snd_l->ackers--; rcv_l->bc_peer_is_up = true; rcv_l->state = LINK_ESTABLISHED; - tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); + tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL); trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; @@ -395,6 +420,15 @@ int tipc_link_mtu(struct tipc_link *l) return l->mtu; } +int tipc_link_mss(struct tipc_link *l) +{ +#ifdef CONFIG_TIPC_CRYPTO + return l->mtu - INT_H_SIZE - EMSG_OVERHEAD; +#else + return l->mtu - INT_H_SIZE; +#endif +} + u16 tipc_link_rcv_nxt(struct tipc_link *l) { return l->rcv_nxt; @@ -417,16 +451,16 @@ u32 tipc_link_state(struct tipc_link *l) /** * tipc_link_create - create a new link - * @n: pointer to associated node + * @net: pointer to associated network namespace * @if_name: associated interface name * @bearer_id: id (index) of associated bearer * @tolerance: link tolerance to be used by link * @net_plane: network plane (A,B,c..) 
this link belongs to * @mtu: mtu to be advertised by link * @priority: priority to be used by link - * @window: send window to be used by link + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link * @session: session to be used by link - * @ownnode: identity of own node * @peer: node id of peer node * @peer_caps: bitmap describing peer node capabilities * @bc_sndlink: the namespace global link used for broadcast sending @@ -434,12 +468,14 @@ u32 tipc_link_state(struct tipc_link *l) * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link + * @self: local unicast link id + * @peer_id: 128-bit ID of peer * - * Returns true if link was created, otherwise false + * Return: true if link was created, otherwise false */ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, - int window, u32 session, u32 self, + u32 min_win, u32 max_win, u32 session, u32 self, u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, @@ -459,11 +495,9 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /* Set link name for unicast links only */ if (peer_id) { - tipc_nodeid2string(self_str, tipc_own_id(net)); - if (strlen(self_str) > 16) + if (tipc_nodeid2string(self_str, tipc_own_id(net)) > NODE_ID_LEN) sprintf(self_str, "%x", self); - tipc_nodeid2string(peer_str, peer_id); - if (strlen(peer_str) > 16) + if (tipc_nodeid2string(peer_str, peer_id) > NODE_ID_LEN) sprintf(peer_str, "%x", peer); } /* Peer i/f name will be completed by reset/activate message */ @@ -483,7 +517,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, l->advertised_mtu = mtu; l->mtu = mtu; l->priority = priority; - tipc_link_set_queue_limits(l, window); + tipc_link_set_queue_limits(l, min_win, max_win); l->ackers = 1; l->bc_sndlink = bc_sndlink; l->bc_rcvlink = bc_rcvlink; @@ -493,6 +527,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); + __skb_queue_head_init(&l->failover_deferdq); skb_queue_head_init(&l->wakeupq); skb_queue_head_init(l->inputq); return true; @@ -500,17 +535,23 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast - * @n: pointer to associated node + * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers - * @window: send window to be used + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link + * @ownnode: identity of own node + * @peer: node id of peer node + * @peer_id: 128-bit ID of peer + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending * - * Returns true if link was created, otherwise false + * Return: true if link was created, otherwise false */ -bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, - int mtu, int window, u16 peer_caps, +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, + int mtu, u32 
min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, @@ -518,13 +559,23 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, { struct tipc_link *l; - if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, - 0, ownnode, peer, NULL, peer_caps, bc_sndlink, - NULL, inputq, namedq, link)) + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win, + max_win, 0, ownnode, peer, NULL, peer_caps, + bc_sndlink, NULL, inputq, namedq, link)) return false; l = *link; - strcpy(l->name, tipc_bclink_name); + if (peer_id) { + char peer_str[NODE_ID_STR_LEN] = {0,}; + + if (tipc_nodeid2string(peer_str, peer_id) > NODE_ID_LEN) + sprintf(peer_str, "%x", peer); + /* Broadcast receiver link name: "broadcast-link:<peer>" */ + snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name, + peer_str); + } else { + strcpy(l->name, tipc_bclink_name); + } trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!"); tipc_link_reset(l); l->state = LINK_RESET; @@ -537,7 +588,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, /* Disable replicast if even a single peer doesn't support it */ if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) - tipc_bcast_disable_rcast(net); + tipc_bcast_toggle_rcast(net, false); return true; } @@ -578,6 +629,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; + break; case LINK_FAILURE_EVT: case LINK_RESET_EVT: case LINK_ESTABLISH_EVT: @@ -727,7 +779,7 @@ static void link_profile_stats(struct tipc_link *l) if (msg_user(msg) == MSG_FRAGMENTER) { if (msg_type(msg) != FIRST_FRAGMENT) return; - length = msg_size(msg_get_wrapped(msg)); + length = msg_size(msg_inner_hdr(msg)); } l->stats.msg_lengths_total += length; l->stats.msg_length_counts++; @@ -751,7 +803,7 @@ static void link_profile_stats(struct tipc_link *l) * tipc_link_too_silent - check if link is "too silent" * @l: tipc link to be checked * - * Returns true if the link 'silent_intv_cnt' is about to reach the + * Return: true if the link 'silent_intv_cnt' is about to reach the * 'abort_limit' value, otherwise false */ bool tipc_link_too_silent(struct tipc_link *l) @@ -786,11 +838,16 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); - state |= !skb_queue_empty(&l->deferdq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; + probe |= !skb_queue_empty(&l->deferdq); + if (l->snd_nxt == l->checkpoint) { + tipc_link_update_cwin(l, 0, 0); + probe = true; + } + l->checkpoint = l->snd_nxt; break; case LINK_RESET: setup = l->rst_cnt++ <= 4; @@ -848,27 +905,58 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(&tmpq); - skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; + + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - 
skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + +} + +/** + * tipc_link_set_skb_retransmit_time - set the time at which retransmission of + * the given skb should be next attempted + * @skb: skb to set a future retransmission time for + * @l: link the skb will be transmitted on + */ +static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, + struct tipc_link *l) +{ + if (link_is_bc_sndlink(l)) + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + else + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; } void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; + u32 imp; __skb_queue_head_init(&list); l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; l->session++; l->mtu = l->advertised_mtu; @@ -883,14 +971,16 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); __skb_queue_purge(&l->backlogq); - l->backlog[TIPC_LOW_IMPORTANCE].len = 0; - l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; - l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; - l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; - l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; + __skb_queue_purge(&l->failover_deferdq); + for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { + l->backlog[imp].len = 0; + l->backlog[imp].target_bskb = NULL; + } kfree_skb(l->reasm_buf); + kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; + l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; @@ -898,9 +988,11 @@ void tipc_link_reset(struct tipc_link *l) l->snd_nxt_state = 1; l->rcv_nxt_state = 1; l->acked = 0; + l->last_gap = 0; + kfree(l->last_ga); + l->last_ga = NULL; l->silent_intv_cnt = 0; l->rst_cnt = 0; - l->stale_cnt = 0; l->bc_peer_is_up = false; memset(&l->mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); @@ -908,39 +1000,50 @@ void tipc_link_reset(struct tipc_link *l) /** * tipc_link_xmit(): enqueue buffer list according to queue situation - * @link: link to use + * @l: link to use * @list: chain of buffers containing message * @xmitq: returned list of packets to be sent by caller * * Consumes the buffer chain. 
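The congestion window that gates this xmit path is managed by tipc_link_update_cwin(), added further down: halve into fast recovery on a retransmit, fall back to the minimum window when nothing was released, grow per released packet in slow start, and add one per window's worth of acks in congestion avoidance. A user-space sketch of that policy, with the queue-pressure and transmit-queue-hole checks elided:

#include <stdio.h>

/* Simplified stand-in for the window fields of struct tipc_link. */
struct cwin {
	unsigned short window, min_win, max_win, ssthresh, cong_acks;
};

static unsigned short min_u16(unsigned short a, unsigned short b)
{
	return a < b ? a : b;
}

static unsigned short max_u16(unsigned short a, unsigned short b)
{
	return a > b ? a : b;
}

static void update_cwin(struct cwin *l, int released, int retransmitted)
{
	if (retransmitted) {		/* enter fast recovery */
		l->ssthresh = max_u16(l->window / 2, 300);
		l->window = min_u16(l->ssthresh, l->window);
		return;
	}
	if (!released) {		/* nothing acked: back to slow start */
		l->ssthresh = max_u16(l->window / 2, 300);
		l->window = l->min_win;
		return;
	}
	l->cong_acks += released;
	if (l->window <= l->ssthresh) {	/* slow start: grow per released pkt */
		l->window = min_u16(l->window + released, l->max_win);
		return;
	}
	if (l->cong_acks < l->window)	/* congestion avoidance: */
		return;			/* +1 per window's worth of acks */
	l->window = min_u16(l->window + 1, l->max_win);
	l->cong_acks = 0;
}

int main(void)
{
	struct cwin l = { .window = 50, .min_win = 50,
			  .max_win = 500, .ssthresh = 300 };
	int i;

	for (i = 0; i < 10; i++)
		update_cwin(&l, 50, 0);	/* steady acks grow the window */
	printf("window after growth: %u\n", l.window);
	update_cwin(&l, 0, 1);		/* a retransmit cuts it back */
	printf("window after loss: %u\n", l.window);
	return 0;
}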
- * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb_peek(list)); - unsigned int maxwin = l->window; - int imp = msg_importance(hdr); - unsigned int mtu = l->mtu; + struct sk_buff_head *backlogq = &l->backlogq; + struct sk_buff_head *transmq = &l->transmq; + struct sk_buff *skb, *_skb; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; - u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; - struct sk_buff_head *transmq = &l->transmq; - struct sk_buff_head *backlogq = &l->backlogq; - struct sk_buff *skb, *_skb, *bskb; int pkt_cnt = skb_queue_len(list); + unsigned int mss = tipc_link_mss(l); + unsigned int cwin = l->window; + unsigned int mtu = l->mtu; + struct tipc_msg *hdr; + bool new_bundle; int rc = 0; + int imp; + if (pkt_cnt <= 0) + return 0; + + hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { - skb_queue_purge(list); + pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", + skb_queue_len(list), msg_user(hdr), + msg_type(hdr), msg_size(hdr), mtu); + __skb_queue_purge(list); return -EMSGSIZE; } + imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); + __skb_queue_purge(list); return -ENOBUFS; } rc = link_schedule_user(l, hdr); @@ -952,25 +1055,20 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, } /* Prepare each packet for sending, and add to relevant queue: */ - while (skb_queue_len(list)) { - skb = skb_peek(list); - hdr = buf_msg(skb); - msg_set_seqno(hdr, seqno); - msg_set_ack(hdr, ack); - msg_set_bcast_ack(hdr, bc_ack); - - if (likely(skb_queue_len(transmq) < maxwin)) { + while ((skb = __skb_dequeue(list))) { + if (likely(skb_queue_len(transmq) < cwin)) { + hdr = buf_msg(skb); + msg_set_seqno(hdr, seqno); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { - skb_queue_purge(list); + kfree_skb(skb); + __skb_queue_purge(list); return -ENOBUFS; } - __skb_dequeue(list); __skb_queue_tail(transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = - jiffies + TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; @@ -978,36 +1076,86 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, seqno++; continue; } - if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) { - kfree_skb(__skb_dequeue(list)); - l->stats.sent_bundled++; - continue; - } - if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) { - kfree_skb(__skb_dequeue(list)); - __skb_queue_tail(backlogq, bskb); - l->backlog[msg_importance(buf_msg(bskb))].len++; - l->stats.sent_bundled++; - l->stats.sent_bundles++; + if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb, + mss, l->addr, &new_bundle)) { + if (skb) { + /* Keep a ref. 
to the skb for next try */ + l->backlog[imp].target_bskb = skb; + l->backlog[imp].len++; + __skb_queue_tail(backlogq, skb); + } else { + if (new_bundle) { + l->stats.sent_bundles++; + l->stats.sent_bundled++; + } + l->stats.sent_bundled++; + } continue; } - l->backlog[imp].len += skb_queue_len(list); + l->backlog[imp].target_bskb = NULL; + l->backlog[imp].len += (1 + skb_queue_len(list)); + __skb_queue_tail(backlogq, skb); skb_queue_splice_tail_init(list, backlogq); } l->snd_nxt = seqno; return rc; } +static void tipc_link_update_cwin(struct tipc_link *l, int released, + bool retransmitted) +{ + int bklog_len = skb_queue_len(&l->backlogq); + struct sk_buff_head *txq = &l->transmq; + int txq_len = skb_queue_len(txq); + u16 cwin = l->window; + + /* Enter fast recovery */ + if (unlikely(retransmitted)) { + l->ssthresh = max_t(u16, l->window / 2, 300); + l->window = min_t(u16, l->ssthresh, l->window); + return; + } + /* Enter slow start */ + if (unlikely(!released)) { + l->ssthresh = max_t(u16, l->window / 2, 300); + l->window = l->min_win; + return; + } + /* Don't increase window if no pressure on the transmit queue */ + if (txq_len + bklog_len < cwin) + return; + + /* Don't increase window if there are holes the transmit queue */ + if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len) + return; + + l->cong_acks += released; + + /* Slow start */ + if (cwin <= l->ssthresh) { + l->window = min_t(u16, cwin + released, l->max_win); + return; + } + /* Congestion avoidance */ + if (l->cong_acks < cwin) + return; + l->window = min_t(u16, ++cwin, l->max_win); + l->cong_acks = 0; +} + static void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + struct sk_buff_head *txq = &l->transmq; struct sk_buff *skb, *_skb; - struct tipc_msg *hdr; - u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; - u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 seqno = l->snd_nxt; + struct tipc_msg *hdr; + u16 cwin = l->window; + u32 imp; - while (skb_queue_len(&l->transmq) < l->window) { + while (skb_queue_len(txq) < cwin) { skb = skb_peek(&l->backlogq); if (!skb) break; @@ -1016,11 +1164,12 @@ static void tipc_link_advance_backlog(struct tipc_link *l, break; __skb_dequeue(&l->backlogq); hdr = buf_msg(skb); - l->backlog[msg_importance(hdr)].len--; + imp = msg_importance(hdr); + l->backlog[imp].len--; + if (unlikely(skb == l->backlog[imp].target_bskb)) + l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = jiffies + TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; @@ -1034,76 +1183,57 @@ static void tipc_link_advance_backlog(struct tipc_link *l, l->snd_nxt = seqno; } -static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) -{ - struct tipc_msg *hdr = buf_msg(skb); - - pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "State of link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); -} - -/* tipc_link_retrans() - retransmit one or more packets - * @l: the link to transmit on - * @r: the receiving link ordering the retransmit. 
Same as l if unicast - * @from: retransmit from (inclusive) this sequence number - * @to: retransmit to (inclusive) this sequence number - * xmitq: queue for accumulating the retransmitted packets +/** + * link_retransmit_failure() - Detect repeated retransmit failures + * @l: tipc link sender + * @r: tipc link receiver (= l in case of unicast) + * @rc: returned code + * + * Return: true if the repeated retransmit failures happens, otherwise + * false */ -static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, - u16 from, u16 to, struct sk_buff_head *xmitq) +static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, + int *rc) { - struct sk_buff *_skb, *skb = skb_peek(&l->transmq); - u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; - u16 ack = l->rcv_nxt - 1; + struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; if (!skb) - return 0; - if (less(to, from)) - return 0; + return false; - trace_tipc_link_retrans(r, from, to, &l->transmq); - /* Detect repeated retransmit failures on same packet */ - if (r->prev_from != from) { - r->prev_from = from; - r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); - r->stale_cnt = 0; - } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { - link_retransmit_failure(l, skb); - trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); - trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); - trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); - if (link_is_bc_sndlink(l)) - return TIPC_LINK_DOWN_EVT; - return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - } + if (!TIPC_SKB_CB(skb)->retr_cnt) + return false; - skb_queue_walk(&l->transmq, skb) { - hdr = buf_msg(skb); - if (less(msg_seqno(hdr), from)) - continue; - if (more(msg_seqno(hdr), to)) - break; - if (link_is_bc_sndlink(l)) { - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) - continue; - TIPC_SKB_CB(skb)->nxt_retr = jiffies + TIPC_BC_RETR_LIM; - } - _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); - if (!_skb) - return 0; - hdr = buf_msg(_skb); - msg_set_ack(hdr, ack); - msg_set_bcast_ack(hdr, bc_ack); - _skb->priority = TC_PRIO_CONTROL; - __skb_queue_tail(xmitq, _skb); - l->stats.retransmitted++; + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + + msecs_to_jiffies(r->tolerance * 10))) + return false; + + hdr = buf_msg(skb); + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) + return false; + + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "State of link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, dest: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); + pr_info("retr_stamp %d, retr_cnt %d\n", + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), + TIPC_SKB_CB(skb)->retr_cnt); + + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + + if (link_is_bc_sndlink(l)) { + r->state = LINK_RESET; + *rc |= TIPC_LINK_DOWN_EVT; + } else { + *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } - return 0; + + return true; } /* tipc_data_input - deliver data and name distr msgs to upper layer @@ -1126,7 +1256,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, skb_queue_tail(mc_inputq, skb); return true; } - /* fall through */ + fallthrough; case CONN_MANAGER: skb_queue_tail(inputq, skb); return true; @@ 
-1142,11 +1272,20 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, case MSG_FRAGMENTER: case BCAST_PROTOCOL: return false; +#ifdef CONFIG_TIPC_CRYPTO + case MSG_CRYPTO: + if (sysctl_tipc_key_exchange_enabled && + TIPC_SKB_CB(skb)->decrypted) { + tipc_crypto_msg_rcv(l->net, skb); + return true; + } + fallthrough; +#endif default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; - }; + return true; + } } /* tipc_link_input - process packet that has passed link protocol check @@ -1154,34 +1293,14 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, * Consumes buffer */ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, - struct sk_buff_head *inputq) + struct sk_buff_head *inputq, + struct sk_buff **reasm_skb) { struct tipc_msg *hdr = buf_msg(skb); - struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; struct sk_buff_head tmpq; int usr = msg_user(hdr); - int rc = 0; int pos = 0; - int ipos = 0; - - if (unlikely(usr == TUNNEL_PROTOCOL)) { - if (msg_type(hdr) == SYNCH_MSG) { - __skb_queue_purge(&l->deferdq); - goto drop; - } - if (!tipc_msg_extract(skb, &iskb, &ipos)) - return rc; - kfree_skb(skb); - skb = iskb; - hdr = buf_msg(skb); - if (less(msg_seqno(hdr), l->drop_point)) - goto drop; - if (tipc_data_input(l, skb, inputq)) - return rc; - usr = msg_user(hdr); - reasm_skb = &l->failover_reasm_skb; - } if (usr == MSG_BUNDLER) { skb_queue_head_init(&tmpq); @@ -1206,24 +1325,359 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); tipc_bcast_unlock(l->net); } -drop: + kfree_skb(skb); return 0; } -static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) +/* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the + * inner message along with the ones in the old link's + * deferdq + * @l: tunnel link + * @skb: TUNNEL_PROTOCOL message + * @inputq: queue to put messages ready for delivery + */ +static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq) { - bool released = false; - struct sk_buff *skb, *tmp; + struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; + struct sk_buff_head *fdefq = &l->failover_deferdq; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff *iskb; + int ipos = 0; + int rc = 0; + u16 seqno; - skb_queue_walk_safe(&l->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - __skb_unlink(skb, &l->transmq); + if (msg_type(hdr) == SYNCH_MSG) { + kfree_skb(skb); + return 0; + } + + /* Not a fragment? */ + if (likely(!msg_nof_fragms(hdr))) { + if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { + pr_warn_ratelimited("Unable to extract msg, defq: %d\n", + skb_queue_len(fdefq)); + return 0; + } kfree_skb(skb); - released = true; + } else { + /* Set fragment type for buf_append */ + if (msg_fragm_no(hdr) == 1) + msg_set_type(hdr, FIRST_FRAGMENT); + else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) + msg_set_type(hdr, FRAGMENT); + else + msg_set_type(hdr, LAST_FRAGMENT); + + if (!tipc_buf_append(reasm_tnlmsg, &skb)) { + /* Successful but non-complete reassembly? 
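
The fragment re-typing a few lines up in tipc_link_tnl_rcv() exists so the generic reassembler (tipc_buf_append) can be reused for tunnelled fragments, which carry fragm_no/nof_fragms instead of a fragment type. The mapping it applies, as a stand-alone sketch with illustrative enum names:

enum frag_type { FRAG_FIRST, FRAG_MIDDLE, FRAG_LAST };

/* fragm_no is 1-based, nof_fragms is the total fragment count */
static enum frag_type tnl_frag_type(unsigned int fragm_no,
                                    unsigned int nof_fragms)
{
        if (fragm_no == 1)
                return FRAG_FIRST;
        if (fragm_no < nof_fragms)
                return FRAG_MIDDLE;
        return FRAG_LAST;               /* fragm_no == nof_fragms */
}
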
*/ + if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) + return 0; + pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + iskb = skb; + } + + do { + seqno = buf_seqno(iskb); + if (unlikely(less(seqno, l->drop_point))) { + kfree_skb(iskb); + continue; + } + if (unlikely(seqno != l->drop_point)) { + __tipc_skb_queue_sorted(fdefq, seqno, iskb); + continue; + } + + l->drop_point++; + if (!tipc_data_input(l, iskb, inputq)) + rc |= tipc_link_input(l, iskb, inputq, reasm_skb); + if (unlikely(rc)) + break; + } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); + + return rc; +} + +/** + * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG + * @ga: returned pointer to the Gap ACK blocks if any + * @l: the tipc link + * @hdr: the PROTOCOL/STATE_MSG header + * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0) + * + * Return: the total Gap ACK blocks size + */ +u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, + struct tipc_msg *hdr, bool uc) +{ + struct tipc_gap_ack_blks *p; + u16 sz = 0; + + /* Does peer support the Gap ACK blocks feature? */ + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { + p = (struct tipc_gap_ack_blks *)msg_data(hdr); + sz = ntohs(p->len); + /* Sanity check */ + if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) { + /* Good, check if the desired type exists */ + if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) + goto ok; + /* Backward compatible: peer might not support bc, but uc? */ + } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) { + if (p->ugack_cnt) { + p->bgack_cnt = 0; + goto ok; + } + } + } + /* Other cases: ignore! */ + p = NULL; + +ok: + *ga = p; + return sz; +} + +static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, + struct tipc_link *l, u8 start_index) +{ + struct tipc_gap_ack *gacks = &ga->gacks[start_index]; + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 expect, seqno = 0; + u8 n = 0; + + if (!skb) + return 0; + + expect = buf_seqno(skb); + skb_queue_walk(&l->deferdq, skb) { + seqno = buf_seqno(skb); + if (unlikely(more(seqno, expect))) { + gacks[n].ack = htons(expect - 1); + gacks[n].gap = htons(seqno - expect); + if (++n >= MAX_GAP_ACK_BLKS / 2) { + pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n", + l->name, n, + skb_queue_len(&l->deferdq)); + return n; + } + } else if (unlikely(less(seqno, expect))) { + pr_warn("Unexpected skb in deferdq!\n"); + continue; + } + expect = seqno + 1; + } + + /* last block */ + gacks[n].ack = htons(seqno); + gacks[n].gap = 0; + n++; + return n; +} + +/* tipc_build_gap_ack_blks - build Gap ACK blocks + * @l: tipc unicast link + * @hdr: the tipc message buffer to store the Gap ACK blocks after built + * + * The function builds Gap ACK blocks for both the unicast & broadcast receiver + * links of a certain peer, the buffer after built has the network data format + * as found at the struct tipc_gap_ack_blks definition. 
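
__tipc_build_gap_ack_blks() above compresses the deferred-receive queue into (ack, gap) pairs: each pair says "everything up to ack has arrived, then gap packets are missing". A user-space model over a sorted array of out-of-sequence packet numbers; the types and flat array are simplifications (the kernel walks skbs, stores the fields big-endian, and caps the count at MAX_GAP_ACK_BLKS / 2):

#include <stddef.h>
#include <stdint.h>

struct gap_blk { uint16_t ack, gap; };

/* seq[] holds n > 0 strictly increasing deferred sequence numbers and
 * blk[] has room for max_blks >= 2 entries; the kernel variant warns
 * about and skips any out-of-order entry
 */
static size_t build_gap_blks(const uint16_t *seq, size_t n,
                             struct gap_blk *blk, size_t max_blks)
{
        uint16_t expect = seq[0];
        size_t cnt = 0;

        for (size_t i = 0; i < n; i++) {
                if (seq[i] != expect) {         /* a jump closes one block */
                        blk[cnt].ack = expect - 1;
                        blk[cnt].gap = seq[i] - expect;
                        if (++cnt == max_blks - 1)
                                return cnt;     /* capped, as in the kernel */
                }
                expect = seq[i] + 1;
        }
        blk[cnt].ack = seq[n - 1];              /* final block, no gap */
        blk[cnt].gap = 0;
        return cnt + 1;
}
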
+ * + * returns the actual allocated memory size + */ +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) +{ + struct tipc_link *bcl = l->bc_rcvlink; + struct tipc_gap_ack_blks *ga; + u16 len; + + ga = (struct tipc_gap_ack_blks *)msg_data(hdr); + + /* Start with broadcast link first */ + tipc_bcast_lock(bcl->net); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0); + tipc_bcast_unlock(bcl->net); + + /* Now for unicast link, but an explicit NACK only (???) */ + ga->ugack_cnt = (msg_seq_gap(hdr)) ? + __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; + + /* Total len */ + len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt)); + ga->len = htons(len); + return len; +} + +/* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing + * acked packets, also doing retransmissions if + * gaps found + * @l: tipc link with transmq queue to be advanced + * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast) + * @acked: seqno of last packet acked by peer without any gaps before + * @gap: # of gap packets + * @ga: buffer pointer to Gap ACK blocks from peer + * @xmitq: queue for accumulating the retransmitted packets if any + * @retransmitted: returned boolean value if a retransmission is really issued + * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures + * happens (- unlikely case) + * + * Return: the number of packets released from the link transmq + */ +static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, + u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + bool *retransmitted, int *rc) +{ + struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL; + struct tipc_gap_ack *gacks = NULL; + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr; + u32 qlen = skb_queue_len(&l->transmq); + u16 nacked = acked, ngap = gap, gack_cnt = 0; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 ack = l->rcv_nxt - 1; + u16 seqno, n = 0; + u16 end = r->acked, start = end, offset = r->last_gap; + u16 si = (last_ga) ? 
last_ga->start_index : 0; + bool is_uc = !link_is_bc_sndlink(l); + bool bc_has_acked = false; + + trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq); + + /* Determine Gap ACK blocks if any for the particular link */ + if (ga && is_uc) { + /* Get the Gap ACKs, uc part */ + gack_cnt = ga->ugack_cnt; + gacks = &ga->gacks[ga->bgack_cnt]; + } else if (ga) { + /* Copy the Gap ACKs, bc part, for later renewal if needed */ + this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt), + GFP_ATOMIC); + if (likely(this_ga)) { + this_ga->start_index = 0; + /* Start with the bc Gap ACKs */ + gack_cnt = this_ga->bgack_cnt; + gacks = &this_ga->gacks[0]; + } else { + /* Hmm, we can get in trouble..., simply ignore it */ + pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n"); + } + } + + /* Advance the link transmq */ + skb_queue_walk_safe(&l->transmq, skb, tmp) { + seqno = buf_seqno(skb); + +next_gap_ack: + if (less_eq(seqno, nacked)) { + if (is_uc) + goto release; + /* Skip packets peer has already acked */ + if (!more(seqno, r->acked)) + continue; + /* Get the next of last Gap ACK blocks */ + while (more(seqno, end)) { + if (!last_ga || si >= last_ga->bgack_cnt) + break; + start = end + offset + 1; + end = ntohs(last_ga->gacks[si].ack); + offset = ntohs(last_ga->gacks[si].gap); + si++; + WARN_ONCE(more(start, end) || + (!offset && + si < last_ga->bgack_cnt) || + si > MAX_GAP_ACK_BLKS, + "Corrupted Gap ACK: %d %d %d %d %d\n", + start, end, offset, si, + last_ga->bgack_cnt); + } + /* Check against the last Gap ACK block */ + if (tipc_in_range(seqno, start, end)) + continue; + /* Update/release the packet peer is acking */ + bc_has_acked = true; + if (--TIPC_SKB_CB(skb)->ackers) + continue; +release: + /* release skb */ + __skb_unlink(skb, &l->transmq); + kfree_skb(skb); + } else if (less_eq(seqno, nacked + ngap)) { + /* First gap: check if repeated retrans failures? 
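
Every sequence-number test in the walk above (less(), more(), less_eq(), tipc_in_range()) must survive 16-bit wrap-around, i.e. 0x0001 counts as coming "after" 0xfffe. A sketch of the usual trick; these are illustrative equivalents, not TIPC's literal helpers from msg.h:

#include <stdint.h>

/* true if a precedes b on the circular 16-bit sequence space */
static inline int seq_less(uint16_t a, uint16_t b)
{
        return (int16_t)(a - b) < 0;
}

static inline int seq_less_eq(uint16_t a, uint16_t b)
{
        return (int16_t)(a - b) <= 0;
}

/* true if seq lies in the inclusive, possibly wrapping window [lo, hi] */
static inline int seq_in_range(uint16_t seq, uint16_t lo, uint16_t hi)
{
        return (uint16_t)(seq - lo) <= (uint16_t)(hi - lo);
}
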
*/ + if (unlikely(seqno == acked + 1 && + link_retransmit_failure(l, r, rc))) { + /* Ignore this bc Gap ACKs if any */ + kfree(this_ga); + this_ga = NULL; + break; + } + /* retransmit skb if unrestricted*/ + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + tipc_link_set_skb_retransmit_time(skb, l); + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) + continue; + hdr = buf_msg(_skb); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + _skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, _skb); + l->stats.retransmitted++; + if (!is_uc) + r->stats.retransmitted++; + *retransmitted = true; + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; + } else { + /* retry with Gap ACK blocks if any */ + if (n >= gack_cnt) + break; + nacked = ntohs(gacks[n].ack); + ngap = ntohs(gacks[n].gap); + n++; + goto next_gap_ack; + } } - return released; + + /* Renew last Gap ACK blocks for bc if needed */ + if (bc_has_acked) { + if (this_ga) { + kfree(last_ga); + r->last_ga = this_ga; + r->last_gap = gap; + } else if (last_ga) { + if (less(acked, start)) { + si--; + offset = start - acked - 1; + } else if (less(acked, end)) { + acked = end; + } + if (si < last_ga->bgack_cnt) { + last_ga->start_index = si; + r->last_gap = offset; + } else { + kfree(last_ga); + r->last_ga = NULL; + r->last_gap = 0; + } + } else { + r->last_gap = 0; + } + r->acked = acked; + } else { + kfree(this_ga); + } + + return qlen - skb_queue_len(&l->transmq); } /* tipc_link_build_state_msg: prepare link state message for transmission @@ -1246,7 +1700,6 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) l->snd_nxt = l->rcv_nxt; return TIPC_LINK_SND_STATE; } - /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; @@ -1280,6 +1733,8 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; + struct sk_buff_head *dfq = &l->deferdq; + u32 defq_len = skb_queue_len(dfq); int match1, match2; if (link_is_bc_rcvlink(l)) { @@ -1290,8 +1745,12 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, return 0; } - if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); + if (defq_len >= 3 && !((defq_len - 3) % 16)) { + u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; + + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, + rcvgap, 0, 0, xmitq); + } return 0; } @@ -1304,46 +1763,44 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *defq = &l->deferdq; - struct tipc_msg *hdr; + struct tipc_msg *hdr = buf_msg(skb); u16 seqno, rcv_nxt, win_lim; + int released = 0; int rc = 0; + /* Verify and update link state */ + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); + + /* Don't send probe at next timeout expiration */ + l->silent_intv_cnt = 0; + do { hdr = buf_msg(skb); seqno = msg_seqno(hdr); rcv_nxt = l->rcv_nxt; win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; - /* Verify and update link state */ - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - return tipc_link_proto_rcv(l, skb, xmitq); - - if (unlikely(!link_is_up(l))) { + if (unlikely(!tipc_link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; - goto drop; + kfree_skb(skb); + break; } - /* Don't send probe at next timeout expiration */ - l->silent_intv_cnt = 0; - /* Drop if outside 
receive window */ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { l->stats.duplicates++; - goto drop; - } - - /* Forward queues and wake up waiting users */ - if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) { - l->stale_cnt = 0; - tipc_link_advance_backlog(l, xmitq); - if (unlikely(!skb_queue_empty(&l->wakeupq))) - link_prepare_wakeup(l); + kfree_skb(skb); + break; } + released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0, + NULL, NULL, NULL, NULL); /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { - __tipc_skb_queue_sorted(defq, seqno, skb); + if (!__tipc_skb_queue_sorted(defq, seqno, skb)) + l->stats.duplicates++; rc |= tipc_link_build_nack_msg(l, xmitq); break; } @@ -1351,17 +1808,24 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Deliver packet */ l->rcv_nxt++; l->stats.recv_pkts++; - if (!tipc_data_input(l, skb, l->inputq)) - rc |= tipc_link_input(l, skb, l->inputq); + + if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL)) + rc |= tipc_link_tnl_rcv(l, skb, l->inputq); + else if (!tipc_data_input(l, skb, l->inputq)) + rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; - } while ((skb = __skb_dequeue(defq))); + } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); - return rc; -drop: - kfree_skb(skb); + /* Forward queues and wake up waiting users */ + if (released) { + tipc_link_update_cwin(l, released, 0); + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } return rc; } @@ -1370,12 +1834,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, int tolerance, int priority, struct sk_buff_head *xmitq) { + struct tipc_mon_state *mstate = &l->mon_state; + struct sk_buff_head *dfq = &l->deferdq; struct tipc_link *bcl = l->bc_rcvlink; - struct sk_buff *skb; struct tipc_msg *hdr; - struct sk_buff_head *dfq = &l->deferdq; - bool node_up = link_is_up(bcl); - struct tipc_mon_state *mstate = &l->mon_state; + struct sk_buff *skb; + bool node_up = tipc_link_is_up(bcl); + u16 glen = 0, bc_rcvgap = 0; int dlen = 0; void *data; @@ -1386,12 +1851,12 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) return; - if (!skb_queue_empty(dfq)) + if ((probe || probe_reply) && !skb_queue_empty(dfq)) rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, - tipc_max_domain_size, l->addr, - tipc_own_addr(l->net), 0, 0, 0); + tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, + l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; @@ -1415,16 +1880,23 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) msg_set_seqno(hdr, l->snd_nxt_state++); msg_set_seq_gap(hdr, rcvgap); - msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + bc_rcvgap = link_bc_rcv_gap(bcl); + msg_set_bc_gap(hdr, bc_rcvgap); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); - tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); - msg_set_size(hdr, INT_H_SIZE + dlen); - skb_trim(skb, INT_H_SIZE + dlen); + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) + glen = tipc_build_gap_ack_blks(l, hdr); + tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + glen + dlen); + skb_trim(skb, INT_H_SIZE + glen 
+ dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ + if (mtyp == ACTIVATE_MSG) { + msg_set_dest_session_valid(hdr, 1); + msg_set_dest_session(hdr, l->peer_session); + } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); @@ -1434,6 +1906,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->stats.sent_probes++; if (rcvgap) l->stats.sent_nacks++; + if (bc_rcvgap) + bcl->stats.sent_nacks++; skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, skb); trace_tipc_proto_build(skb, false, l->name); @@ -1448,7 +1922,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, struct sk_buff *skb; u32 dnode = l->addr; - skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); @@ -1478,15 +1952,43 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; - struct sk_buff_head tmpxq, tnlq; + struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; + bool pktcnt_need_update = false; + u16 syncpt; + int rc; if (!tnl) return; - skb_queue_head_init(&tnlq); - skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&tnlq); + /* Link Synching: + * From now on, send only one single ("dummy") SYNCH message + * to peer. The SYNCH message does not contain any data, just + * a header conveying the synch point to the peer. + */ + if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, + INT_H_SIZE, 0, l->addr, + tipc_own_addr(l->net), + 0, 0, 0); + if (!tnlskb) { + pr_warn("%sunable to create dummy SYNCH_MSG\n", + link_co_err); + return; + } + + hdr = buf_msg(tnlskb); + syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; + msg_set_syncpt(hdr, syncpt); + msg_set_bearer_id(hdr, l->peer_bearer_id); + __skb_queue_tail(&tnlq, tnlskb); + tipc_link_xmit(tnl, &tnlq, xmitq); + return; + } + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags); /* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), @@ -1495,14 +1997,18 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } - skb_queue_tail(&tnlq, skb); + __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq); /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); - pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq); + if (mtyp == SYNCH_MSG) + pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq)); + else + pktcnt = skb_queue_len(&l->transmq); + pktcnt += skb_queue_len(&l->backlogq); msg_set_msgcnt(&tnlhdr, pktcnt); msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); tnl: @@ -1512,6 +2018,39 @@ tnl: if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); + + /* Tunnel link MTU is not large enough? 
This could be + * due to: + * 1) Link MTU has just changed or set differently; + * 2) Or FAILOVER on the top of a SYNCH message + * + * The 2nd case should not happen if peer supports + * TIPC_TUNNEL_ENHANCED + */ + if (pktlen > tnl->mtu - INT_H_SIZE) { + if (mtyp == FAILOVER_MSG && + (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, + &frags); + if (rc) { + pr_warn("%sunable to frag msg: rc %d\n", + link_co_err, rc); + return; + } + pktcnt += skb_queue_len(&frags) - 1; + pktcnt_need_update = true; + skb_queue_splice_tail_init(&frags, &tnlq); + continue; + } + /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED + * => Just warn it and return! + */ + pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", + link_co_err, msg_user(hdr), + msg_type(hdr), msg_size(hdr)); + return; + } + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { @@ -1527,12 +2066,62 @@ tnl: goto tnl; } + if (pktcnt_need_update) + skb_queue_walk(&tnlq, skb) { + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, pktcnt); + } + tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { + struct sk_buff_head *fdefq = &tnl->failover_deferdq; + tnl->drop_point = l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; + + /* Failover the link's deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); + } + skb_queue_splice_init(&l->deferdq, fdefq); + } +} + +/** + * tipc_link_failover_prepare() - prepare tnl for link failover + * + * This is a special version of the precursor - tipc_link_tnl_prepare(), + * see the tipc_node_link_failover() for details + * + * @l: failover link + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited + */ +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *fdefq = &tnl->failover_deferdq; + + tipc_link_create_dummy_tnl_msg(tnl, xmitq); + + /* This failover link endpoint was never established before, + * so it has not received anything from peer. + * Otherwise, it must be a normal failover situation or the + * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes + * would have to start over from scratch instead. 
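
Note the hand-over discipline used for the deferred queue in both tipc_link_tnl_prepare() and tipc_link_failover_prepare(): any stale content in the tunnel's failover_deferdq is warned about and purged first, then the old link's queue is moved across in a single splice. Distilled into one helper, using the real skb-queue API but stripped of the tipc_link context:

#include <linux/skbuff.h>

/* move all deferred buffers from the failed link to the tunnel link */
static void failover_defq_handover(struct sk_buff_head *old_defq,
                                   struct sk_buff_head *tnl_defq)
{
        /* a non-empty destination means an earlier failover left debris */
        if (unlikely(!skb_queue_empty(tnl_defq)))
                __skb_queue_purge(tnl_defq);
        /* O(1) splice; old_defq is left re-initialized and empty */
        skb_queue_splice_init(old_defq, tnl_defq);
}
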
+ */ + tnl->drop_point = 1; + tnl->failover_reasm_skb = NULL; + + /* Initiate the link's failover deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); } } @@ -1566,7 +2155,7 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) if (session != curr_session) return false; /* Extra sanity check */ - if (!link_is_up(l) && msg_ack(hdr)) + if (!tipc_link_is_up(l) && msg_ack(hdr)) return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; @@ -1586,28 +2175,35 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); - u16 rcvgap = 0; - u16 ack = msg_ack(hdr); - u16 gap = msg_seq_gap(hdr); + struct tipc_gap_ack_blks *ga = NULL; + bool reply = msg_probe(hdr), retransmitted = false; + u32 dlen = msg_data_sz(hdr), glen = 0, msg_max; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); + u16 gap = msg_seq_gap(hdr); + u16 ack = msg_ack(hdr); u16 rcv_nxt = l->rcv_nxt; - u16 dlen = msg_data_sz(hdr); + u16 rcvgap = 0; int mtyp = msg_type(hdr); - bool reply = msg_probe(hdr); - void *data; + int rc = 0, released; char *if_name; - int rc = 0; + void *data; trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; + if (tipc_link_is_blocked(l) || !xmitq) goto exit; if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; + hdr = buf_msg(skb); data = msg_data(hdr); @@ -1620,21 +2216,24 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, switch (mtyp) { case RESET_MSG: case ACTIVATE_MSG: + msg_max = msg_max_pkt(hdr); + if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id)) + break; /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME) break; if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) break; - strncpy(if_name, data, TIPC_MAX_IF_NAME); + strscpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own priority if peer's priority is higher */ - if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) + if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; /* If peer is going down we want full re-establish cycle */ @@ -1642,8 +2241,19 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } + + /* If this endpoint was re-created while peer was ESTABLISHING + * it doesn't know current session number. Force re-synch. 
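
The re-synch rule is simply "end up strictly ahead of whatever the peer last recorded", with the comparison done wrap-safely. As a stand-alone sketch (the function name is illustrative; the patch applies this only when an ACTIVATE_MSG carries a valid dest_session that differs from the local one):

#include <stdint.h>

/* pick a session id the peer cannot mistake for the stale one */
static uint16_t resync_session(uint16_t own, uint16_t peer_known)
{
        if ((int16_t)(own - peer_known) < 0)    /* wrap-safe own < peer_known */
                return peer_known + 1;
        return own;                             /* already ahead, keep it */
}
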
+ */ + if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && + l->session != msg_dest_session(hdr)) { + if (less(l->session, msg_dest_session(hdr))) + l->session = msg_dest_session(hdr) + 1; + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if (mtyp == RESET_MSG || !link_is_up(l)) + if (mtyp == RESET_MSG || !tipc_link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ @@ -1653,21 +2263,26 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->peer_session = msg_session(hdr); l->in_session = true; l->peer_bearer_id = msg_bearer_id(hdr); - if (l->mtu > msg_max_pkt(hdr)) - l->mtu = msg_max_pkt(hdr); + if (l->mtu > msg_max) + l->mtu = msg_max; break; case STATE_MSG: + /* Validate Gap ACK blocks, drop if invalid */ + glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); + if (glen > dlen) + break; + l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && - in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { + tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { l->priority = peers_prio; rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } @@ -1677,29 +2292,33 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (msg_probe(hdr)) l->stats.recv_probes++; - if (!link_is_up(l)) { + if (!tipc_link_is_up(l)) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; break; } - tipc_mon_rcv(l->net, data, dlen, l->addr, + + tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ - if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) + if ((reply || msg_is_keepalive(hdr)) && + more(peers_snd_nxt, rcv_nxt) && + !tipc_link_is_synching(l) && + skb_queue_empty(&l->deferdq)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || reply) tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); - tipc_link_release_pkts(l, ack); - /* If NACK, retransmit will now start at right position */ - if (gap) { - rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq); + released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq, + &retransmitted, &rc); + if (gap) l->stats.recv_nacks++; - } - - tipc_link_advance_backlog(l, xmitq); + if (released || retransmitted) + tipc_link_update_cwin(l, released, retransmitted); + if (released) + tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } @@ -1760,7 +2379,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) int mtyp = msg_type(hdr); u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); - if (link_is_up(l)) + if (tipc_link_is_up(l)) return; if (msg_user(hdr) == BCAST_PROTOCOL) { @@ -1785,33 +2404,26 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq) { - struct tipc_link *snd_l = l->bc_sndlink; u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); - u16 from = msg_bcast_ack(hdr) + 1; - u16 to = from + msg_bc_gap(hdr) - 1; int rc = 0; - if (!link_is_up(l)) + if (!tipc_link_is_up(l)) return rc; if (!msg_peer_node_is_up(hdr)) 
return rc; - /* Open when peer ackowledges our bcast init msg (pkt #1) */ + /* Open when peer acknowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; if (!l->bc_peer_is_up) return rc; - l->stats.recv_nacks++; - /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; - rc = tipc_link_retrans(snd_l, l, from, to, xmitq); - l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) rc |= TIPC_LINK_SND_STATE; @@ -1846,38 +2458,34 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, return 0; } -void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, - struct sk_buff_head *xmitq) +int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq) { - struct sk_buff *skb, *tmp; - struct tipc_link *snd_l = l->bc_sndlink; - - if (!link_is_up(l) || !l->bc_peer_is_up) - return; + struct tipc_link *l = r->bc_sndlink; + bool unused = false; + int rc = 0; - if (!more(acked, l->acked)) - return; + if (!tipc_link_is_up(r) || !r->bc_peer_is_up) + return 0; - trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq); - /* Skip over packets peer has already acked */ - skb_queue_walk(&snd_l->transmq, skb) { - if (more(buf_seqno(skb), l->acked)) - break; + if (gap) { + l->stats.recv_nacks++; + r->stats.recv_nacks++; } - /* Update/release the packets peer is acking now */ - skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - if (!--TIPC_SKB_CB(skb)->ackers) { - __skb_unlink(skb, &snd_l->transmq); - kfree_skb(skb); - } - } - l->acked = acked; - tipc_link_advance_backlog(snd_l, xmitq); - if (unlikely(!skb_queue_empty(&snd_l->wakeupq))) - link_prepare_wakeup(snd_l); + if (less(acked, r->acked) || (acked == r->acked && !gap && !ga)) + return 0; + + trace_tipc_link_bc_ack(r, acked, gap, &l->transmq); + tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc); + + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + + return rc; } /* tipc_link_bc_nack_rcv(): receive broadcast nack message @@ -1905,8 +2513,8 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, return 0; if (dnode == tipc_own_addr(l->net)) { - tipc_link_bc_ack_rcv(l, acked, xmitq); - rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq); + rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq, + xmitq); l->stats.recv_nacks++; return rc; } @@ -1918,20 +2526,23 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, return 0; } -void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) { int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE); - l->window = win; - l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win); - l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2); - l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3); - l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4); + l->min_win = min_win; + l->ssthresh = max_win; + l->max_win = max_win; + l->window = min_win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8; l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /** - * 
link_reset_stats - reset link statistics + * tipc_link_reset_stats - reset link statistics * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) @@ -1957,8 +2568,8 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) { int err; - err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, - tipc_nl_prop_policy, NULL); + err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy, NULL); if (err) return err; @@ -1979,10 +2590,10 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) } if (props[TIPC_NLA_PROP_WIN]) { - u32 win; + u32 max_win; - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN)) + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN) return -EINVAL; } @@ -2037,7 +2648,7 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) (s->accu_queue_sz / s->queue_sz_counts) : 0} }; - stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!stats) return -EMSGSIZE; @@ -2069,7 +2680,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; @@ -2091,7 +2702,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) @@ -2158,7 +2769,7 @@ static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, (stats->accu_queue_sz / stats->queue_sz_counts) : 0} }; - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!nest) return -EMSGSIZE; @@ -2175,14 +2786,15 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + u32 bc_mode = tipc_bcast_get_mode(net); + u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); if (!bcl) return 0; @@ -2196,7 +2808,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) return -EMSGSIZE; } - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; @@ -2213,11 +2825,17 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) goto prop_msg_full; + if (bc_mode & BCLINK_MODE_SEL) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, + bc_ratio)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); @@ -2247,7 +2865,7 @@ 
void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, l->tolerance = tol; if (l->bc_rcvlink) l->bc_rcvlink->tolerance = tol; - if (link_is_up(l)) + if (tipc_link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } @@ -2263,21 +2881,6 @@ void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) l->abort_limit = limit; } -char *tipc_link_name_ext(struct tipc_link *l, char *buf) -{ - if (!l) - scnprintf(buf, TIPC_MAX_LINK_NAME, "null"); - else if (link_is_bc_sndlink(l)) - scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender"); - else if (link_is_bc_rcvlink(l)) - scnprintf(buf, TIPC_MAX_LINK_NAME, - "broadcast-receiver, peer %x", l->addr); - else - memcpy(buf, l->name, TIPC_MAX_LINK_NAME); - - return buf; -} - /** * tipc_link_dump - dump TIPC link data * @l: tipc link to be dumped @@ -2316,8 +2919,8 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); - i += scnprintf(buf + i, sz - i, " %u", l->prev_from); - i += scnprintf(buf + i, sz - i, " %u", l->stale_cnt); + i += scnprintf(buf + i, sz - i, " %u", 0); + i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked); list = &l->transmq; diff --git a/net/tipc/link.h b/net/tipc/link.h index 8439e0ee53a8..d80f5649b395 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -73,15 +73,15 @@ enum { bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, - int window, u32 session, u32 ownnode, + u32 min_win, u32 max_win, u32 session, u32 ownnode, u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link **link); -bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, - int mtu, int window, u16 peer_caps, +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, + int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, @@ -90,6 +90,8 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, struct sk_buff_head *xmitq); +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); @@ -109,11 +111,11 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); -char *tipc_link_name_ext(struct tipc_link *l, char *buf); u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); -int tipc_link_window(struct tipc_link *l); +int tipc_link_min_win(struct tipc_link *l); +int tipc_link_max_win(struct tipc_link *l); void tipc_link_update_caps(struct tipc_link *l, u16 capabilities); bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr); unsigned long tipc_link_tolerance(struct tipc_link *l); @@ -122,7 +124,7 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, void tipc_link_set_prio(struct tipc_link 
*l, u32 prio, struct sk_buff_head *xmitq); void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); -void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win); int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); @@ -139,14 +141,18 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, int tipc_link_bc_peers(struct tipc_link *l); void tipc_link_set_mtu(struct tipc_link *l, int mtu); int tipc_link_mtu(struct tipc_link *l); -void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, - struct sk_buff_head *xmitq); -void tipc_link_build_bc_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); +int tipc_link_mss(struct tipc_link *l); +u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, + struct tipc_msg *hdr, bool uc); +int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq); void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq); int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); bool tipc_link_too_silent(struct tipc_link *l); +struct net *tipc_link_net(struct tipc_link *l); #endif diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 67f69389ec17..572b79bf76ce 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -104,11 +104,41 @@ static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); +static inline u16 mon_cpu_to_le16(u16 val) +{ + return (__force __u16)htons(val); +} + +static inline u32 mon_cpu_to_le32(u32 val) +{ + return (__force __u32)htonl(val); +} + +static inline u64 mon_cpu_to_le64(u64 val) +{ + return (__force __u64)cpu_to_be64(val); +} + +static inline u16 mon_le16_to_cpu(u16 val) +{ + return ntohs((__force __be16)val); +} + +static inline u32 mon_le32_to_cpu(u32 val) +{ + return ntohl((__force __be32)val); +} + +static inline u64 mon_le64_to_cpu(u64 val) +{ + return be64_to_cpu((__force __be64)val); +} + /* dom_rec_len(): actual length of domain record for transport */ static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) { - return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32)); + return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32)); } /* dom_size() : calculate size of own domain based on number of peers @@ -119,7 +149,7 @@ static int dom_size(int peers) while ((i * i) < peers) i++; - return i < MAX_MON_DOMAIN ? 
i : MAX_MON_DOMAIN; + return min(i, MAX_MON_DOMAIN); } static void map_set(u64 *up_map, int i, unsigned int v) @@ -130,7 +160,7 @@ static void map_set(u64 *up_map, int i, unsigned int v) static int map_get(u64 up_map, int i) { - return (up_map & (1 << i)) >> i; + return (up_map & (1ULL << i)) >> i; } static struct tipc_peer *peer_prev(struct tipc_peer *peer) @@ -260,16 +290,16 @@ static void mon_update_local_domain(struct tipc_monitor *mon) diff |= dom->members[i] != peer->addr; dom->members[i] = peer->addr; map_set(&dom->up_map, i, peer->is_up); - cache->members[i] = htonl(peer->addr); + cache->members[i] = mon_cpu_to_le32(peer->addr); } diff |= dom->up_map != prev_up_map; if (!diff) return; dom->gen = ++mon->dom_gen; - cache->len = htons(dom->len); - cache->gen = htons(dom->gen); - cache->member_cnt = htons(member_cnt); - cache->up_map = cpu_to_be64(dom->up_map); + cache->len = mon_cpu_to_le16(dom->len); + cache->gen = mon_cpu_to_le16(dom->gen); + cache->member_cnt = mon_cpu_to_le16(member_cnt); + cache->up_map = mon_cpu_to_le64(dom->up_map); mon_apply_domain(mon, self); } @@ -322,9 +352,13 @@ static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *prev, *head; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) @@ -407,11 +441,15 @@ exit: void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *head; struct tipc_mon_domain *dom; int applied; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) { @@ -447,21 +485,24 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, struct tipc_mon_domain dom_bef; struct tipc_mon_domain *dom; struct tipc_peer *peer; - u16 new_member_cnt = ntohs(arrv_dom->member_cnt); + u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt); int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); - u16 new_gen = ntohs(arrv_dom->gen); - u16 acked_gen = ntohs(arrv_dom->ack_gen); + u16 new_gen = mon_le16_to_cpu(arrv_dom->gen); + u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen); + u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len); bool probing = state->probing; int i, applied_bef; state->probing = false; /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) return; - if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) + if (dlen < new_dlen || arrv_dlen != new_dlen) return; /* Synch generation numbers with peer if link just came up */ @@ -509,9 +550,9 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, dom->len = new_dlen; dom->gen = new_gen; dom->member_cnt = new_member_cnt; - dom->up_map = be64_to_cpu(arrv_dom->up_map); + dom->up_map = mon_le64_to_cpu(arrv_dom->up_map); for (i = 0; i < new_member_cnt; i++) - dom->members[i] = ntohl(arrv_dom->members[i]); + dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]); /* Update peers affected by this domain record */ applied_bef = peer->applied; @@ -540,19 +581,19 @@ void tipc_mon_prep(struct net 
*net, void *data, int *dlen, if (likely(state->acked_gen == gen)) { len = dom_rec_len(dom, 0); *dlen = len; - dom->len = htons(len); - dom->gen = htons(gen); - dom->ack_gen = htons(state->peer_gen); + dom->len = mon_cpu_to_le16(len); + dom->gen = mon_cpu_to_le16(gen); + dom->ack_gen = mon_cpu_to_le16(state->peer_gen); dom->member_cnt = 0; return; } /* Send the full record */ read_lock_bh(&mon->lock); - len = ntohs(mon->cache.len); + len = mon_le16_to_cpu(mon->cache.len); *dlen = len; memcpy(data, &mon->cache, len); read_unlock_bh(&mon->lock); - dom->ack_gen = htons(state->peer_gen); + dom->ack_gen = mon_cpu_to_le16(state->peer_gen); } void tipc_mon_get_state(struct net *net, u32 addr, @@ -589,7 +630,7 @@ void tipc_mon_get_state(struct net *net, u32 addr, static void mon_timeout(struct timer_list *t) { - struct tipc_monitor *mon = from_timer(mon, t, timer); + struct tipc_monitor *mon = timer_container_of(mon, t, timer); struct tipc_peer *self; int best_member_cnt = dom_size(mon->peer_cnt) - 1; @@ -659,12 +700,28 @@ void tipc_mon_delete(struct net *net, int bearer_id) } mon->self = NULL; write_unlock_bh(&mon->lock); - del_timer_sync(&mon->timer); + timer_shutdown_sync(&mon->timer); kfree(self->domain); kfree(self); kfree(mon); } +void tipc_mon_reinit_self(struct net *net) +{ + struct tipc_monitor *mon; + int bearer_id; + + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + mon = tipc_monitor(net, bearer_id); + if (!mon) + continue; + write_lock_bh(&mon->lock); + if (mon->self) + mon->self->addr = tipc_own_addr(net); + write_unlock_bh(&mon->lock); + } +} + int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) { struct tipc_net *tn = tipc_net(net); @@ -696,7 +753,7 @@ static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER); if (!attrs) goto msg_full; @@ -785,7 +842,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 2a21b93e0d04..ed63d2e650b0 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id); int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node); +void tipc_mon_reinit_self(struct net *net); extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f48e5857210f..76284fc538eb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -39,31 +39,37 @@ #include "msg.h" #include "addr.h" #include "name_table.h" +#include "crypto.h" +#define BUF_ALIGN(x) ALIGN(x, 4) #define MAX_FORWARD_SIZE 1024 +#ifdef CONFIG_TIPC_CRYPTO +#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16) +#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE) +#else #define BUF_HEADROOM (LL_MAX_HEADER + 48) -#define BUF_TAILROOM 16 +#define BUF_OVERHEAD BUF_HEADROOM +#endif -static unsigned int align(unsigned int i) -{ - return (i + 3) & ~3u; -} +const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /** * tipc_buf_acquire - creates a TIPC message buffer * @size: message size (including TIPC header) + * @gfp: memory allocation flags * - * Returns a 
new buffer with data pointers set to the specified size. + * Return: a new buffer with data pointers set to the specified size. * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. + * NOTE: + * Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. */ struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - skb = alloc_skb_fclone(buf_size, gfp); + skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp); if (skb) { skb_reserve(skb, BUF_HEADROOM); skb_put(skb, size); @@ -105,10 +111,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type, msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); - if (hdr_sz > SHORT_H_SIZE) { - msg_set_orignode(msg, onode); - msg_set_destnode(msg, dnode); - } return buf; } @@ -140,24 +142,25 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; - head = *headbuf = frag; *buf = NULL; + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) + goto err; + head = *headbuf = frag; TIPC_SKB_CB(head)->tail = NULL; - if (skb_is_nonlinear(head)) { - skb_walk_frags(head, tail) { - TIPC_SKB_CB(head)->tail = tail; - } - } else { - skb_frag_list_init(head); - } return 0; } if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -173,7 +176,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } if (fragid == LAST_FRAGMENT) { - TIPC_SKB_CB(head)->validated = false; + TIPC_SKB_CB(head)->validated = 0; if (unlikely(!tipc_msg_validate(&head))) goto err; *buf = head; @@ -181,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); @@ -190,6 +192,56 @@ err: return 0; } +/** + * tipc_msg_append(): Append data to tail of an existing buffer queue + * @_hdr: header to be used + * @m: the data to be appended + * @mss: max allowable size of buffer + * @dlen: size of data to be appended + * @txq: queue to append to + * + * Return: the number of 1k blocks appended or errno value + */ +int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, + int mss, struct sk_buff_head *txq) +{ + struct sk_buff *skb; + int accounted, total, curr; + int mlen, cpy, rem = dlen; + struct tipc_msg *hdr; + + skb = skb_peek_tail(txq); + accounted = skb ? 
msg_blocks(buf_msg(skb)) : 0; + total = accounted; + + do { + if (!skb || skb->len >= mss) { + skb = tipc_buf_acquire(mss, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + skb_orphan(skb); + skb_trim(skb, MIN_H_SIZE); + hdr = buf_msg(skb); + skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE); + msg_set_hdr_sz(hdr, MIN_H_SIZE); + msg_set_size(hdr, MIN_H_SIZE); + __skb_queue_tail(txq, skb); + total += 1; + } + hdr = buf_msg(skb); + curr = msg_blocks(hdr); + mlen = msg_size(hdr); + cpy = min_t(size_t, rem, mss - mlen); + if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter)) + return -EFAULT; + msg_set_size(hdr, mlen + cpy); + skb_put(skb, cpy); + rem -= cpy; + total += msg_blocks(hdr) - curr; + } while (rem > 0); + return total - accounted; +} + /* tipc_msg_validate - validate basic format of received message * * This routine ensures a TIPC message has an acceptable header, and at least @@ -218,6 +270,7 @@ bool tipc_msg_validate(struct sk_buff **_skb) if (unlikely(TIPC_SKB_CB(skb)->validated)) return true; + if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) return false; @@ -239,14 +292,74 @@ bool tipc_msg_validate(struct sk_buff **_skb) if (unlikely(skb->len < msz)) return false; - TIPC_SKB_CB(skb)->validated = true; + TIPC_SKB_CB(skb)->validated = 1; return true; } /** + * tipc_msg_fragment - build a fragment skb list for TIPC message + * + * @skb: TIPC message skb + * @hdr: internal msg header to be put on the top of the fragments + * @pktmax: max size of a fragment incl. the header + * @frags: returned fragment skb list + * + * Return: 0 if the fragmentation is successful, otherwise: -EINVAL + * or -ENOMEM + */ +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags) +{ + int pktno, nof_fragms, dsz, dmax, eat; + struct tipc_msg *_hdr; + struct sk_buff *_skb; + u8 *data; + + /* Non-linear buffer? */ + if (skb_linearize(skb)) + return -ENOMEM; + + data = (u8 *)skb->data; + dsz = msg_size(buf_msg(skb)); + dmax = pktmax - INT_H_SIZE; + if (dsz <= dmax || !dmax) + return -EINVAL; + + nof_fragms = dsz / dmax + 1; + for (pktno = 1; pktno <= nof_fragms; pktno++) { + if (pktno < nof_fragms) + eat = dmax; + else + eat = dsz % dmax; + /* Allocate a new fragment */ + _skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC); + if (!_skb) + goto error; + skb_orphan(_skb); + __skb_queue_tail(frags, _skb); + /* Copy header & data to the fragment */ + skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat); + data += eat; + /* Update the fragment's header */ + _hdr = buf_msg(_skb); + msg_set_fragm_no(_hdr, pktno); + msg_set_nof_fragms(_hdr, nof_fragms); + msg_set_size(_hdr, INT_H_SIZE + eat); + } + return 0; + +error: + __skb_queue_purge(frags); + __skb_queue_head_init(frags); + return -ENOMEM; +} + +/** * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message + * @offset: buffer offset for fragmented messages (FIXME) * @dsz: Total length of user data * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller @@ -254,7 +367,7 @@ bool tipc_msg_validate(struct sk_buff **_skb) * Note that the recursive call we are making here is safe, since it can * logically go only one further level down. 
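
The splitting rule in tipc_msg_fragment() above is easy to model: with dmax = pktmax - INT_H_SIZE payload bytes per fragment, it always produces dsz / dmax + 1 fragments, all but the last carrying dmax bytes and the last carrying the remainder (dsz <= dmax was already rejected with -EINVAL; an exact multiple yields an empty tail fragment). A small stand-alone check of that arithmetic:

#include <stdio.h>

/* print the payload split tipc_msg_fragment() produces for dsz bytes */
static void show_fragment_sizes(int dsz, int dmax)
{
        int nof_fragms = dsz / dmax + 1;        /* as in the patch */

        for (int pktno = 1; pktno <= nof_fragms; pktno++) {
                int eat = pktno < nof_fragms ? dmax : dsz % dmax;
                printf("fragment %d/%d: %d payload bytes\n",
                       pktno, nof_fragms, eat);
        }
}

int main(void)
{
        show_fragment_sizes(4000, 1400);        /* 1400 + 1400 + 1200 */
        return 0;
}
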
* - * Returns message data size or errno: -ENOMEM, -EFAULT + * Return: message data size or errno: -ENOMEM, -EFAULT */ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int pktmax, struct sk_buff_head *list) @@ -280,7 +393,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, if (unlikely(!skb)) { if (pktmax != MAX_MSG_SIZE) return -ENOMEM; - rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); + rc = tipc_msg_build(mhdr, m, offset, dsz, + one_page_mtu, list); if (rc != dsz) return rc; if (tipc_msg_assemble(list)) @@ -360,48 +474,98 @@ error: } /** - * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one - * @skb: the buffer to append to ("bundle") - * @msg: message to be appended - * @mtu: max allowable size for the bundle buffer - * Consumes buffer if successful - * Returns true if bundling could be performed, otherwise false + * tipc_msg_bundle - Append contents of a buffer to tail of an existing one + * @bskb: the bundle buffer to append to + * @msg: message to be appended + * @max: max allowable size for the bundle buffer + * + * Return: "true" if bundling has been performed, otherwise "false" */ -bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu) +static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg, + u32 max) { - struct tipc_msg *bmsg; - unsigned int bsz; - unsigned int msz = msg_size(msg); - u32 start, pad; - u32 max = mtu - INT_H_SIZE; + struct tipc_msg *bmsg = buf_msg(bskb); + u32 msz, bsz, offset, pad; - if (likely(msg_user(msg) == MSG_FRAGMENTER)) - return false; - if (!skb) - return false; - bmsg = buf_msg(skb); + msz = msg_size(msg); bsz = msg_size(bmsg); - start = align(bsz); - pad = start - bsz; + offset = BUF_ALIGN(bsz); + pad = offset - bsz; - if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL)) + if (unlikely(skb_tailroom(bskb) < (pad + msz))) return false; - if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + if (unlikely(max < (offset + msz))) return false; - if (unlikely(msg_user(bmsg) != MSG_BUNDLER)) + + skb_put(bskb, pad + msz); + skb_copy_to_linear_data_offset(bskb, offset, msg, msz); + msg_set_size(bmsg, offset + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + return true; +} + +/** + * tipc_msg_try_bundle - Try to bundle a new message to the last one + * @tskb: the last/target message to which the new one will be appended + * @skb: the new message skb pointer + * @mss: max message size (header inclusive) + * @dnode: destination node for the message + * @new_bundle: if this call made a new bundle or not + * + * Return: "true" if the new message skb is potential for bundling this time or + * later, in the case a bundling has been done this time, the skb is consumed + * (the skb pointer = NULL). + * Otherwise, "false" if the skb cannot be bundled at all. 
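
The rewritten tipc_msg_bundle() reduces bundling to arithmetic on the bundle buffer: the inner message is copied in at the next 4-byte boundary, so it fits only if both the skb tailroom and the allowed bundle size can absorb the padding plus the message. The two checks, condensed into a predicate (recall BUF_ALIGN(x) is ALIGN(x, 4)):

#include <stdint.h>

/* can a msz-byte message be appended to a bsz-byte bundle? */
static int bundle_fits(uint32_t bsz, uint32_t msz,
                       uint32_t tailroom, uint32_t max)
{
        uint32_t offset = (bsz + 3u) & ~3u;     /* BUF_ALIGN(bsz) */
        uint32_t pad = offset - bsz;

        if (tailroom < pad + msz)               /* no room left in the skb */
                return 0;
        if (max < offset + msz)                 /* would exceed bundle limit */
                return 0;
        return 1;
}
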
+ */ +bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss, + u32 dnode, bool *new_bundle) +{ + struct tipc_msg *msg, *inner, *outer; + u32 tsz; + + /* First, check if the new buffer is suitable for bundling */ + msg = buf_msg(*skb); + if (msg_user(msg) == MSG_FRAGMENTER) return false; - if (unlikely(skb_tailroom(skb) < (pad + msz))) + if (msg_user(msg) == TUNNEL_PROTOCOL) return false; - if (unlikely(max < (start + msz))) + if (msg_user(msg) == BCAST_PROTOCOL) return false; - if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) && - (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE)) + if (mss <= INT_H_SIZE + msg_size(msg)) return false; - skb_put(skb, pad + msz); - skb_copy_to_linear_data_offset(skb, start, msg, msz); - msg_set_size(bmsg, start + msz); - msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + /* Ok, but the last/target buffer can be empty? */ + if (unlikely(!tskb)) + return true; + + /* Is it a bundle already? Try to bundle the new message to it */ + if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) { + *new_bundle = false; + goto bundle; + } + + /* Make a new bundle of the two messages if possible */ + tsz = msg_size(buf_msg(tskb)); + if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg))) + return true; + if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE, + GFP_ATOMIC))) + return true; + inner = buf_msg(tskb); + skb_push(tskb, INT_H_SIZE); + outer = buf_msg(tskb); + tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE, + dnode); + msg_set_importance(outer, msg_importance(inner)); + msg_set_size(outer, INT_H_SIZE + tsz); + msg_set_msgcnt(outer, 1); + *new_bundle = true; + +bundle: + if (likely(tipc_msg_bundle(tskb, msg, mss))) { + consume_skb(*skb); + *skb = NULL; + } return true; } @@ -410,9 +574,9 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu) * @skb: buffer to be extracted from. * @iskb: extracted inner buffer, to be returned * @pos: position in outer message of msg to be extracted. - * Returns position of next msg + * Returns position of next msg. * Consumes outer buffer when last packet extracted - * Returns true when when there is an extracted buffer, otherwise false + * Return: true when there is an extracted buffer, otherwise false */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { @@ -441,7 +605,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) if (unlikely(!tipc_msg_validate(iskb))) goto none; - *pos += align(imsz); + *pos += BUF_ALIGN(imsz); return true; none: kfree_skb(skb); @@ -451,58 +615,12 @@ none: } /** - * tipc_msg_make_bundle(): Create bundle buf and append message to its tail - * @list: the buffer chain, where head is the buffer to replace/append - * @skb: buffer to be created, appended to and returned in case of success - * @msg: message to be appended - * @mtu: max allowable size for the bundle buffer, inclusive header - * @dnode: destination node for message. 
(Not always present in header) - * Returns true if success, otherwise false - */ -bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, - u32 mtu, u32 dnode) -{ - struct sk_buff *_skb; - struct tipc_msg *bmsg; - u32 msz = msg_size(msg); - u32 max = mtu - INT_H_SIZE; - - if (msg_user(msg) == MSG_FRAGMENTER) - return false; - if (msg_user(msg) == TUNNEL_PROTOCOL) - return false; - if (msg_user(msg) == BCAST_PROTOCOL) - return false; - if (msz > (max / 2)) - return false; - - _skb = tipc_buf_acquire(max, GFP_ATOMIC); - if (!_skb) - return false; - - skb_trim(_skb, INT_H_SIZE); - bmsg = buf_msg(_skb); - tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, - INT_H_SIZE, dnode); - if (msg_isdata(msg)) - msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE); - else - msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE); - msg_set_seqno(bmsg, msg_seqno(msg)); - msg_set_ack(bmsg, msg_ack(msg)); - msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); - tipc_msg_bundle(_skb, msg, mtu); - *skb = _skb; - return true; -} - -/** * tipc_msg_reverse(): swap source and destination addresses and add error code * @own_node: originating node id for reversed message * @skb: buffer containing message to be reversed; will be consumed * @err: error code to be set in message, if any * Replaces consumed buffer with new one when successful - * Returns true if success, otherwise false + * Return: true if success, otherwise false */ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { @@ -574,16 +692,20 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy) /** * tipc_msg_lookup_dest(): try to find new destination for named message + * @net: pointer to associated network namespace * @skb: the buffer containing the message. * @err: error code to be used by caller if lookup fails * Does not consume buffer - * Returns true if a destination is found, false otherwise + * Return: true if a destination is found, false otherwise */ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) { struct tipc_msg *msg = buf_msg(skb); - u32 dport, dnode; - u32 onode = tipc_own_addr(net); + u32 scope = msg_lookup_scope(msg); + u32 self = tipc_own_addr(net); + u32 inst = msg_nameinst(msg); + struct tipc_socket_addr sk; + struct tipc_uaddr ua; if (!msg_isdata(msg)) return false; @@ -597,21 +719,18 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; - dnode = tipc_scope2node(net, msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(net, msg_nametype(msg), - msg_nameinst(msg), &dnode); - if (!dport) + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope, + msg_nametype(msg), inst, inst); + sk.node = tipc_scope2node(net, scope); + if (!tipc_nametbl_lookup_anycast(net, &ua, &sk)) return false; msg_incr_reroute_cnt(msg); - if (dnode != onode) - msg_set_prevnode(msg, onode); - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); + if (sk.node != self) + msg_set_prevnode(msg, self); + msg_set_destnode(msg, sk.node); + msg_set_destport(msg, sk.ref); *err = TIPC_OK; - if (!skb_cloned(skb)) - return true; - return true; } @@ -701,19 +820,19 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, * @seqno: sequence number of buffer to add * @skb: buffer to add */ -void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb) { struct sk_buff *_skb, *tmp; if (skb_queue_empty(list) || less(seqno, 
buf_seqno(skb_peek(list)))) { __skb_queue_head(list, skb); - return; + return true; } if (more(seqno, buf_seqno(skb_peek_tail(list)))) { __skb_queue_tail(list, skb); - return; + return true; } skb_queue_walk_safe(list, _skb, tmp) { @@ -722,9 +841,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, if (seqno == buf_seqno(_skb)) break; __skb_queue_before(list, _skb, skb); - return; + return true; } kfree_skb(skb); + return false; } void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a0924956bb61..c5eec16213d7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -82,6 +82,7 @@ struct plist; #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 +#define MSG_CRYPTO 14 #define SOCK_WAKEUP 14 /* pseudo user */ #define TOP_SRV 15 /* pseudo user */ @@ -98,18 +99,49 @@ struct plist; #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -#define FB_MTU 3744 #define TIPC_MEDIA_INFO_OFFSET 5 +extern const int one_page_mtu; + struct tipc_skb_cb { - u32 bytes_read; - u32 orig_member; - struct sk_buff *tail; - unsigned long nxt_retr; - bool validated; - u16 chain_imp; - u16 ackers; -}; + union { + struct { + struct sk_buff *tail; + unsigned long nxt_retr; + unsigned long retr_stamp; + u32 bytes_read; + u32 orig_member; + u16 chain_imp; + u16 ackers; + u16 retr_cnt; + } __packed; +#ifdef CONFIG_TIPC_CRYPTO + struct { + struct tipc_crypto *rx; + struct tipc_aead *last; + u8 recurs; + } tx_clone_ctx __packed; +#endif + } __packed; + union { + struct { + u8 validated:1; +#ifdef CONFIG_TIPC_CRYPTO + u8 encrypted:1; + u8 decrypted:1; +#define SKB_PROBING 1 +#define SKB_GRACING 2 + u8 xmit_type:2; + u8 tx_clone_deferred:1; +#endif + }; + u8 flags; + }; + u8 reserved; +#ifdef CONFIG_TIPC_CRYPTO + void *crypto_ctx; +#endif +} __packed; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) @@ -117,6 +149,54 @@ struct tipc_msg { __be32 hdr[15]; }; +/* struct tipc_gap_ack - TIPC Gap ACK block + * @ack: seqno of the last consecutive packet in link deferdq + * @gap: number of gap packets since the last ack + * + * E.g: + * link deferdq: 1 2 3 4 10 11 13 14 15 20 + * --> Gap ACK blocks: <4, 5>, <11, 1>, <15, 4>, <20, 0> + */ +struct tipc_gap_ack { + __be16 ack; + __be16 gap; +}; + +/* struct tipc_gap_ack_blks + * @len: actual length of the record + * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast + * ones) + * @start_index: starting index for "valid" broadcast Gap ACK blocks + * @bgack_cnt: number of Gap ACK blocks for broadcast in the record + * @gacks: array of Gap ACK blocks + * + * 31 16 15 0 + * +-------------+-------------+-------------+-------------+ + * | bgack_cnt | ugack_cnt | len | + * +-------------+-------------+-------------+-------------+ - + * | gap | ack | | + * +-------------+-------------+-------------+-------------+ > bc gacks + * : : : | + * +-------------+-------------+-------------+-------------+ - + * | gap | ack | | + * +-------------+-------------+-------------+-------------+ > uc gacks + * : : : | + * +-------------+-------------+-------------+-------------+ - + */ +struct tipc_gap_ack_blks { + __be16 len; + union { + u8 ugack_cnt; + u8 start_index; + }; + u8 bgack_cnt; + struct tipc_gap_ack gacks[]; +}; + +#define MAX_GAP_ACK_BLKS 128 +#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \ + sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS) + static inline struct 
tipc_msg *buf_msg(struct sk_buff *skb) { return (struct tipc_msg *)skb->data; @@ -146,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, m->hdr[w] |= htonl(val); } -static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) -{ - u32 temp = msg->hdr[a]; - - msg->hdr[a] = msg->hdr[b]; - msg->hdr[b] = temp; -} - /* * Word 0 */ @@ -257,6 +329,36 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 18, 1, d); } +static inline int msg_ack_required(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_ack_required(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 18, 1, 1); +} + +static inline int msg_nagle_ack(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_nagle_ack(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 18, 1, 1); +} + +static inline bool msg_is_rcast(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 0x1); +} + +static inline void msg_set_is_rcast(struct tipc_msg *m, bool d) +{ + msg_set_bits(m, 0, 18, 0x1, d); +} + static inline void msg_set_size(struct tipc_msg *m, u32 sz) { m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); @@ -267,7 +369,7 @@ static inline unchar *msg_data(struct tipc_msg *m) return ((unchar *)m) + msg_hdr_sz(m); } -static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +static inline struct tipc_msg *msg_inner_hdr(struct tipc_msg *m) { return (struct tipc_msg *)msg_data(m); } @@ -315,6 +417,11 @@ static inline u32 msg_connected(struct tipc_msg *m) return msg_type(m) == TIPC_CONN_MSG; } +static inline u32 msg_direct(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_DIRECT_MSG; +} + static inline u32 msg_errcode(struct tipc_msg *m) { return msg_bits(m, 1, 25, 0xf); @@ -325,6 +432,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err) msg_set_bits(m, 1, 25, 0xf, err); } +static inline void msg_set_bulk(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 28, 0x1, 1); +} + +static inline u32 msg_is_bulk(struct tipc_msg *m) +{ + return msg_bits(m, 1, 28, 0x1); +} + +static inline void msg_set_last_bulk(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 27, 0x1, 1); +} + +static inline u32 msg_is_last_bulk(struct tipc_msg *m) +{ + return msg_bits(m, 1, 27, 0x1); +} + +static inline void msg_set_non_legacy(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 26, 0x1, 1); +} + +static inline u32 msg_is_legacy(struct tipc_msg *m) +{ + return !msg_bits(m, 1, 26, 0x1); +} + static inline u32 msg_reroute_cnt(struct tipc_msg *m) { return msg_bits(m, 1, 21, 0xf); @@ -335,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m) msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1); } -static inline void msg_reset_reroute_cnt(struct tipc_msg *m) -{ - msg_set_bits(m, 1, 21, 0xf, 0); -} - static inline u32 msg_lookup_scope(struct tipc_msg *m) { return msg_bits(m, 1, 19, 0x3); @@ -360,6 +492,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) msg_set_bits(m, 1, 0, 0xffff, n); } +/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch + * link peer session number + */ +static inline bool msg_dest_session_valid(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1); +} + +static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) +{ + msg_set_bits(m, 1, 16, 0x1, valid); +} + +static inline u16 msg_dest_session(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} /* 
* Word 2 @@ -423,7 +577,7 @@ static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) static inline u32 msg_origport(struct tipc_msg *m) { if (msg_user(m) == MSG_FRAGMENTER) - m = msg_get_wrapped(m); + m = msg_inner_hdr(m); return msg_word(m, 4); } @@ -432,6 +586,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p) msg_set_word(m, 4, p); } +static inline u16 msg_named_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + static inline u32 msg_destport(struct tipc_msg *m) { return msg_word(m, 5); @@ -574,6 +738,9 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) #define GRP_RECLAIM_MSG 4 #define GRP_REMIT_MSG 5 +/* Crypto message types */ +#define KEY_DISTR_MSG 0 + /* * Word 1 */ @@ -620,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) msg_set_word(m, 2, n); } -static inline u32 msg_bcgap_after(struct tipc_msg *m) -{ - return msg_bits(m, 2, 16, 0xffff); -} - static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n) { msg_set_bits(m, 2, 16, 0xffff, n); @@ -658,12 +820,26 @@ static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) msg_set_bits(m, 4, 16, 0xffff, n); } +static inline u32 msg_nof_fragms(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline u32 msg_fragm_no(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } - static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); @@ -674,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n) msg_set_bits(m, 4, 0, 0xffff, n); } -static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n) -{ - msg_set_bits(m, 4, 0, 0xffff, n); -} - static inline u32 msg_bc_netid(struct tipc_msg *m) { return msg_word(m, 4); @@ -814,6 +985,16 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u16 msg_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); @@ -937,6 +1118,20 @@ static inline bool msg_is_reset(struct tipc_msg *hdr) return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); } +/* Word 13 + */ +static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 13, n); +} + +static inline u32 msg_peer_net_hash(struct tipc_msg *m) +{ + return msg_word(m, 13); +} + +/* Word 14 + */ static inline u32 msg_sugg_node_addr(struct tipc_msg *m) { return msg_word(m, 14); @@ -968,18 +1163,21 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); -bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu); -bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, - u32 mtu, u32 dnode); +bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss, + u32 dnode, bool *new_bundle); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int 
*pos); +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); +int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen, + int mss, struct sk_buff_head *txq); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); -void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy); @@ -1088,4 +1286,25 @@ static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, tipc_skb_queue_splice_tail(&tmp, head); } +/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno + * @list: list to be dequeued from + * @seqno: seqno of the expected msg + * + * Return: the skb dequeued from the list if its seqno is less than or equal + * to the expected one, otherwise the skb is still held on the list + * + * Note: must be used with appropriate locks held only + */ +static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list, + u16 seqno) +{ + struct sk_buff *skb = skb_peek(list); + + if (skb && less_eq(buf_seqno(skb), seqno)) { + __skb_unlink(skb, list); + return skb; + } + return NULL; +} + #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 61219f0b9677..190b49c5cbc3 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -1,8 +1,9 @@ /* * net/tipc/name_distr.c: TIPC name distribution code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2019, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved.
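On the seqno gate in __tipc_skb_dequeue() above: less_eq() is TIPC's wrap-safe comparison on 16-bit sequence numbers. A user-space sketch in the spirit of that helper (the exact kernel definition lives in msg.h):

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a <= b" on 16-bit seqnos, modeled on TIPC's less_eq() */
static int less_eq16(uint16_t a, uint16_t b)
{
        return (int16_t)(a - b) <= 0;
}

int main(void)
{
        /* 65535 precedes 1 across the wrap-around, so a queue head with
         * seqno 65535 may be dequeued when the expected seqno is 1
         */
        printf("%d\n", less_eq16(65535, 1)); /* prints 1 */
        return 0;
}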
* * Redistribution and use in source and binary forms, with or without @@ -40,28 +41,26 @@ int sysctl_tipc_named_timeout __read_mostly = 2000; -struct distr_queue_item { - struct distr_item i; - u32 dtype; - u32 node; - unsigned long expires; - struct list_head next; -}; - /** * publ_to_item - add publication info to a publication message + * @p: publication info + * @i: location of item in the message */ static void publ_to_item(struct distr_item *i, struct publication *p) { - i->type = htonl(p->type); - i->lower = htonl(p->lower); - i->upper = htonl(p->upper); - i->port = htonl(p->port); + i->type = htonl(p->sr.type); + i->lower = htonl(p->sr.lower); + i->upper = htonl(p->sr.upper); + i->port = htonl(p->sk.ref); i->key = htonl(p->key); } /** * named_prepare_buf - allocate & initialize a publication message + * @net: the associated network namespace + * @type: message type + * @size: payload size + * @dest: destination node * * The buffer returned is of size INT_H_SIZE + payload size */ @@ -83,72 +82,81 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, /** * tipc_named_publish - tell other nodes about a new publication by this node + * @net: the associated network namespace + * @p: the new publication */ -struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *p) { struct name_table *nt = tipc_name_table(net); struct distr_item *item; struct sk_buff *skb; - if (publ->scope == TIPC_NODE_SCOPE) { - list_add_tail_rcu(&publ->binding_node, &nt->node_scope); + if (p->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&p->binding_node, &nt->node_scope); return NULL; } write_lock_bh(&nt->cluster_scope_lock); - list_add_tail(&publ->binding_node, &nt->cluster_scope); + list_add_tail(&p->binding_node, &nt->cluster_scope); write_unlock_bh(&nt->cluster_scope_lock); skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!skb) { pr_warn("Publication distribution failure\n"); return NULL; } - + msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); + msg_set_non_legacy(buf_msg(skb)); item = (struct distr_item *)msg_data(buf_msg(skb)); - publ_to_item(item, publ); + publ_to_item(item, p); return skb; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node + * @net: the associated network namespace + * @p: the withdrawn publication */ -struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p) { struct name_table *nt = tipc_name_table(net); - struct sk_buff *buf; struct distr_item *item; + struct sk_buff *skb; write_lock_bh(&nt->cluster_scope_lock); - list_del(&publ->binding_node); + list_del(&p->binding_node); write_unlock_bh(&nt->cluster_scope_lock); - if (publ->scope == TIPC_NODE_SCOPE) + if (p->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); - if (!buf) { + skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); + if (!skb) { pr_warn("Withdrawal distribution failure\n"); return NULL; } - - item = (struct distr_item *)msg_data(buf_msg(buf)); - publ_to_item(item, publ); - return buf; + msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); + msg_set_non_legacy(buf_msg(skb)); + item = (struct distr_item *)msg_data(buf_msg(skb)); + publ_to_item(item, p); + return skb; } /** * named_distribute - prepare name info for bulk distribution to another node + * @net: the associated network namespace * @list: list of 
messages (buffers) to be returned from this function * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain + * @seqno: sequence number for this message */ static void named_distribute(struct net *net, struct sk_buff_head *list, - u32 dnode, struct list_head *pls) + u32 dnode, struct list_head *pls, u16 seqno) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) / + u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) / ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; + struct tipc_msg *hdr; list_for_each_entry(publ, pls, binding_node) { /* Prepare next buffer: */ @@ -159,8 +167,11 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, pr_warn("Bulk publication failure\n"); return; } - msg_set_bc_ack_invalid(buf_msg(skb), true); - item = (struct distr_item *)msg_data(buf_msg(skb)); + hdr = buf_msg(skb); + msg_set_bc_ack_invalid(hdr, true); + msg_set_bulk(hdr); + msg_set_non_legacy(hdr); + item = (struct distr_item *)msg_data(hdr); } /* Pack publication into message: */ @@ -176,147 +187,197 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, } } if (skb) { - msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem)); + hdr = buf_msg(skb); + msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem)); skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); __skb_queue_tail(list, skb); } + hdr = buf_msg(skb_peek_tail(list)); + msg_set_last_bulk(hdr); + msg_set_named_seqno(hdr, seqno); } /** * tipc_named_node_up - tell specified node about all publications by this node + * @net: the associated network namespace + * @dnode: destination node + * @capabilities: peer node's capabilities */ -void tipc_named_node_up(struct net *net, u32 dnode) +void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) { struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); struct sk_buff_head head; + u16 seqno; __skb_queue_head_init(&head); + spin_lock_bh(&tn->nametbl_lock); + if (!(capabilities & TIPC_NAMED_BCAST)) + nt->rc_dests++; + seqno = nt->snd_nxt; + spin_unlock_bh(&tn->nametbl_lock); read_lock_bh(&nt->cluster_scope_lock); - named_distribute(net, &head, dnode, &nt->cluster_scope); + named_distribute(net, &head, dnode, &nt->cluster_scope, seqno); tipc_node_xmit(net, &head, dnode, 0); read_unlock_bh(&nt->cluster_scope_lock); } /** * tipc_publ_purge - remove publication associated with a failed node + * @net: the associated network namespace + * @p: the publication to remove + * @addr: failed node's address * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. 
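The bulk sizing in named_distribute() above rounds the per-message payload down to a whole number of distr_item entries. A worked example, assuming the usual INT_H_SIZE of 40 bytes and an ITEM_SIZE of 20 bytes (five 32-bit fields per struct distr_item):

#include <stdio.h>

int main(void)
{
        unsigned int mtu = 1500, int_h_size = 40, item_size = 20;
        unsigned int msg_dsz = (mtu - int_h_size) / item_size * item_size;

        /* 73 items, 1460 payload bytes per bulk message at MTU 1500 */
        printf("%u items, %u bytes\n", msg_dsz / item_size, msg_dsz);
        return 0;
}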
*/ -static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) +static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr) { struct tipc_net *tn = tipc_net(net); - struct publication *p; + struct publication *_p; + struct tipc_uaddr ua; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type, + p->sr.lower, p->sr.upper); spin_lock_bh(&tn->nametbl_lock); - p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper, - publ->node, publ->key); - if (p) - tipc_node_unsubscribe(net, &p->binding_node, addr); + _p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key); + if (_p) + tipc_node_unsubscribe(net, &_p->binding_node, addr); spin_unlock_bh(&tn->nametbl_lock); - - if (p != publ) { - pr_err("Unable to remove publication from failed node\n" - " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n", - publ->type, publ->lower, publ->node, publ->port, - publ->key); - } - - kfree_rcu(p, rcu); + if (_p) + kfree_rcu(_p, rcu); } -/** - * tipc_dist_queue_purge - remove deferred updates from a node that went down - */ -static void tipc_dist_queue_purge(struct net *net, u32 addr) +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, + u32 addr, u16 capabilities) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct distr_queue_item *e, *tmp; - - spin_lock_bh(&tn->nametbl_lock); - list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { - if (e->node != addr) - continue; - list_del(&e->next); - kfree(e); - } - spin_unlock_bh(&tn->nametbl_lock); -} + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); -void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) -{ struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); - tipc_dist_queue_purge(net, addr); + spin_lock_bh(&tn->nametbl_lock); + if (!(capabilities & TIPC_NAMED_BCAST)) + nt->rc_dests--; + spin_unlock_bh(&tn->nametbl_lock); } /** * tipc_update_nametbl - try to process a nametable update and notify * subscribers + * @net: the associated network namespace + * @i: location of item in the message + * @node: node address + * @dtype: name distributor message type * * tipc_nametbl_lock must be held. - * Returns the publication item if successful, otherwise NULL. + * Return: true if the update was applied, otherwise false.
*/ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, u32 node, u32 dtype) { struct publication *p = NULL; - u32 lower = ntohl(i->lower); - u32 upper = ntohl(i->upper); - u32 type = ntohl(i->type); - u32 port = ntohl(i->port); + struct tipc_socket_addr sk; + struct tipc_uaddr ua; u32 key = ntohl(i->key); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + ntohl(i->type), ntohl(i->lower), ntohl(i->upper)); + sk.ref = ntohl(i->port); + sk.node = node; + if (dtype == PUBLICATION) { - p = tipc_nametbl_insert_publ(net, type, lower, upper, - TIPC_CLUSTER_SCOPE, node, - port, key); + p = tipc_nametbl_insert_publ(net, &ua, &sk, key); if (p) { tipc_node_subscribe(net, &p->binding_node, node); return true; } } else if (dtype == WITHDRAWAL) { - p = tipc_nametbl_remove_publ(net, type, lower, - upper, node, key); + p = tipc_nametbl_remove_publ(net, &ua, &sk, key); if (p) { tipc_node_unsubscribe(net, &p->binding_node, node); kfree_rcu(p, rcu); return true; } - pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n", - type, lower, node); + pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n", + ua.sr.type, ua.sr.lower, node); } else { - pr_warn("Unrecognized name table message received\n"); + pr_warn_ratelimited("Unknown name table message received\n"); } return false; } +static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open) +{ + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + u16 seqno; + + spin_lock_bh(&namedq->lock); + skb_queue_walk_safe(namedq, skb, tmp) { + if (unlikely(skb_linearize(skb))) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } + hdr = buf_msg(skb); + seqno = msg_named_seqno(hdr); + if (msg_is_last_bulk(hdr)) { + *rcv_nxt = seqno; + *open = true; + } + + if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { + __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); + return skb; + } + + if (*open && (*rcv_nxt == seqno)) { + (*rcv_nxt)++; + __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); + return skb; + } + + if (less(seqno, *rcv_nxt)) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } + } + spin_unlock_bh(&namedq->lock); + return NULL; +} + /** * tipc_named_rcv - process name table update messages sent by another node + * @net: the associated network namespace + * @namedq: queue to receive from + * @rcv_nxt: store last received seqno here + * @open: last bulk msg was received (FIXME) */ -void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) +void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_msg *msg; + struct tipc_net *tn = tipc_net(net); struct distr_item *item; - uint count; - u32 node; + struct tipc_msg *hdr; struct sk_buff *skb; - int mtype; + u32 count, node; spin_lock_bh(&tn->nametbl_lock); - for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { - skb_linearize(skb); - msg = buf_msg(skb); - mtype = msg_type(msg); - item = (struct distr_item *)msg_data(msg); - count = msg_data_sz(msg) / ITEM_SIZE; - node = msg_orignode(msg); + while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) { + hdr = buf_msg(skb); + node = msg_orignode(hdr); + item = (struct distr_item *)msg_data(hdr); + count = msg_data_sz(hdr) / ITEM_SIZE; while (count--) { - tipc_update_nametbl(net, item, node, mtype); + tipc_update_nametbl(net, item, node, msg_type(hdr)); item++; } kfree_skb(skb); @@ -326,6 +387,7 @@ void tipc_named_rcv(struct net 
*net, struct sk_buff_head *inputq) /** * tipc_named_reinit - re-initialize local publications + * @net: the associated network namespace * * This routine is called whenever TIPC networking is enabled. * All name table entries published by this node are updated to reflect @@ -335,15 +397,15 @@ void tipc_named_reinit(struct net *net) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); - struct publication *publ; + struct publication *p; u32 self = tipc_own_addr(net); spin_lock_bh(&tn->nametbl_lock); - list_for_each_entry_rcu(publ, &nt->node_scope, binding_node) - publ->node = self; - list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) - publ->node = self; - + list_for_each_entry_rcu(p, &nt->node_scope, binding_node) + p->sk.node = self; + list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node) + p->sk.node = self; + nt->rc_dests = 0; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 63fc73e0fa6c..c677f6f082df 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -46,7 +46,7 @@ * @type: name sequence type * @lower: name sequence lower bound * @upper: name sequence upper bound - * @ref: publishing port reference + * @port: publishing port reference * @key: publication key * * ===> All fields are stored in network byte order. <=== @@ -69,9 +69,11 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); -void tipc_named_node_up(struct net *net, u32 dnode); -void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); +void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities); +void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open); void tipc_named_reinit(struct net *net); -void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, + u32 addr, u16 capabilities); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f03525..e74940eab3a4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
 * * Redistribution and use in source and binary forms, with or without @@ -35,6 +36,8 @@ */ #include <net/sock.h> +#include <linux/list_sort.h> +#include <linux/rbtree_augmented.h> #include "core.h" #include "netlink.h" #include "name_table.h" @@ -50,6 +53,7 @@ * @lower: service range lower bound * @upper: service range upper bound * @tree_node: member of service range RB tree + * @max: largest 'upper' in this node subtree * @local_publ: list of identical publications made from this node * Used by closest_first lookup and multicast lookup algorithm * @all_publ: all publications identical to this one, whatever node and scope @@ -59,6 +63,7 @@ struct service_range { u32 lower; u32 upper; struct rb_node tree_node; + u32 max; struct list_head local_publ; struct list_head all_publ; }; @@ -66,6 +71,7 @@ struct service_range { /** * struct tipc_service - container for all published instances of a service type * @type: 32 bit 'type' value for service + * @publ_cnt: increasing counter for publications in this service * @ranges: rb tree containing all service ranges for this service * @service_list: links to adjacent name ranges in hash chain * @subscriptions: list of subscriptions for this service type @@ -74,6 +80,7 @@ struct service_range { */ struct tipc_service { u32 type; + u32 publ_cnt; struct rb_root ranges; struct hlist_node service_list; struct list_head subscriptions; @@ -81,6 +88,133 @@ struct service_range { struct rcu_head rcu; }; +#define service_range_upper(sr) ((sr)->upper) +RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks, + struct service_range, tree_node, u32, max, + service_range_upper) + +#define service_range_entry(rbtree_node) \ + (container_of(rbtree_node, struct service_range, tree_node)) + +#define service_range_overlap(sr, start, end) \ + ((sr)->lower <= (end) && (sr)->upper >= (start)) + +/** + * service_range_foreach_match - iterate over tipc service rbtree for each + * range match + * @sr: the service range pointer as a loop cursor + * @sc: the pointer to tipc service which holds the service range rbtree + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching + */ +#define service_range_foreach_match(sr, sc, start, end) \ + for (sr = service_range_match_first((sc)->ranges.rb_node, \ + start, \ + end); \ + sr; \ + sr = service_range_match_next(&(sr)->tree_node, \ + start, \ + end)) + +/** + * service_range_match_first - find first service range matching a range + * @n: the root node of service range rbtree for searching + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching + * + * Return: the leftmost service range node in the rbtree that overlaps the + * specific range if any. Otherwise, returns NULL. + */ +static struct service_range *service_range_match_first(struct rb_node *n, + u32 start, u32 end) +{ + struct service_range *sr; + struct rb_node *l, *r; + + /* No overlap in the tree at all? */ + if (!n || service_range_entry(n)->max < start) + return NULL; + + while (n) { + l = n->rb_left; + if (l && service_range_entry(l)->max >= start) { + /* A leftmost overlapping range node must be one in the + * left subtree. If it does not overlap, it has + * lower > end, and then nodes on the right side cannot + * satisfy the condition either. + */ + n = l; + continue; + } + + /* No one in the left subtree can match, return if this node is + * an overlap, i.e. the leftmost.
+ */ + sr = service_range_entry(n); + if (service_range_overlap(sr, start, end)) + return sr; + + /* Ok, try to lookup on the right side */ + r = n->rb_right; + if (sr->lower <= end && + r && service_range_entry(r)->max >= start) { + n = r; + continue; + } + break; + } + + return NULL; +} + +/** + * service_range_match_next - find next service range matching a range + * @n: a node in service range rbtree from which the searching starts + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching + * + * Return: the next service range node to the given node in the rbtree that + * overlaps the specific range if any. Otherwise, returns NULL. + */ +static struct service_range *service_range_match_next(struct rb_node *n, + u32 start, u32 end) +{ + struct service_range *sr; + struct rb_node *p, *r; + + while (n) { + r = n->rb_right; + if (r && service_range_entry(r)->max >= start) + /* A next overlap range node must be one in the right + * subtree. If not, it has lower > end, then any next + * successor (- an ancestor) of this node cannot + * satisfy the condition either. + */ + return service_range_match_first(r, start, end); + + /* No one in the right subtree can match, go up to find an + * ancestor of this node which is parent of a left-hand child. + */ + while ((p = rb_parent(n)) && n == p->rb_right) + n = p; + if (!p) + break; + + /* Return if this ancestor is an overlap */ + sr = service_range_entry(p); + if (service_range_overlap(sr, start, end)) + return sr; + + /* Ok, try to lookup more from this ancestor */ + if (sr->lower <= end) { + n = p; + continue; + } + break; + } + + return NULL; +} + static int hash(int x) { return x & (TIPC_NAMETBL_SIZE - 1); @@ -88,187 +222,173 @@ static int hash(int x) /** * tipc_publ_create - create a publication structure + * @ua: the service range the user is binding to + * @sk: the address of the socket that is bound + * @key: publication key */ -static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, +static struct publication *tipc_publ_create(struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { - struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); + struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC); - if (!publ) + if (!p) return NULL; - publ->type = type; - publ->lower = lower; - publ->upper = upper; - publ->scope = scope; - publ->node = node; - publ->port = port; - publ->key = key; - INIT_LIST_HEAD(&publ->binding_sock); - INIT_LIST_HEAD(&publ->binding_node); - INIT_LIST_HEAD(&publ->local_publ); - INIT_LIST_HEAD(&publ->all_publ); - return publ; + p->sr = ua->sr; + p->sk = *sk; + p->scope = ua->scope; + p->key = key; + INIT_LIST_HEAD(&p->binding_sock); + INIT_LIST_HEAD(&p->binding_node); + INIT_LIST_HEAD(&p->local_publ); + INIT_LIST_HEAD(&p->all_publ); + INIT_LIST_HEAD(&p->list); + return p; } /** * tipc_service_create - create a service structure for the specified 'type' + * @net: network namespace + * @ua: address representing the service to be bound * * Allocates a single range structure and sets it to all 0's. 
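The matchers above rest on two facts: a range overlaps the search window iff lower <= end and upper >= start, and a whole subtree can be skipped whenever its augmented max upper bound is below start. A stand-alone model of the overlap test (a plain array stands in for the rbtree):

#include <stdio.h>

struct range { unsigned int lower, upper; };

/* same condition as the service_range_overlap() macro above */
static int overlaps(const struct range *r, unsigned int start, unsigned int end)
{
        return r->lower <= end && r->upper >= start;
}

int main(void)
{
        struct range rs[] = { { 10, 19 }, { 15, 30 }, { 40, 50 } };
        unsigned int start = 18, end = 25;
        unsigned int i;

        for (i = 0; i < 3; i++)
                if (overlaps(&rs[i], start, end))
                        printf("match: [%u,%u]\n", rs[i].lower, rs[i].upper);
        return 0; /* prints [10,19] and [15,30] */
}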
*/ -static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd) +static struct tipc_service *tipc_service_create(struct net *net, + struct tipc_uaddr *ua) { - struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC); + struct name_table *nt = tipc_name_table(net); + struct tipc_service *service; + struct hlist_head *hd; + service = kzalloc(sizeof(*service), GFP_ATOMIC); if (!service) { pr_warn("Service creation failed, no memory\n"); return NULL; } spin_lock_init(&service->lock); - service->type = type; + service->type = ua->sr.type; service->ranges = RB_ROOT; INIT_HLIST_NODE(&service->service_list); INIT_LIST_HEAD(&service->subscriptions); + hd = &nt->services[hash(ua->sr.type)]; hlist_add_head_rcu(&service->service_list, hd); return service; } -/** - * tipc_service_first_range - find first service range in tree matching instance - * - * Very time-critical, so binary search through range rb tree - */ -static struct service_range *tipc_service_first_range(struct tipc_service *sc, - u32 instance) -{ - struct rb_node *n = sc->ranges.rb_node; - struct service_range *sr; - - while (n) { - sr = container_of(n, struct service_range, tree_node); - if (sr->lower > instance) - n = n->rb_left; - else if (sr->upper < instance) - n = n->rb_right; - else - return sr; - } - return NULL; -} - /* tipc_service_find_range - find service range matching publication parameters */ static struct service_range *tipc_service_find_range(struct tipc_service *sc, - u32 lower, u32 upper) + struct tipc_uaddr *ua) { - struct rb_node *n = sc->ranges.rb_node; struct service_range *sr; - sr = tipc_service_first_range(sc, lower); - if (!sr) - return NULL; - - /* Look for exact match */ - for (n = &sr->tree_node; n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->upper == upper) - break; + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { + /* Look for exact match */ + if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper) + return sr; } - if (!n || sr->lower != lower || sr->upper != upper) - return NULL; - return sr; + return NULL; } static struct service_range *tipc_service_create_range(struct tipc_service *sc, - u32 lower, u32 upper) + struct publication *p) { struct rb_node **n, *parent = NULL; - struct service_range *sr, *tmp; + struct service_range *sr; + u32 lower = p->sr.lower; + u32 upper = p->sr.upper; n = &sc->ranges.rb_node; while (*n) { - tmp = container_of(*n, struct service_range, tree_node); parent = *n; - tmp = container_of(parent, struct service_range, tree_node); - if (lower < tmp->lower) - n = &(*n)->rb_left; - else if (lower > tmp->lower) - n = &(*n)->rb_right; - else if (upper < tmp->upper) - n = &(*n)->rb_left; - else if (upper > tmp->upper) - n = &(*n)->rb_right; + sr = service_range_entry(parent); + if (lower == sr->lower && upper == sr->upper) + return sr; + if (sr->max < upper) + sr->max = upper; + if (lower <= sr->lower) + n = &parent->rb_left; else - return tmp; + n = &parent->rb_right; } sr = kzalloc(sizeof(*sr), GFP_ATOMIC); if (!sr) return NULL; sr->lower = lower; sr->upper = upper; + sr->max = upper; INIT_LIST_HEAD(&sr->local_publ); INIT_LIST_HEAD(&sr->all_publ); rb_link_node(&sr->tree_node, parent, n); - rb_insert_color(&sr->tree_node, &sc->ranges); + rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); return sr; } -static struct publication *tipc_service_insert_publ(struct net *net, - struct tipc_service *sc, - u32 type, u32 lower, - u32 upper, u32 scope, - u32 node, u32 port, - u32 key) 
+static bool tipc_service_insert_publ(struct net *net, + struct tipc_service *sc, + struct publication *p) { struct tipc_subscription *sub, *tmp; struct service_range *sr; - struct publication *p; + struct publication *_p; + u32 node = p->sk.node; bool first = false; + bool res = false; + u32 key = p->key; - sr = tipc_service_create_range(sc, lower, upper); + spin_lock_bh(&sc->lock); + sr = tipc_service_create_range(sc, p); if (!sr) - goto err; + goto exit; first = list_empty(&sr->all_publ); /* Return if the publication already exists */ - list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->key == key && (!p->node || p->node == node)) - return NULL; + list_for_each_entry(_p, &sr->all_publ, all_publ) { + if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) { + pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n", + p->sr.type, p->sr.lower, p->sr.upper, + node, p->sk.ref, key); + goto exit; + } } - /* Create and insert publication */ - p = tipc_publ_create(type, lower, upper, scope, node, port, key); - if (!p) - goto err; - if (in_own_node(net, node)) + if (in_own_node(net, p->sk.node)) list_add(&p->local_publ, &sr->local_publ); list_add(&p->all_publ, &sr->all_publ); + p->id = sc->publ_cnt++; /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { - tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED, - p->port, p->node, p->scope, first); + tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first); } - return p; -err: - pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper); - return NULL; + res = true; +exit: + if (!res) + pr_warn("Failed to bind to %u,%u,%u\n", + p->sr.type, p->sr.lower, p->sr.upper); + spin_unlock_bh(&sc->lock); + return res; } /** * tipc_service_remove_publ - remove a publication from a service + * @r: service_range to remove publication from + * @sk: address publishing socket + * @key: target publication key */ -static struct publication *tipc_service_remove_publ(struct service_range *sr, - u32 node, u32 key) +static struct publication *tipc_service_remove_publ(struct service_range *r, + struct tipc_socket_addr *sk, + u32 key) { struct publication *p; + u32 node = sk->node; - list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->key != key || (node && node != p->node)) + list_for_each_entry(p, &r->all_publ, all_publ) { + if (p->key != key || (node && node != p->sk.node)) continue; list_del(&p->all_publ); list_del(&p->local_publ); @@ -277,229 +397,265 @@ static struct publication *tipc_service_remove_publ(struct service_range *sr, return NULL; } +/* + * Code reused: time_after32() for the same purpose + */ +#define publication_after(pa, pb) time_after32((pa)->id, (pb)->id) +static int tipc_publ_sort(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct publication *pa, *pb; + + pa = container_of(a, struct publication, list); + pb = container_of(b, struct publication, list); + return publication_after(pa, pb); +} + /** * tipc_service_subscribe - attach a subscription, and optionally * issue the prescribed number of events if there is any service * range overlapping with the requested range + * @service: the tipc_service to attach the @sub to + * @sub: the subscription to attach */ static void tipc_service_subscribe(struct tipc_service *service, struct tipc_subscription *sub) { - struct tipc_subscr *sb = &sub->evt.s; + struct publication *p, *first, *tmp; + struct list_head publ_list; struct service_range *sr; - struct tipc_name_seq ns; 
- struct publication *p; - struct rb_node *n; - bool first; + u32 filter, lower, upper; - ns.type = tipc_sub_read(sb, seq.type); - ns.lower = tipc_sub_read(sb, seq.lower); - ns.upper = tipc_sub_read(sb, seq.upper); + filter = sub->s.filter; + lower = sub->s.seq.lower; + upper = sub->s.seq.upper; tipc_sub_get(sub); list_add(&sub->service_list, &service->subscriptions); - if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS) + if (filter & TIPC_SUB_NO_STATUS) return; - for (n = rb_first(&service->ranges); n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->lower > ns.upper) - break; - if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper)) - continue; - first = true; - + INIT_LIST_HEAD(&publ_list); + service_range_foreach_match(sr, service, lower, upper) { + first = NULL; list_for_each_entry(p, &sr->all_publ, all_publ) { - tipc_sub_report_overlap(sub, sr->lower, sr->upper, - TIPC_PUBLISHED, p->port, - p->node, p->scope, first); - first = false; + if (filter & TIPC_SUB_PORTS) + list_add_tail(&p->list, &publ_list); + else if (!first || publication_after(first, p)) + /* Pick this range's *first* publication */ + first = p; } + if (first) + list_add_tail(&first->list, &publ_list); + } + + /* Sort the publications before reporting */ + list_sort(NULL, &publ_list, tipc_publ_sort); + list_for_each_entry_safe(p, tmp, &publ_list, list) { + tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true); + list_del_init(&p->list); } } -static struct tipc_service *tipc_service_find(struct net *net, u32 type) +static struct tipc_service *tipc_service_find(struct net *net, + struct tipc_uaddr *ua) { struct name_table *nt = tipc_name_table(net); struct hlist_head *service_head; struct tipc_service *service; - service_head = &nt->services[hash(type)]; + service_head = &nt->services[hash(ua->sr.type)]; hlist_for_each_entry_rcu(service, service_head, service_list) { - if (service->type == type) + if (service->type == ua->sr.type) return service; } return NULL; }; -struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, - u32 lower, u32 upper, - u32 scope, u32 node, - u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct name_table *nt = tipc_name_table(net); struct tipc_service *sc; struct publication *p; - if (scope > TIPC_NODE_SCOPE || lower > upper) { - pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n", - type, lower, upper, scope); - return NULL; - } - sc = tipc_service_find(net, type); - if (!sc) - sc = tipc_service_create(type, &nt->services[hash(type)]); - if (!sc) + p = tipc_publ_create(ua, sk, key); + if (!p) return NULL; - spin_lock_bh(&sc->lock); - p = tipc_service_insert_publ(net, sc, type, lower, upper, - scope, node, port, key); - spin_unlock_bh(&sc->lock); - return p; + sc = tipc_service_find(net, ua); + if (!sc) + sc = tipc_service_create(net, ua); + if (sc && tipc_service_insert_publ(net, sc, p)) + return p; + kfree(p); + return NULL; } -struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 upper, - u32 node, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct tipc_service *sc = tipc_service_find(net, type); struct tipc_subscription *sub, *tmp; - struct service_range *sr = NULL; struct publication *p = NULL; + struct service_range *sr; + struct tipc_service *sc; bool last; + sc = tipc_service_find(net, ua); 
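A note on the ordering used by tipc_publ_sort() above: publication_after() borrows time_after32() semantics, a wrap-safe "newer than" comparison on the 32-bit publication id. A user-space equivalent:

#include <stdint.h>
#include <stdio.h>

/* wrap-safe "ida was assigned after idb", like time_after32(ida, idb) */
static int publication_after(uint32_t ida, uint32_t idb)
{
        return (int32_t)(idb - ida) < 0;
}

int main(void)
{
        /* id 5 is newer than id 0xfffffffe once the counter has wrapped */
        printf("%d\n", publication_after(5, 0xfffffffe)); /* prints 1 */
        return 0;
}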
if (!sc) - return NULL; + goto exit; spin_lock_bh(&sc->lock); - sr = tipc_service_find_range(sc, lower, upper); + sr = tipc_service_find_range(sc, ua); if (!sr) - goto exit; - p = tipc_service_remove_publ(sr, node, key); + goto unlock; + p = tipc_service_remove_publ(sr, sk, key); if (!p) - goto exit; + goto unlock; /* Notify any waiting subscriptions */ last = list_empty(&sr->all_publ); list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { - tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN, - p->port, node, p->scope, last); + tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last); } /* Remove service range item if this was its last publication */ if (list_empty(&sr->all_publ)) { - rb_erase(&sr->tree_node, &sc->ranges); + rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); kfree(sr); } - /* Delete service item if this no more publications and subscriptions */ + /* Delete service item if no more publications and subscriptions */ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) { hlist_del_init_rcu(&sc->service_list); kfree_rcu(sc, rcu); } -exit: +unlock: spin_unlock_bh(&sc->lock); +exit: + if (!p) { + pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n", + ua->sr.type, ua->sr.lower, ua->sr.upper, + sk->node, sk->ref, key); + } return p; } /** - * tipc_nametbl_translate - perform service instance to socket translation + * tipc_nametbl_lookup_anycast - perform service instance to socket translation + * @net: network namespace + * @ua: service address to look up + * @sk: address to socket we want to find * - * On entry, 'dnode' is the search domain used during translation. + * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be + * performed, which may not be this one. * * On exit: - * - if translation is deferred to another node, leave 'dnode' unchanged and - * return 0 - * - if translation is attempted and succeeds, set 'dnode' to the publishing - * node and return the published (non-zero) port number - * - if translation is attempted and fails, set 'dnode' to 0 and return 0 + * + * - If lookup is deferred to another node, leave 'sk->node' unchanged and + * return 'true'. + * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which + * represent the bound socket and return 'true'. 
+ * - If lookup fails, return 'false' * * Note that for legacy users (node configured with Z.C.N address format) the - * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0 + * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0 * we must look in the local binding list first */ -u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode) +bool tipc_nametbl_lookup_anycast(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk) { struct tipc_net *tn = tipc_net(net); bool legacy = tn->legacy_addr_format; u32 self = tipc_own_addr(net); - struct service_range *sr; + u32 inst = ua->sa.instance; + struct service_range *r; struct tipc_service *sc; - struct list_head *list; struct publication *p; - u32 port = 0; - u32 node = 0; + struct list_head *l; + bool res = false; - if (!tipc_in_scope(legacy, *dnode, self)) - return 0; + if (!tipc_in_scope(legacy, sk->node, self)) + return true; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (unlikely(!sc)) - goto not_found; + goto exit; spin_lock_bh(&sc->lock); - sr = tipc_service_first_range(sc, instance); - if (unlikely(!sr)) - goto no_match; - - /* Select lookup algorithm: local, closest-first or round-robin */ - if (*dnode == self) { - list = &sr->local_publ; - if (list_empty(list)) - goto no_match; - p = list_first_entry(list, struct publication, local_publ); - list_move_tail(&p->local_publ, &sr->local_publ); - } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) { - list = &sr->local_publ; - p = list_first_entry(list, struct publication, local_publ); - list_move_tail(&p->local_publ, &sr->local_publ); - } else { - list = &sr->all_publ; - p = list_first_entry(list, struct publication, all_publ); - list_move_tail(&p->all_publ, &sr->all_publ); + service_range_foreach_match(r, sc, inst, inst) { + /* Select lookup algo: local, closest-first or round-robin */ + if (sk->node == self) { + l = &r->local_publ; + if (list_empty(l)) + continue; + p = list_first_entry(l, struct publication, local_publ); + list_move_tail(&p->local_publ, &r->local_publ); + } else if (legacy && !sk->node && !list_empty(&r->local_publ)) { + l = &r->local_publ; + p = list_first_entry(l, struct publication, local_publ); + list_move_tail(&p->local_publ, &r->local_publ); + } else { + l = &r->all_publ; + p = list_first_entry(l, struct publication, all_publ); + list_move_tail(&p->all_publ, &r->all_publ); + } + *sk = p->sk; + res = true; + /* Todo: as for legacy, pick the first matching range only, a + * "true" round-robin will be performed as needed. + */ + break; } - port = p->port; - node = p->node; -no_match: spin_unlock_bh(&sc->lock); -not_found: + +exit: rcu_read_unlock(); - *dnode = node; - return port; + return res; } -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, - struct list_head *dsts, int *dstcnt, u32 exclude, - bool all) +/* tipc_nametbl_lookup_group(): lookup destination(s) in a communication group + * Returns a list of one (== group anycast) or more (== group multicast) + * destination socket/node pairs matching the given address. + * The requester may or may not want to exclude itself from the list.
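The anycast selection above implements round-robin by moving the chosen publication to the tail of its list, so the next lookup picks a different head. The effect, modeled on a plain array:

#include <stdio.h>

int main(void)
{
        unsigned int ports[] = { 1001, 1002, 1003 };
        unsigned int n = 3, head = 0, i;

        for (i = 0; i < 5; i++) {
                /* pick the head, then rotate it to the tail
                 * (the list_move_tail() effect above)
                 */
                printf("lookup %u -> port %u\n", i, ports[head]);
                head = (head + 1) % n;
        }
        return 0;
}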
+ */ +bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua, + struct list_head *dsts, int *dstcnt, + u32 exclude, bool mcast) { u32 self = tipc_own_addr(net); + u32 inst = ua->sa.instance; struct service_range *sr; struct tipc_service *sc; struct publication *p; *dstcnt = 0; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (unlikely(!sc)) goto exit; spin_lock_bh(&sc->lock); - sr = tipc_service_first_range(sc, instance); + /* Todo: a full search i.e. service_range_foreach_match() instead? */ + sr = service_range_match_first(sc->ranges.rb_node, inst, inst); if (!sr) goto no_match; list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->scope != scope) + if (p->scope != ua->scope) continue; - if (p->port == exclude && p->node == self) + if (p->sk.ref == exclude && p->sk.node == self) continue; - tipc_dest_push(dsts, p->node, p->port); + tipc_dest_push(dsts, p->sk.node, p->sk.ref); (*dstcnt)++; - if (all) + if (mcast) continue; list_move_tail(&p->all_publ, &sr->all_publ); break; @@ -511,30 +667,29 @@ exit: return !list_empty(dsts); } -void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports) +/* tipc_nametbl_lookup_mcast_sockets(): look up node local destination sockets + * matching the given address + * Used on nodes which have received a multicast/broadcast message + * Returns a list of local sockets + */ +void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua, + struct list_head *dports) { struct service_range *sr; struct tipc_service *sc; struct publication *p; - struct rb_node *n; + u8 scope = ua->scope; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (!sc) goto exit; spin_lock_bh(&sc->lock); - - for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->upper < lower) - continue; - if (sr->lower > upper) - break; + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { list_for_each_entry(p, &sr->local_publ, local_publ) { - if (p->scope == scope || (!exact && p->scope < scope)) - tipc_dest_push(dports, 0, p->port); + if (scope == p->scope || scope == TIPC_ANY_SCOPE) + tipc_dest_push(dports, 0, p->sk.ref); } } spin_unlock_bh(&sc->lock); @@ -542,33 +697,27 @@ exit: rcu_read_unlock(); } -/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes - * - Creates list of nodes that overlap the given multicast address - * - Determines if any node local destinations overlap +/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching + * the given address.
-/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes - * - Creates list of nodes that overlap the given multicast address - * - Determines if any node local destinations overlap +/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching + * the given address + * Used on nodes which are sending out a multicast/broadcast message + * Returns a list of nodes, including own node if applicable */ -void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, struct tipc_nlist *nodes) +void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua, + struct tipc_nlist *nodes) { struct service_range *sr; struct tipc_service *sc; struct publication *p; - struct rb_node *n; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (!sc) goto exit; spin_lock_bh(&sc->lock); - - for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { - sr = container_of(n, struct service_range, tree_node); - if (sr->upper < lower) - continue; - if (sr->lower > upper) - break; + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { list_for_each_entry(p, &sr->all_publ, all_publ) { - tipc_nlist_add(nodes, p->node); + tipc_nlist_add(nodes, p->sk.node); } } spin_unlock_bh(&sc->lock); @@ -579,7 +728,7 @@ exit: /* tipc_nametbl_build_group - build list of communication group members */ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 scope) + struct tipc_uaddr *ua) { struct service_range *sr; struct tipc_service *sc; @@ -587,7 +736,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, struct rb_node *n; rcu_read_lock(); - sc = tipc_service_find(net, type); + sc = tipc_service_find(net, ua); if (!sc) goto exit; @@ -595,9 +744,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { sr = container_of(n, struct service_range, tree_node); list_for_each_entry(p, &sr->all_publ, all_publ) { - if (p->scope != scope) + if (p->scope != ua->scope) continue; - tipc_group_add_member(grp, p->node, p->port, p->lower); + tipc_group_add_member(grp, p->sk.node, p->sk.ref, + p->sr.lower); } } spin_unlock_bh(&sc->lock); @@ -607,14 +757,14 @@ exit: /* tipc_nametbl_publish - add service binding to name table */ -struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, - u32 upper, u32 scope, u32 port, - u32 key) +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); struct publication *p = NULL; struct sk_buff *skb = NULL; + u32 rc_dests; spin_lock_bh(&tn->nametbl_lock); @@ -623,77 +773,78 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, goto exit; } - p = tipc_nametbl_insert_publ(net, type, lower, upper, scope, - tipc_own_addr(net), port, key); + p = tipc_nametbl_insert_publ(net, ua, sk, key); if (p) { nt->local_publ_count++; skb = tipc_named_publish(net, p); } + rc_dests = nt->rc_dests; exit: spin_unlock_bh(&tn->nametbl_lock); if (skb) - tipc_node_broadcast(net, skb); + tipc_node_broadcast(net, skb, rc_dests); return p; + }
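
In user space, a publication like the one inserted above is created by binding a TIPC socket to a service address or range, and it is withdrawn again when the socket unbinds or closes. A small sketch against the standard AF_TIPC socket API (the service type 18888 and the instance range are arbitrary example values):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/tipc.h>

int main(void)
{
	struct sockaddr_tipc sa;
	int sd = socket(AF_TIPC, SOCK_RDM, 0);

	if (sd < 0) {
		perror("socket");
		return 1;
	}
	memset(&sa, 0, sizeof(sa));
	sa.family = AF_TIPC;
	sa.addrtype = TIPC_SERVICE_RANGE;	/* bind a range of instances */
	sa.scope = TIPC_CLUSTER_SCOPE;		/* visible cluster-wide */
	sa.addr.nameseq.type = 18888;		/* service type, example only */
	sa.addr.nameseq.lower = 17;
	sa.addr.nameseq.upper = 21;

	/* This ends up in tipc_nametbl_publish() on the kernel side ... */
	if (bind(sd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		perror("bind");
		return 1;
	}
	/* ... and closing the socket withdraws the binding again. */
	return 0;
}
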
/** * tipc_nametbl_withdraw - withdraw a service binding + * @net: network namespace + * @ua: service address/range being unbound + * @sk: address of the socket being unbound from + * @key: target publication key */ -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, - u32 upper, u32 key) +void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); - u32 self = tipc_own_addr(net); struct sk_buff *skb = NULL; struct publication *p; + u32 rc_dests; spin_lock_bh(&tn->nametbl_lock); - p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key); + p = tipc_nametbl_remove_publ(net, ua, sk, key); if (p) { nt->local_publ_count--; skb = tipc_named_withdraw(net, p); list_del_init(&p->binding_sock); kfree_rcu(p, rcu); - } else { - pr_err("Failed to remove local publication {%u,%u,%u}/%u\n", - type, lower, upper, key); } + rc_dests = nt->rc_dests; spin_unlock_bh(&tn->nametbl_lock); - if (skb) { - tipc_node_broadcast(net, skb); - return 1; - } - return 0; + if (skb) + tipc_node_broadcast(net, skb, rc_dests); } /** * tipc_nametbl_subscribe - add a subscription object to the name table + * @sub: subscription to add */ bool tipc_nametbl_subscribe(struct tipc_subscription *sub) { - struct name_table *nt = tipc_name_table(sub->net); struct tipc_net *tn = tipc_net(sub->net); - struct tipc_subscr *s = &sub->evt.s; - u32 type = tipc_sub_read(s, seq.type); + u32 type = sub->s.seq.type; struct tipc_service *sc; + struct tipc_uaddr ua; bool res = true; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, + sub->s.seq.lower, sub->s.seq.upper); spin_lock_bh(&tn->nametbl_lock); - sc = tipc_service_find(sub->net, type); + sc = tipc_service_find(sub->net, &ua); if (!sc) - sc = tipc_service_create(type, &nt->services[hash(type)]); + sc = tipc_service_create(sub->net, &ua); if (sc) { spin_lock_bh(&sc->lock); tipc_service_subscribe(sc, sub); spin_unlock_bh(&sc->lock); } else { - pr_warn("Failed to subscribe for {%u,%u,%u}\n", type, - tipc_sub_read(s, seq.lower), - tipc_sub_read(s, seq.upper)); + pr_warn("Failed to subscribe for {%u,%u,%u}\n", + type, sub->s.seq.lower, sub->s.seq.upper); res = false; } spin_unlock_bh(&tn->nametbl_lock); @@ -702,16 +853,18 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub) /** * tipc_nametbl_unsubscribe - remove a subscription object from name table + * @sub: subscription to remove */ void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) { struct tipc_net *tn = tipc_net(sub->net); - struct tipc_subscr *s = &sub->evt.s; - u32 type = tipc_sub_read(s, seq.type); struct tipc_service *sc; + struct tipc_uaddr ua; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper); spin_lock_bh(&tn->nametbl_lock); - sc = tipc_service_find(sub->net, type); + sc = tipc_service_find(sub->net, &ua); if (!sc) goto exit; @@ -751,7 +904,9 @@ int tipc_nametbl_init(struct net *net) } /** - * tipc_service_delete - purge all publications for a service and delete it + * tipc_service_delete - purge all publications for a service and delete it + * @net: the associated network namespace + * @sc: tipc_service to delete */ static void tipc_service_delete(struct net *net, struct tipc_service *sc) { @@ -761,10 +916,10 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc) spin_lock_bh(&sc->lock); rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) { list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) { - tipc_service_remove_publ(sr, p->node, p->key); + tipc_service_remove_publ(sr, &p->sk, p->key); kfree_rcu(p, rcu); } - rb_erase(&sr->tree_node, &sc->ranges); + rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); kfree(sr); } hlist_del_init_rcu(&sc->service_list); @@ -794,8 +949,8 @@ void tipc_nametbl_stop(struct net *net) } spin_unlock_bh(&tn->nametbl_lock); - synchronize_net(); - kfree(nt); + /* TODO: clear tn->nametbl, implement proper RCU rules ? 
*/ + kfree_rcu(nt, rcu); } static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, @@ -812,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, list_for_each_entry(p, &sr->all_publ, all_publ) if (p->key == *last_key) break; - if (p->key != *last_key) + if (list_entry_is_head(p, &sr->all_publ, all_publ)) return -EPIPE; } else { p = list_first_entry(&sr->all_publ, @@ -829,11 +984,11 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE); if (!attrs) goto msg_full; - b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + b = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); if (!b) goto attr_msg_full; @@ -845,9 +1000,9 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref)) goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) goto publ_msg_full; @@ -898,6 +1053,7 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, struct tipc_net *tn = tipc_net(net); struct tipc_service *service = NULL; struct hlist_head *head; + struct tipc_uaddr ua; int err; int i; @@ -909,8 +1065,11 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = &tn->nametbl->services[i]; - if (*last_type) { - service = tipc_service_find(net, *last_type); + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + *last_type, *last_lower, *last_lower); + service = tipc_service_find(net, &ua); if (!service) return -EPIPE; } else { @@ -1043,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l) kfree(dst); } } - -int tipc_dest_list_len(struct list_head *l) -{ - struct tipc_dest *dst; - int i = 0; - - list_for_each_entry(dst, l, list) { - i++; - } - return i; -} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index f79066334cc8..7ff6eeebaae6 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,6 +42,7 @@ struct tipc_subscription; struct tipc_plist; struct tipc_nlist; struct tipc_group; +struct tipc_uaddr; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -49,18 +51,18 @@ struct tipc_group; #define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1) #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ +#define TIPC_ANY_SCOPE 10 /* Both node and cluster scope will match */ + /** - * struct publication - info about a published (name or) name sequence - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound + * struct publication - info about a published service address or range + * @sr: service range represented by this publication + * @sk: address of socket bound to this publication * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE - * @node: network address of publishing socket's node - * @port: publishing port * @key: publication key, unique across the cluster + * @id: publication id * @binding_node: all publications from the same node which bound this one - * - Remote publications: in node->publ_list - * Used by node/name distr to withdraw publications when node is lost + * - Remote publications: in node->publ_list; + * Used by node/name distr to withdraw publications when node is lost * - Local/node scope publications: in name_table->node_scope list * - Local/cluster scope publications: in name_table->cluster_scope list * @binding_sock: all publications from the same socket which bound this one @@ -69,64 +71,72 @@ struct tipc_group; * Used by closest_first and multicast receive lookup algorithms * @all_publ: all publications identical to this one, whatever node and scope * Used by round-robin lookup algorithm + * @list: to form a list of publications in temporal order * @rcu: RCU callback head used for deferred freeing */ struct publication { - u32 type; - u32 lower; - u32 upper; - u32 scope; - u32 node; - u32 port; + struct tipc_service_range sr; + struct tipc_socket_addr sk; + u16 scope; u32 key; + u32 id; struct list_head binding_node; struct list_head binding_sock; struct list_head local_publ; struct list_head all_publ; + struct list_head list; struct rcu_head rcu; }; /** * struct name_table - table containing all existing port name publications - * @seq_hlist: name sequence hash lists + * @rcu: RCU callback head used for deferred freeing + * @services: name sequence hash lists * @node_scope: all local publications with node scope * - used by name_distr during re-init of name table * @cluster_scope: all local publications with cluster scope * - used by name_distr to send bulk updates to new nodes * - used by name_distr during re-init of name table + * @cluster_scope_lock: lock for accessing @cluster_scope * @local_publ_count: number of publications issued by this node + * @rc_dests: destination node counter + * @snd_nxt: next sequence number to be used */ struct name_table { + struct rcu_head rcu; struct hlist_head services[TIPC_NAMETBL_SIZE]; struct list_head node_scope; struct list_head cluster_scope; rwlock_t cluster_scope_lock; u32 local_publ_count; + u32 rc_dests; + u32 snd_nxt; }; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); - -u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports); +bool 
tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk); +void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua, + struct list_head *dports); +void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua, + struct tipc_nlist *nodes); +bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua, + struct list_head *dsts, int *dstcnt, + u32 exclude, bool mcast); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 domain); -void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, struct tipc_nlist *nodes); -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, - struct list_head *dsts, int *dstcnt, u32 exclude, - bool all); -struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, - u32 upper, u32 scope, u32 port, - u32 key); -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper, - u32 key); -struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, - u32 lower, u32 upper, u32 scope, - u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 upper, - u32 node, u32 key); + struct tipc_uaddr *ua); +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key); +void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key); bool tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); @@ -143,6 +153,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port); bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); bool tipc_dest_del(struct list_head *l, u32 node, u32 port); void tipc_dest_list_purge(struct list_head *l); -int tipc_dest_list_len(struct list_head *l); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index f076edb74338..7e65d0b0c4a8 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -41,7 +41,9 @@ #include "socket.h" #include "node.h" #include "bcast.h" +#include "link.h" #include "netlink.h" +#include "monitor.h" /* * The TIPC locking policy is designed to ensure a very fine locking @@ -88,7 +90,7 @@ * - A spin lock to protect the registry of kernel/driver users (reg.c) * - A global spin_lock (tipc_port_lock), which only task is to ensure * consistency where more than one port is involved in an operation, - * i.e., whe a port is part of a linked list of ports. + * i.e., when a port is part of a linked list of ports. * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * @@ -104,12 +106,6 @@ * - A local spin_lock protecting the queue of subscriber events. 
*/ -struct tipc_net_work { - struct work_struct work; - struct net *net; - u32 addr; -}; - static void tipc_net_finalize(struct net *net, u32 addr); int tipc_net_init(struct net *net, u8 *node_id, u32 addr) @@ -130,45 +126,35 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr) static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); + struct tipc_socket_addr sk = {0, addr}; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + TIPC_NODE_STATE, addr, addr); if (cmpxchg(&tn->node_addr, 0, addr)) return; tipc_set_node_addr(net, addr); tipc_named_reinit(net); tipc_sk_reinit(net); - tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, - TIPC_CLUSTER_SCOPE, 0, addr); + tipc_mon_reinit_self(net); + tipc_nametbl_publish(net, &ua, &sk, addr); } -static void tipc_net_finalize_work(struct work_struct *work) +void tipc_net_finalize_work(struct work_struct *work) { - struct tipc_net_work *fwork; - - fwork = container_of(work, struct tipc_net_work, work); - tipc_net_finalize(fwork->net, fwork->addr); - kfree(fwork); -} + struct tipc_net *tn = container_of(work, struct tipc_net, work); -void tipc_sched_net_finalize(struct net *net, u32 addr) -{ - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); - - if (!fwork) - return; - INIT_WORK(&fwork->work, tipc_net_finalize_work); - fwork->net = net; - fwork->addr = addr; - schedule_work(&fwork->work); + rtnl_lock(); + tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); + rtnl_unlock(); } void tipc_net_stop(struct net *net) { - u32 self = tipc_own_addr(net); - - if (!self) + if (!tipc_own_id(net)) return; - tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self); rtnl_lock(); tipc_bearer_stop(net); tipc_node_stop(net); @@ -190,7 +176,7 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NET); if (!attrs) goto msg_full; @@ -248,9 +234,9 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, - info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); if (err) return err; @@ -303,3 +289,59 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return err; } + +static int __tipc_nl_addr_legacy_get(struct net *net, struct tipc_nl_msg *msg) +{ + struct tipc_net *tn = tipc_net(net); + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_ADDR_LEGACY_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; + + if (tn->legacy_addr_format) + if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + struct sk_buff *rep; + int err; + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = 
info->snd_seq; + + err = __tipc_nl_addr_legacy_get(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} diff --git a/net/tipc/net.h b/net/tipc/net.h index b7f2e364eb99..1cb1e43cf34a 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -42,10 +42,11 @@ extern const struct nla_policy tipc_nl_net_policy[]; int tipc_net_init(struct net *net, u8 *node_id, u32 addr); -void tipc_sched_net_finalize(struct net *net, u32 addr); +void tipc_net_finalize_work(struct work_struct *work); void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 99ee419210ba..1a9a5bdaccf4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -83,11 +83,12 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 }, [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 }, + [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG } }; const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, + [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_LINK_NAME }, [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, @@ -102,7 +103,13 @@ const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, - [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } + [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_NODE_ID] = { .type = NLA_BINARY, + .len = TIPC_NODEID_LEN}, + [TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY, + .len = TIPC_AEAD_KEY_SIZE_MAX}, + [TIPC_NLA_NODE_KEY_MASTER] = { .type = NLA_FLAG }, + [TIPC_NLA_NODE_REKEYING] = { .type = NLA_U32 }, }; /* Properties valid for media, bearer and link */ @@ -110,12 +117,15 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, - [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_BEARER_NAME }, [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } @@ -141,117 +151,137 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_disable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ENABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_enable, - .policy 
= tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_add, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_SOCK_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, .done = tipc_dump_done, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PUBL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_nl_publ_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_GET, + .validate = GENL_DONT_VALIDATE_STRICT, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_set_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_reset_link_stats, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_media_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NODE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_node_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_net_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_net_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NAME_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_name_table_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_set_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_get_monitor, .dumpit = tipc_nl_node_dump_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_PEER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_nl_node_dump_monitor_peer, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PEER_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_peer_rm, - .policy = tipc_nl_policy, }, #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = tipc_udp_nl_dump_remoteip, - .policy = tipc_nl_policy, }, #endif +#ifdef CONFIG_TIPC_CRYPTO + { + .cmd = TIPC_NL_KEY_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_set_key, + }, + { + .cmd = TIPC_NL_KEY_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_flush_key, + }, +#endif + { + .cmd = 
TIPC_NL_ADDR_LEGACY_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_net_addr_legacy_get, + }, }; struct genl_family tipc_genl_family __ro_after_init = { @@ -259,24 +289,14 @@ struct genl_family tipc_genl_family __ro_after_init = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .policy = tipc_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tipc_genl_v2_ops, .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), + .resv_start_op = TIPC_NL_ADDR_LEGACY_GET + 1, }; -int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) -{ - u32 maxattr = tipc_genl_family.maxattr; - - *attr = genl_family_attrbuf(&tipc_genl_family); - if (!*attr) - return -EOPNOTSUPP; - - return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy, - NULL); -} - int __init tipc_netlink_start(void) { int res; diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index 4ba0ad422110..7cf777723e3e 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -38,7 +38,6 @@ #include <net/netlink.h> extern struct genl_family tipc_genl_family; -int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { struct sk_buff *skb; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4ad3586da8f0..079aebb16ed8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -39,6 +39,7 @@ #include "node.h" #include "net.h" #include <net/genetlink.h> +#include <linux/string_helpers.h> #include <linux/tipc_config.h> /* The legacy API had an artificial message length limit called @@ -55,6 +56,7 @@ struct tipc_nl_compat_msg { int rep_type; int rep_size; int req_type; + int req_size; struct net *net; struct sk_buff *rep; struct tlv_desc *req; @@ -100,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return -EMSGSIZE; skb_put(skb, TLV_SPACE(len)); + memset(tlv, 0, TLV_SPACE(len)); tlv->tlv_type = htons(type); tlv->tlv_len = htons(TLV_LENGTH(len)); if (len && data) @@ -117,7 +120,8 @@ static void tipc_tlv_init(struct sk_buff *skb, u16 type) skb_put(skb, sizeof(struct tlv_desc)); } -static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) +static __printf(2, 3) int tipc_tlv_sprintf(struct sk_buff *skb, + const char *fmt, ...) { int n; u16 len; @@ -164,31 +168,29 @@ static struct sk_buff *tipc_get_err_tlv(char *str) int str_len = strlen(str) + 1; struct sk_buff *buf; - buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + buf = tipc_tlv_alloc(str_len); if (buf) tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); return buf; } -static inline bool string_is_valid(char *s, int len) -{ - return memchr(s, '\0', len) ? 
true : false; -} - static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) { + struct genl_dumpit_info info; int len = 0; int err; struct sk_buff *buf; struct nlmsghdr *nlmsg; struct netlink_callback cb; + struct nlattr **attrbuf; memset(&cb, 0, sizeof(cb)); cb.nlh = (struct nlmsghdr *)arg->data; cb.skb = arg; + cb.data = &info; buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!buf) @@ -200,19 +202,37 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; } + attrbuf = kcalloc(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto err_out; + } + + info.info.attrs = attrbuf; + + if (nlmsg_len(cb.nlh) > 0) { + err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, NULL); + if (err) + goto err_out; + } do { int rem; len = (*cmd->dumpit)(buf, &cb); nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { - struct nlattr **attrs; - - err = tipc_nlmsg_parse(nlmsg, &attrs); + err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN, + attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, + NULL); if (err) goto err_out; - err = (*cmd->format)(msg, attrs); + err = (*cmd->format)(msg, attrbuf); if (err) goto err_out; @@ -230,6 +250,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, err = 0; err_out: + kfree(attrbuf); tipc_dump_done(&cb); kfree_skb(buf); @@ -254,10 +275,12 @@ err_out: static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg) { - int err; + struct nlmsghdr *nlh; struct sk_buff *arg; + int err; - if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) return -EINVAL; msg->rep = tipc_tlv_alloc(msg->rep_size); @@ -267,8 +290,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { @@ -277,6 +306,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return -ENOMEM; } + nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI); + if (!nlh) { + kfree_skb(arg); + kfree_skb(msg->rep); + msg->rep = NULL; + return -EMSGSIZE; + } + nlmsg_end(arg, nlh); + err = __tipc_nl_compat_dumpit(cmd, msg, arg); if (err) { kfree_skb(msg->rep); @@ -322,9 +360,9 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (err) goto doit_out; - err = nla_parse(attrbuf, tipc_genl_family.maxattr, - (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL, NULL); + err = nla_parse_deprecated(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); if (err) goto doit_out; @@ -348,7 +386,8 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, { int err; - if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) return -EINVAL; err = __tipc_nl_compat_doit(cmd, msg); @@ -372,8 +411,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(bearer, 
TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], NULL, NULL); + err = nla_parse_nested_deprecated(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL, NULL); if (err) return err; @@ -393,12 +432,17 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, b = (struct tipc_bearer_config *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); - if (!string_is_valid(b->name, len)) + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); + if (!string_is_terminated(b->name, len)) return -EINVAL; if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) @@ -408,7 +452,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, return -EMSGSIZE; if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { - prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); if (!prop) return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) @@ -430,12 +474,16 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, name = (char *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); - if (!string_is_valid(name, len)) + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); + if (!string_is_terminated(name, len)) return -EINVAL; if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) @@ -503,38 +551,44 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL, NULL); + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); if (err) return err; if (!link[TIPC_NLA_LINK_PROP]) return -EINVAL; - err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, - link[TIPC_NLA_LINK_PROP], NULL, NULL); + err = nla_parse_nested_deprecated(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL, + NULL); if (err) return err; if (!link[TIPC_NLA_LINK_STATS]) return -EINVAL; - err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, - link[TIPC_NLA_LINK_STATS], NULL, NULL); + err = nla_parse_nested_deprecated(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL, + NULL); if (err) return err; name = (char *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); - if (!string_is_valid(name, len)) + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_terminated(name, len)) return -EINVAL; if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) return 0; tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", - nla_data(link[TIPC_NLA_LINK_NAME])); + (char *)nla_data(link[TIPC_NLA_LINK_NAME])); if (link[TIPC_NLA_LINK_BROADCAST]) { __fill_bc_link_stat(msg, prop, stats); @@ -634,14 +688,14 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL, NULL); + err = 
nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); if (err) return err; - link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); + link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST])); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], + nla_strscpy(link_info.str, link[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, @@ -670,22 +724,17 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *media; struct tipc_link_config *lc; - int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); - media = nla_nest_start(skb, TIPC_NLA_MEDIA); + media = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA); if (!media) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); - if (!string_is_valid(lc->name, len)) - return -EINVAL; - if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA_PROP); if (!prop) return -EMSGSIZE; @@ -702,22 +751,17 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *bearer; struct tipc_link_config *lc; - int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); - if (!string_is_valid(lc->name, len)) - return -EINVAL; - if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); if (!prop) return -EMSGSIZE; @@ -737,14 +781,14 @@ static int __tipc_nl_compat_link_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - link = nla_nest_start(skb, TIPC_NLA_LINK); + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); if (!link) return -EMSGSIZE; if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_LINK_PROP); if (!prop) return -EMSGSIZE; @@ -766,8 +810,13 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); - if (!string_is_valid(lc->name, len)) + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_terminated(lc->name, len)) return -EINVAL; media = tipc_media_find(lc->name); @@ -795,12 +844,16 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, name = (char *)TLV_DATA(msg->req); - link = nla_nest_start(skb, TIPC_NLA_LINK); + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); if (!link) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); - if (!string_is_valid(name, len)) + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_terminated(name, len)) return -EINVAL; if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) @@ -824,7 +877,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct 
tipc_name_table_query *)TLV_DATA(msg->req); - if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query)) return -EINVAL; depth = ntohl(ntq->depth); @@ -853,16 +906,18 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NAME_TABLE]) return -EINVAL; - err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL, NULL); + err = nla_parse_nested_deprecated(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL, + NULL); if (err) return err; if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) return -EINVAL; - err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, - nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL); + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, + NULL); if (err) return err; @@ -921,8 +976,8 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_PUBL]) return -EINVAL; - err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], - NULL, NULL); + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + attrs[TIPC_NLA_PUBL], NULL, NULL); if (err) return err; @@ -957,7 +1012,7 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) return -EMSGSIZE; } - nest = nla_nest_start(args, TIPC_NLA_SOCK); + nest = nla_nest_start_noflag(args, TIPC_NLA_SOCK); if (!nest) { kfree_skb(args); return -EMSGSIZE; @@ -991,8 +1046,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; - err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - NULL, NULL); + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], NULL, NULL); if (err) return err; @@ -1003,8 +1058,9 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - err = nla_parse_nested(con, TIPC_NLA_CON_MAX, - sock[TIPC_NLA_SOCK_CON], NULL, NULL); + err = nla_parse_nested_deprecated(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], + NULL, NULL); if (err) return err; @@ -1043,8 +1099,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, - attrs[TIPC_NLA_MEDIA], NULL, NULL); + err = nla_parse_nested_deprecated(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); if (err) return err; @@ -1063,8 +1119,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NODE]) return -EINVAL; - err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], - NULL, NULL); + err = nla_parse_nested_deprecated(node, TIPC_NLA_NODE_MAX, + attrs[TIPC_NLA_NODE], NULL, NULL); if (err) return err; @@ -1084,7 +1140,7 @@ static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, val = ntohl(*(__be32 *)TLV_DATA(msg->req)); - net = nla_nest_start(skb, TIPC_NLA_NET); + net = nla_nest_start_noflag(skb, TIPC_NLA_NET); if (!net) return -EMSGSIZE; @@ -1110,8 +1166,8 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], - NULL, NULL); + err = nla_parse_nested_deprecated(net, TIPC_NLA_NET_MAX, + attrs[TIPC_NLA_NET], NULL, NULL); if (err) return err; @@ -1239,7 +1295,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) 
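
A pattern repeated throughout the compat fixes above: never trust a TLV payload length coming from user space. Check that the payload is large enough before casting, clamp it before scanning, and require NUL termination for strings (the string_is_terminated()/memchr() checks). A condensed sketch of that validation (hedged; the helper name is invented):

#include <stdbool.h>
#include <string.h>

/* Return true only if 'payload' (payload_len bytes, caller-controlled)
 * holds a NUL-terminated string no longer than name_max bytes. */
static bool tlv_name_ok(const char *payload, int payload_len, int name_max)
{
	if (payload_len <= 0)		/* empty or negative: reject */
		return false;
	if (payload_len > name_max)	/* never scan past the maximum */
		payload_len = name_max;
	return memchr(payload, '\0', payload_len) != NULL;
}
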
struct tipc_nl_compat_msg msg; struct nlmsghdr *req_nlh; struct nlmsghdr *rep_nlh; - struct tipc_genlmsghdr *req_userhdr = info->userhdr; + struct tipc_genlmsghdr *req_userhdr = genl_info_userhdr(info); memset(&msg, 0, sizeof(msg)); @@ -1255,8 +1311,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) goto send; } - len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (!len || !TLV_OK(msg.req, len)) { + msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; @@ -1281,9 +1337,10 @@ send: return err; } -static const struct genl_ops tipc_genl_compat_ops[] = { +static const struct genl_small_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_compat_recv, }, }; @@ -1295,8 +1352,9 @@ static struct genl_family tipc_genl_compat_family __ro_after_init = { .maxattr = 0, .netnsok = true, .module = THIS_MODULE, - .ops = tipc_genl_compat_ops, - .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), + .small_ops = tipc_genl_compat_ops, + .n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops), + .resv_start_op = TIPC_GENL_CMD + 1, }; int __init tipc_netlink_compat_start(void) diff --git a/net/tipc/node.c b/net/tipc/node.c index db2a6c3e0be9..a07fb073368c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -44,6 +44,7 @@ #include "discover.h" #include "netlink.h" #include "trace.h" +#include "crypto.h" #define INVALID_NODE_SIG 0x10000 #define NODE_CLEANUP_AFTER 300000 @@ -74,21 +75,24 @@ struct tipc_bclink_entry { struct sk_buff_head arrvq; struct sk_buff_head inputq2; struct sk_buff_head namedq; + u16 named_rcv_nxt; + bool named_open; }; /** * struct tipc_node - TIPC node structure * @addr: network address of node - * @ref: reference counter to node object + * @kref: reference counter to node object * @lock: rwlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain - * @inputq: pointer to input queue containing messages for msg event - * @namedq: pointer to name table input queue with name table messages * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node + * @bc_entry: broadcast link entry * @action_flags: bit mask of different types of node actions * @state: connectivity state vs peer node + * @preliminary: a preliminary node or not + * @failover_sent: failover sent or not * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) @@ -96,9 +100,17 @@ struct tipc_bclink_entry { * @capabilities: bitmap, indicating peer node's functional capabilities * @signature: node instance identifier * @link_id: local and remote bearer ids of changing link, if any + * @peer_id: 128-bit ID of peer + * @peer_id_string: ID string of peer * @publ_list: list of publications + * @conn_sks: list of connections (FIXME) + * @timer: node's keepalive timer + * @keepalive_intv: keepalive interval in milliseconds * @rcu: rcu struct for tipc_node * @delete_at: indicates the time for deleting a down node + * @peer_net: peer's net namespace + * @peer_hash_mix: hash for this peer (FIXME) + * @crypto_rx: RX crypto handler */ struct tipc_node { u32 addr; @@ -112,6 +124,7 @@ 
struct tipc_node { int action_flags; struct list_head list; int state; + bool preliminary; bool failover_sent; u16 sync_point; int link_cnt; @@ -120,12 +133,18 @@ struct tipc_node { u32 signature; u32 link_id; u8 peer_id[16]; + char peer_id_string[NODE_ID_STR_LEN]; struct list_head publ_list; struct list_head conn_sks; unsigned long keepalive_intv; struct timer_list timer; struct rcu_head rcu; unsigned long delete_at; + struct net *peer_net; + u32 peer_hash_mix; +#ifdef CONFIG_TIPC_CRYPTO + struct tipc_crypto *crypto_rx; +#endif }; /* Node FSM states and events: @@ -163,7 +182,6 @@ static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); -static void tipc_node_put(struct tipc_node *node); static bool node_is_up(struct tipc_node *n); static void tipc_node_delete_from_list(struct tipc_node *node); @@ -184,7 +202,7 @@ static struct tipc_link *node_active_link(struct tipc_node *n, int sel) return n->links[bearer_id].link; } -int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected) { struct tipc_node *n; int bearer_id; @@ -194,6 +212,14 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) if (unlikely(!n)) return mtu; + /* Allow MAX_MSG_SIZE when building connection oriented message + * if they are in the same core network + */ + if (n->peer_net && connected) { + tipc_node_put(n); + return mtu; + } + bearer_id = n->active_links[sel & 1]; if (likely(bearer_id != INVALID_BEARER_ID)) mtu = n->links[bearer_id].mtu; @@ -235,20 +261,65 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr) return caps; } +u32 tipc_node_get_addr(struct tipc_node *node) +{ + return (node) ? node->addr : 0; +} + +char *tipc_node_get_id_str(struct tipc_node *node) +{ + return node->peer_id_string; +} + +#ifdef CONFIG_TIPC_CRYPTO +/** + * tipc_node_crypto_rx - Retrieve crypto RX handle from node + * @__n: target tipc_node + * Note: node ref counter must be held first! + */ +struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n) +{ + return (__n) ? __n->crypto_rx : NULL; +} + +struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos) +{ + return container_of(pos, struct tipc_node, list)->crypto_rx; +} + +struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr) +{ + struct tipc_node *n; + + n = tipc_node_find(net, addr); + return (n) ? 
n->crypto_rx : NULL; +} +#endif + +static void tipc_node_free(struct rcu_head *rp) +{ + struct tipc_node *n = container_of(rp, struct tipc_node, rcu); + +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&n->crypto_rx); +#endif + kfree(n); +} + static void tipc_node_kref_release(struct kref *kref) { struct tipc_node *n = container_of(kref, struct tipc_node, kref); kfree(n->bc_entry.link); - kfree_rcu(n, rcu); + call_rcu(&n->rcu, tipc_node_free); } -static void tipc_node_put(struct tipc_node *node) +void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } -static void tipc_node_get(struct tipc_node *node) +void tipc_node_get(struct tipc_node *node) { kref_get(&node->kref); } @@ -264,7 +335,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr) rcu_read_lock(); hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) { - if (node->addr != addr) + if (node->addr != addr || node->preliminary) continue; if (!kref_get_unless_zero(&node->kref)) node = NULL; @@ -299,42 +370,50 @@ static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id) } static void tipc_node_read_lock(struct tipc_node *n) + __acquires(n->lock) { read_lock_bh(&n->lock); } static void tipc_node_read_unlock(struct tipc_node *n) + __releases(n->lock) { read_unlock_bh(&n->lock); } static void tipc_node_write_lock(struct tipc_node *n) + __acquires(n->lock) { write_lock_bh(&n->lock); } static void tipc_node_write_unlock_fast(struct tipc_node *n) + __releases(n->lock) { write_unlock_bh(&n->lock); } static void tipc_node_write_unlock(struct tipc_node *n) + __releases(n->lock) { + struct tipc_socket_addr sk; struct net *net = n->net; - u32 addr = 0; u32 flags = n->action_flags; - u32 link_id = 0; - u32 bearer_id; struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id, node; if (likely(!flags)) { write_unlock_bh(&n->lock); return; } - addr = n->addr; - link_id = n->link_id; - bearer_id = link_id & 0xffff; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = tipc_own_addr(net); + node = n->addr; + bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list; n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@ -343,46 +422,117 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); + tipc_publ_notify(net, publ_list, node, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); + tipc_named_node_up(net, node, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, addr, bearer_id); - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, link_id); + tipc_mon_peer_up(net, node, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, sk.ref); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, addr, bearer_id); - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - addr, link_id); + tipc_mon_peer_down(net, node, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); + } +} + +static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes) +{ + int net_id = tipc_netid(n->net); + struct tipc_net *tn_peer; + struct net *tmp; + u32 hash_chk; + + if (n->peer_net) + return; + + for_each_net_rcu(tmp) { + tn_peer = tipc_net(tmp); + if (!tn_peer) + continue; + /* Integrity checking whether node exists in namespace or not */ + if (tn_peer->net_id != net_id) + continue; + if 
(memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN)) + continue; + hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random); + if (hash_mixes ^ hash_chk) + continue; + n->peer_net = tmp; + n->peer_hash_mix = hash_mixes; + break; } } -static struct tipc_node *tipc_node_create(struct net *net, u32 addr, - u8 *peer_id, u16 capabilities) +struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, + u16 capabilities, u32 hash_mixes, + bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; - struct tipc_link *l; + unsigned long intv; int bearer_id; int i; spin_lock_bh(&tn->node_list_lock); - n = tipc_node_find(net, addr); + n = tipc_node_find(net, addr) ?: + tipc_node_find_by_id(net, peer_id); if (n) { + if (!n->preliminary) + goto update; + if (preliminary) + goto exit; + /* A preliminary node becomes "real" now, refresh its data */ + tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } + n->preliminary = false; + n->addr = addr; + hlist_del_rcu(&n->hash); + hlist_add_head_rcu(&n->hash, + &tn->node_htable[tipc_hashfn(addr)]); + list_del_rcu(&n->list); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n->addr < temp_node->addr) + break; + } + list_add_tail_rcu(&n->list, &temp_node->list); + tipc_node_write_unlock_fast(n); + +update: + if (n->peer_hash_mix ^ hash_mixes) + tipc_node_assign_peer_net(n, hash_mixes); if (n->capabilities == capabilities) goto exit; /* Same node may come back with new capabilities */ - write_lock_bh(&n->lock); + tipc_node_write_lock(n); n->capabilities = capabilities; for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { l = n->links[bearer_id].link; if (l) tipc_link_update_caps(l, capabilities); } - write_unlock_bh(&n->lock); + tipc_node_write_unlock_fast(n); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + + tipc_bcast_toggle_rcast(net, + (tn->capabilities & TIPC_BCAST_RCAST)); + goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -390,9 +540,23 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, pr_warn("Node creation failed, no memory\n"); goto exit; } + tipc_nodeid2string(n->peer_id_string, peer_id); +#ifdef CONFIG_TIPC_CRYPTO + if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) { + pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string); + kfree(n); + n = NULL; + goto exit; + } +#endif n->addr = addr; + n->preliminary = preliminary; memcpy(&n->peer_id, peer_id, 16); n->net = net; + n->peer_net = NULL; + n->peer_hash_mix = 0; + /* Assign kernel local namespace if exists */ + tipc_node_assign_peer_net(n, hash_mixes); n->capabilities = capabilities; kref_init(&n->kref); rwlock_init(&n->lock); @@ -411,28 +575,35 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, U16_MAX, - tipc_link_window(tipc_bc_sndlink(net)), - 
n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, - tipc_bc_sndlink(net), - &n->bc_entry.link)) { + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n); + tipc_node_put(n); n = NULL; goto exit; } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); - n->keepalive_intv = U32_MAX; + /* Start a slow timer anyway, crypto needs it */ + n->keepalive_intv = 10000; + intv = jiffies + msecs_to_jiffies(n->keepalive_intv); + if (!mod_timer(&n->timer, intv)) + tipc_node_get(n); hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n->addr < temp_node->addr) break; } list_add_tail_rcu(&n->list, &temp_node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(&tn->node_list_lock); @@ -454,6 +625,9 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) static void tipc_node_delete_from_list(struct tipc_node *node) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_key_flush(node->crypto_rx); +#endif list_del_rcu(&node->list); hlist_del_rcu(&node->hash); tipc_node_put(node); @@ -464,7 +638,7 @@ static void tipc_node_delete(struct tipc_node *node) trace_tipc_node_delete(node, true, " "); tipc_node_delete_from_list(node); - del_timer_sync(&node->timer); + timer_delete_sync(&node->timer); tipc_node_put(node); } @@ -589,6 +763,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +779,19 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + if (!deleted) { + spin_unlock_bh(&tn->node_list_lock); + return deleted; + } + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + tipc_bcast_toggle_rcast(peer->net, + (tn->capabilities & TIPC_BCAST_RCAST)); spin_unlock_bh(&tn->node_list_lock); return deleted; } @@ -612,7 +800,7 @@ static bool tipc_node_cleanup(struct tipc_node *peer) */ static void tipc_node_timeout(struct timer_list *t) { - struct tipc_node *n = from_timer(n, t, timer); + struct tipc_node *n = timer_container_of(n, t, timer); struct tipc_link_entry *le; struct sk_buff_head xmitq; int remains = n->link_cnt; @@ -626,6 +814,10 @@ static void tipc_node_timeout(struct timer_list *t) return; } +#ifdef CONFIG_TIPC_CRYPTO + /* Take any crypto key related actions first */ + tipc_crypto_timeout(n->crypto_rx); +#endif __skb_queue_head_init(&xmitq); /* Initial node interval to value larger (10 seconds), then it will be @@ -646,7 +838,7 @@ static void tipc_node_timeout(struct timer_list *t) remains--; } tipc_node_read_unlock(n); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, 
bearer_id, false); } } @@ -655,6 +847,9 @@ static void tipc_node_timeout(struct timer_list *t) /** * __tipc_node_link_up - handle addition of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on * Node lock must be held by caller * Link becomes active (alone or shared) or standby, depending on its priority. */ @@ -678,7 +873,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE; + n->links[bearer_id].mtu = tipc_link_mss(nl); tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); @@ -695,7 +890,6 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); - n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -722,6 +916,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, /** * tipc_node_link_up - handle addition of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on * * Link becomes active (alone or shared) or standby, depending on its priority. */ @@ -733,12 +930,60 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); maddr = &n->links[bearer_id].maddr; - tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n); tipc_node_write_unlock(n); } /** + * tipc_node_link_failover() - start failover in case of "half-failover" + * + * This function is only called in a very special situation where link + * failover may already have started on the peer node, but not yet on this + * node. This can happen when e.g.:: + * + * 1. Both links <1A-2A>, <1B-2B> down + * 2. Link endpoint 2A up, but 1A still down (e.g. due to network + * disturbance, wrong session, etc.) + * 3. Link <1B-2B> up + * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) + * 5. Node 2 starts failover onto link <1B-2B> + * + * ==> Node 1 never starts link/node failover!
+ * + * @n: tipc node structure + * @l: link peer endpoint failing over (can be NULL) + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited on tnl link later + */ +static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, + struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + /* Avoid a "self-failover" that can never end */ + if (!tipc_link_is_up(tnl)) + return; + + /* Don't rush, the failed link may be in the process of resetting */ + if (l && !tipc_link_is_reset(l)) + return; + + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); + tipc_link_failover_prepare(l, tnl, xmitq); + + if (l) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); +} + +/** * __tipc_node_link_down - handle loss of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on + * @maddr: output media address of the bearer */ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct sk_buff_head *xmitq, @@ -817,10 +1062,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; - struct tipc_media_addr *maddr; - struct sk_buff_head xmitq; int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; if (!l) return; @@ -830,20 +1075,22 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete) { - kfree(l); - le->link = NULL; - n->link_cnt--; - } } else { /* Defuse pending tipc_node_link_up() */ + tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); } + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } trace_tipc_node_link_down(n, true, "node link down or deleted!"); tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n); tipc_sk_rcv(n->net, &le->inputq); } @@ -887,6 +1134,8 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) { struct tipc_net *tn = tipc_net(net); struct tipc_node *n; + bool preliminary; + u32 sugg_addr; /* Suggest new address if some other peer is using this one */ n = tipc_node_find(net, addr); @@ -902,9 +1151,11 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) /* Suggest previously used address if peer is known */ n = tipc_node_find_by_id(net, id); if (n) { - addr = n->addr; + sugg_addr = n->addr; + preliminary = n->preliminary; tipc_node_put(n); - return addr; + if (!preliminary) + return sugg_addr; } /* Even this node may be in conflict */ @@ -916,7 +1167,7 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) void tipc_node_check_dest(struct net *net, u32 addr, u8 *peer_id, struct tipc_bearer *b, - u16 capabilities, u32 signature, + u16 capabilities, u32 signature, u32 hash_mixes, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr) { @@ -926,8 +1177,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool addr_match = false; bool sign_match = false; bool link_up = false; + bool link_is_reset = false; bool accept_addr = false; - bool 
reset = true; + bool reset = false; char *if_name; unsigned long intv; u16 session; @@ -935,7 +1187,8 @@ void tipc_node_check_dest(struct net *net, u32 addr, *dupl_addr = false; *respond = false; - n = tipc_node_create(net, addr, peer_id, capabilities); + n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes, + false); if (!n) return; @@ -946,14 +1199,17 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - reset = false; + /* All is fine. Ignore requests. */ + /* Peer node is not a container/local namespace */ + if (!n->peer_hash_mix) + n->peer_hash_mix = hash_mixes; } else if (sign_match && addr_match && !link_up) { /* Respond. The link will come up in due time */ *respond = true; @@ -961,7 +1217,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Peer has changed i/f address without rebooting. * If so, the link will reset soon, and the next * discovery will be accepted. So we can ignore it. - * It may also be an cloned or malicious peer having + * It may also be a cloned or malicious peer having * chosen the same node address and signature as an * existing one. * Ignore requests until the link goes down, if ever. @@ -975,6 +1231,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, */ accept_addr = true; *respond = true; + reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already @@ -1006,6 +1263,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, n->signature = signature; accept_addr = true; *respond = true; + reset = true; } if (!accept_addr) @@ -1020,7 +1278,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, session, + b->min_win, b->max_win, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -1034,6 +1292,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1046,7 +1305,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); - if (reset && l && !tipc_link_is_reset(l)) + if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } @@ -1269,6 +1528,7 @@ static void node_lost_contact(struct tipc_node *n, /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); + skb_queue_purge(&n->bc_entry.namedq); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { @@ -1279,7 +1539,8 @@ static void node_lost_contact(struct tipc_node *n, /* Notify publications from this node */ n->action_flags |= TIPC_NOTIFY_NODE_DOWN; - + n->peer_net = NULL; + n->peer_hash_mix = 0; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = 
tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, @@ -1296,11 +1557,13 @@ static void node_lost_contact(struct tipc_node *n, /** * tipc_node_get_linkname - get the name of a link * + * @net: the applicable net namespace * @bearer_id: id of the bearer - * @node: peer node address + * @addr: peer node address * @linkname: link name output buffer + * @len: size of @linkname output buffer * - * Returns 0 on success + * Return: 0 on success */ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) @@ -1318,7 +1581,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { - strncpy(linkname, tipc_link_name(link), len); + strscpy(linkname, tipc_link_name(link), len); err = 0; } tipc_node_read_unlock(node); @@ -1338,7 +1601,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE); if (!attrs) goto msg_full; @@ -1361,14 +1624,65 @@ msg_full: return -EMSGSIZE; } +static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) +{ + struct tipc_msg *hdr = buf_msg(skb_peek(list)); + struct sk_buff_head inputq; + + switch (msg_user(hdr)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + if (msg_connected(hdr) || msg_named(hdr) || + msg_direct(hdr)) { + tipc_loopback_trace(peer_net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(peer_net, list); + return; + } + if (msg_mcast(hdr)) { + tipc_loopback_trace(peer_net, list); + skb_queue_head_init(&inputq); + tipc_sk_mcast_rcv(peer_net, list, &inputq); + __skb_queue_purge(list); + skb_queue_purge(&inputq); + return; + } + return; + case MSG_FRAGMENTER: + if (tipc_msg_assemble(list)) { + tipc_loopback_trace(peer_net, list); + skb_queue_head_init(&inputq); + tipc_sk_mcast_rcv(peer_net, list, &inputq); + __skb_queue_purge(list); + skb_queue_purge(&inputq); + } + return; + case GROUP_PROTOCOL: + case CONN_MANAGER: + tipc_loopback_trace(peer_net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(peer_net, list); + return; + case LINK_PROTOCOL: + case NAME_DISTRIBUTOR: + case TUNNEL_PROTOCOL: + case BCAST_PROTOCOL: + return; + default: + return; + } +} + /** - * tipc_node_xmit() is the general link level function for message sending + * tipc_node_xmit() - general link level function for message sending * @net: the applicable net namespace * @list: chain of buffers containing message * @dnode: address of destination node * @selector: a number used for deterministic link selection * Consumes the buffer chain. 
- * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF + * Return: 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF */ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) @@ -1376,26 +1690,46 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; + bool node_up = false; + struct net *peer_net; int bearer_id; int rc; if (in_own_node(net, dnode)) { + tipc_loopback_trace(net, list); + spin_lock_init(&list->lock); tipc_sk_rcv(net, list); return 0; } n = tipc_node_find(net, dnode); if (unlikely(!n)) { - skb_queue_purge(list); + __skb_queue_purge(list); return -EHOSTUNREACH; } + rcu_read_lock(); + tipc_node_read_lock(n); + node_up = node_is_up(n); + peer_net = n->peer_net; + tipc_node_read_unlock(n); + if (node_up && peer_net && check_net(peer_net)) { + /* xmit inner linux container */ + tipc_lxc_xmit(peer_net, list); + if (likely(skb_queue_empty(list))) { + rcu_read_unlock(); + tipc_node_put(n); + return 0; + } + } + rcu_read_unlock(); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); tipc_node_put(n); - skb_queue_purge(list); + __skb_queue_purge(list); return -EHOSTUNREACH; } @@ -1409,7 +1743,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); else - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); tipc_node_put(n); @@ -1417,7 +1751,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, } /* tipc_node_xmit_skb(): send single buffer to destination - * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected * The only exception is datagram messages rerouted after secondary * lookup, which are rare and safe to dispose of anyway. 
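The cluster capability recalculation added by this patch appears verbatim at three call sites in node.c: tipc_node_create() and tipc_node_cleanup() above, and tipc_nl_peer_rm() further below. A minimal sketch of how a consolidated helper could look; tipc_node_recalc_cluster_caps() is a hypothetical name, not part of the patch, and its caller would have to hold tn->node_list_lock exactly as the three existing sites do:

/* Hypothetical helper (not in this patch), consolidating the capability
 * recalculation repeated in tipc_node_create(), tipc_node_cleanup() and
 * tipc_nl_peer_rm(). Caller must hold tn->node_list_lock.
 */
static void tipc_node_recalc_cluster_caps(struct net *net)
{
	struct tipc_net *tn = tipc_net(net);
	struct tipc_node *temp_node;

	/* Cluster capability set = intersection over all known peers */
	tn->capabilities = TIPC_NODE_CAPABILITIES;
	list_for_each_entry_rcu(temp_node, &tn->node_list, list)
		tn->capabilities &= temp_node->capabilities;

	/* Toggle replicast according to the resulting cluster capabilities */
	tipc_bcast_toggle_rcast(net, tn->capabilities & TIPC_BCAST_RCAST);
}
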
@@ -1427,7 +1761,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, { struct sk_buff_head head; - skb_queue_head_init(&head); + __skb_queue_head_init(&head); __skb_queue_tail(&head, skb); tipc_node_xmit(net, &head, dnode, selector); return 0; @@ -1449,12 +1783,23 @@ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) return 0; } -void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests) { + struct sk_buff_head xmitq; struct sk_buff *txskb; struct tipc_node *n; + u16 dummy; u32 dst; + /* Use broadcast if all nodes support it */ + if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) { + __skb_queue_head_init(&xmitq); + __skb_queue_tail(&xmitq, skb); + tipc_bcast_xmit(net, &xmitq, &dummy); + return; + } + + /* Otherwise use legacy replicast method */ rcu_read_lock(); list_for_each_entry_rcu(n, tipc_nodes(net), list) { dst = n->addr; @@ -1469,7 +1814,6 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) tipc_node_xmit_skb(net, txskb, dst, 0); } rcu_read_unlock(); - kfree_skb(skb); } @@ -1492,7 +1836,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, struct tipc_link *ucl; int rc; - rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq); if (rc & TIPC_LINK_DOWN_EVT) { tipc_node_reset_links(n); @@ -1557,14 +1901,16 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id } if (!skb_queue_empty(&xmitq)) - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); if (!skb_queue_empty(&be->inputq1)) tipc_node_mcast_rcv(n); /* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */ if (!skb_queue_empty(&n->bc_entry.namedq)) - tipc_named_rcv(net, &n->bc_entry.namedq); + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); /* If reassembly or retransmission failure => reset all links to peer */ if (rc & TIPC_LINK_DOWN_EVT) @@ -1575,9 +1921,11 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /** * tipc_node_check_state - check and if necessary update node state + * @n: target tipc_node * @skb: TIPC packet * @bearer_id: identity of bearer delivering the packet - * Returns true if state and msg are ok, otherwise false + * @xmitq: queue for messages to be xmited on + * Return: true if state and msg are ok, otherwise false */ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, int bearer_id, struct sk_buff_head *xmitq) @@ -1586,7 +1934,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, int usr = msg_user(hdr); int mtyp = msg_type(hdr); u16 oseqno = msg_seqno(hdr); - u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); u16 exp_pkts = msg_msgcnt(hdr); u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; @@ -1647,21 +1994,23 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate or update failover mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; - if (pl && tipc_link_is_up(pl)) { + if (pl && !tipc_link_is_reset(pl)) { __tipc_node_link_down(n, &pb_id, xmitq, &maddr); trace_tipc_node_link_down(n, true, "node link down <- failover!"); tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, 
and this happened before - * the tunnel link came up, FAILOVER was never sent. Ensure that - * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. + * the tunnel link came up, node failover was never started. + * Ensure that a FAILOVER_MSG is sent to get peer out of + * NODE_FAILINGOVER state, also this node must accept + * TUNNEL_MSGs from peer. */ - if (n->state != NODE_FAILINGOVER && !n->failover_sent) { - tipc_link_create_dummy_tnl_msg(l, xmitq); - n->failover_sent = true; - } + if (n->state != NODE_FAILINGOVER) + tipc_node_link_failover(n, pl, l, xmitq); + /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; @@ -1677,13 +2026,16 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } - /* No synching needed if only one link */ + /* No syncing needed if only one link */ if (!pl || !tipc_link_is_up(pl)) return true; /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { - syncpt = iseqno + exp_pkts - 1; + if (n->capabilities & TIPC_TUNNEL_ENHANCED) + syncpt = msg_syncpt(hdr); + else + syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1; if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); if (n->state == SELF_UP_PEER_UP) { @@ -1723,7 +2075,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace * @skb: TIPC packet - * @bearer: pointer to bearer message arrived on + * @b: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. @@ -1731,19 +2083,39 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { struct sk_buff_head xmitq; - struct tipc_node *n; + struct tipc_link_entry *le; struct tipc_msg *hdr; + struct tipc_node *n; int bearer_id = b->identity; - struct tipc_link_entry *le; u32 self = tipc_own_addr(net); int usr, rc = 0; u16 bc_ack; +#ifdef CONFIG_TIPC_CRYPTO + struct tipc_ehdr *ehdr; - __skb_queue_head_init(&xmitq); + /* Check if message must be decrypted first */ + if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb)) + goto rcv; + ehdr = (struct tipc_ehdr *)skb->data; + if (likely(ehdr->user != LINK_CONFIG)) { + n = tipc_node_find(net, ntohl(ehdr->addr)); + if (unlikely(!n)) + goto discard; + } else { + n = tipc_node_find_by_id(net, ehdr->id); + } + skb_dst_force(skb); + tipc_crypto_rcv(net, (n) ? 
n->crypto_rx : NULL, &skb, b); + if (!skb) + return; + +rcv: +#endif /* Ensure message is well-formed before touching the header */ if (unlikely(!tipc_msg_validate(&skb))) goto discard; + __skb_queue_head_init(&xmitq); hdr = buf_msg(skb); usr = msg_user(hdr); bc_ack = msg_bcast_ack(hdr); @@ -1767,10 +2139,16 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) le = &n->links[bearer_id]; /* Ensure broadcast reception is in synch with peer's send state */ - if (unlikely(usr == LINK_PROTOCOL)) + if (unlikely(usr == LINK_PROTOCOL)) { + if (unlikely(skb_linearize(skb))) { + tipc_node_put(n); + goto discard; + } + hdr = buf_msg(skb); tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); - else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) + } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) { tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); + } /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); @@ -1787,7 +2165,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check/update node state before receiving */ if (unlikely(skb)) { if (unlikely(skb_linearize(skb))) - goto discard; + goto out_node_put; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { if (le->link) { @@ -1805,7 +2183,9 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) tipc_node_link_down(n, bearer_id, false); if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) - tipc_named_rcv(net, &n->bc_entry.namedq); + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1))) tipc_node_mcast_rcv(n); @@ -1814,8 +2194,9 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) tipc_sk_rcv(net, &le->inputq); if (!skb_queue_empty(&xmitq)) - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); +out_node_put: tipc_node_put(n); discard: kfree_skb(skb); @@ -1843,9 +2224,13 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, &xmitq); else if (prop == TIPC_NLA_PROP_MTU) tipc_link_set_mtu(e->link, b->mtu); + + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); } + tipc_node_write_unlock(n); - tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } rcu_read_unlock(); @@ -1856,7 +2241,10 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; - struct tipc_node *peer; + struct tipc_node *peer, *temp_node; + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)&node_id[0]; + u64 *w1 = (u64 *)&node_id[8]; u32 addr; int err; @@ -1864,16 +2252,28 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, - info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); if (err) return err; - if (!attrs[TIPC_NLA_NET_ADDR]) - return -EINVAL; + /* attrs[TIPC_NLA_NET_NODEID] and attrs[TIPC_NLA_NET_ADDR] are + * mutually exclusive cases + */ + if (attrs[TIPC_NLA_NET_ADDR]) { + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!addr) + return -EINVAL; + } - addr = 
nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (attrs[TIPC_NLA_NET_NODEID]) { + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + addr = hash128to32(node_id); + } if (in_own_node(net, addr)) return -ENOTSUPP; @@ -1897,6 +2297,12 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) tipc_node_write_unlock(peer); tipc_node_delete(peer); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); err = 0; err_out: tipc_node_put(peer); @@ -1941,6 +2347,8 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) } list_for_each_entry_rcu(node, &tn->node_list, list) { + if (node->preliminary) + continue; if (last_addr) { if (node->addr == last_addr) last_addr = 0; @@ -2022,9 +2430,9 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2051,8 +2459,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (attrs[TIPC_NLA_LINK_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], - props); + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); if (err) { res = err; goto out; @@ -2071,16 +2478,19 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) tipc_link_set_prio(link, prio, &xmitq); } if (props[TIPC_NLA_PROP_WIN]) { - u32 win; + u32 max_win; - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, + tipc_link_min_win(link), + max_win); } } out: tipc_node_read_unlock(node); - tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr, + NULL); return res; } @@ -2098,9 +2508,9 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2114,7 +2524,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); + err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl); if (err) goto err_free; } else { @@ -2158,14 +2568,15 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); struct tipc_link_entry *le; if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = 
nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2174,11 +2585,26 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); + err = -EINVAL; + if (!strcmp(link_name, tipc_bclink_name)) { + err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net)); if (err) return err; return 0; + } else if (strstr(link_name, tipc_bclink_name)) { + rcu_read_lock(); + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + link = node->bc_entry.link; + if (link && !strcmp(link_name, tipc_link_name(link))) { + err = tipc_bclink_reset_stats(net, link); + tipc_node_read_unlock(node); + break; + } + tipc_node_read_unlock(node); + } + rcu_read_unlock(); + return err; } node = tipc_node_find_by_name(net, link_name, &bearer_id); @@ -2202,7 +2628,8 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) /* Caller should hold node lock */ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) + struct tipc_node *node, u32 *prev_link, + bool bc_link) { u32 i; int err; @@ -2218,6 +2645,14 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, if (err) return err; } + + if (bc_link) { + *prev_link = i; + err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link); + if (err) + return err; + } + *prev_link = 0; return 0; @@ -2226,17 +2661,36 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; u32 prev_link = cb->args[1]; int done = cb->args[2]; + bool bc_link = cb->args[3]; int err; if (done) return 0; + if (!prev_node) { + /* Check if broadcast-receiver links dumping is needed */ + if (attrs && attrs[TIPC_NLA_LINK]) { + err = nla_parse_nested_deprecated(link, + TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, + NULL); + if (unlikely(err)) + return err; + if (unlikely(!link[TIPC_NLA_LINK_BROADCAST])) + return -EINVAL; + bc_link = true; + } + } + msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; @@ -2260,7 +2714,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); + &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; @@ -2268,14 +2722,14 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(net, &msg); + err = tipc_nl_add_bc_link(net, &msg, tn->bcl); if (err) goto out; list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); + &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; @@ -2290,6 +2744,7 @@ out: cb->args[0] = prev_node; cb->args[1] = prev_link; cb->args[2] = done; + cb->args[3] = bc_link; return skb->len; } @@ -2303,9 +2758,10 @@ int 
tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MON]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, - info->attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + info->extack); if (err) return err; @@ -2332,7 +2788,7 @@ static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; @@ -2413,19 +2869,16 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, int err; if (!prev_node) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_MON]) return -EINVAL; - err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, - attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy, NULL); + err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + NULL); if (err) return err; @@ -2458,10 +2911,171 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, return skb->len; } -u32 tipc_node_get_addr(struct tipc_node *node) +#ifdef CONFIG_TIPC_CRYPTO +static int tipc_nl_retrieve_key(struct nlattr **attrs, + struct tipc_aead_key **pkey) { - return (node) ? node->addr : 0; + struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + struct tipc_aead_key *key; + + if (!attr) + return -ENODATA; + + if (nla_len(attr) < sizeof(*key)) + return -EINVAL; + key = (struct tipc_aead_key *)nla_data(attr); + if (key->keylen > TIPC_AEAD_KEYLEN_MAX || + nla_len(attr) < tipc_aead_key_size(key)) + return -EINVAL; + + *pkey = key; + return 0; +} + +static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_ID]; + + if (!attr) + return -ENODATA; + + if (nla_len(attr) < TIPC_NODEID_LEN) + return -EINVAL; + + *node_id = (u8 *)nla_data(attr); + return 0; +} + +static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_REKEYING]; + + if (!attr) + return -ENODATA; + + *intv = nla_get_u32(attr); + return 0; +} + +static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx; + struct tipc_node *n = NULL; + struct tipc_aead_key *ukey; + bool rekeying = true, master_key = false; + u8 *id, *own_id, mode; + u32 intv = 0; + int rc = 0; + + if (!info->attrs[TIPC_NLA_NODE]) + return -EINVAL; + + rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX, + info->attrs[TIPC_NLA_NODE], + tipc_nl_node_policy, info->extack); + if (rc) + return rc; + + own_id = tipc_own_id(net); + if (!own_id) { + GENL_SET_ERR_MSG(info, "not found own node identity (set id?)"); + return -EPERM; + } + + rc = tipc_nl_retrieve_rekeying(attrs, &intv); + if (rc == -ENODATA) + rekeying = false; + + rc = tipc_nl_retrieve_key(attrs, &ukey); + if (rc == -ENODATA && rekeying) + goto rekeying; + else if (rc) + return rc; + + rc = tipc_aead_key_validate(ukey, info); + if (rc) + return rc; + + rc = tipc_nl_retrieve_nodeid(attrs, &id); + switch (rc) { + case -ENODATA: + mode = CLUSTER_KEY; + master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]); + break; + 
case 0: + mode = PER_NODE_KEY; + if (memcmp(id, own_id, NODE_ID_LEN)) { + n = tipc_node_find_by_id(net, id) ?: + tipc_node_create(net, 0, id, 0xffffu, 0, true); + if (unlikely(!n)) + return -ENOMEM; + c = n->crypto_rx; + } + break; + default: + return rc; + } + + /* Initiate the TX/RX key */ + rc = tipc_crypto_key_init(c, ukey, mode, master_key); + if (n) + tipc_node_put(n); + + if (unlikely(rc < 0)) { + GENL_SET_ERR_MSG(info, "unable to initiate or attach new key"); + return rc; + } else if (c == tx) { + /* Distribute TX key but not master one */ + if (!master_key && tipc_crypto_key_distr(tx, rc, NULL)) + GENL_SET_ERR_MSG(info, "failed to replicate new key"); +rekeying: + /* Schedule TX rekeying if needed */ + tipc_crypto_rekeying_sched(tx, rekeying, intv); + } + + return 0; +} + +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_set_key(skb, info); + rtnl_unlock(); + + return err; +} + +static int __tipc_nl_node_flush_key(struct sk_buff *skb, + struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + + tipc_crypto_key_flush(tn->crypto_tx); + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) + tipc_crypto_key_flush(n->crypto_rx); + rcu_read_unlock(); + + return 0; +} + +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_flush_key(skb, info); + rtnl_unlock(); + + return err; } +#endif /** * tipc_node_dump - dump TIPC node data @@ -2519,3 +3133,33 @@ int tipc_node_dump(struct tipc_node *n, bool more, char *buf) return i; } + +void tipc_node_pre_cleanup_net(struct net *exit_net) +{ + struct tipc_node *n; + struct tipc_net *tn; + struct net *tmp; + + rcu_read_lock(); + for_each_net_rcu(tmp) { + if (tmp == exit_net) + continue; + tn = tipc_net(tmp); + if (!tn) + continue; + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_rcu(n, &tn->node_list, list) { + if (!n->peer_net) + continue; + if (n->peer_net != exit_net) + continue; + tipc_node_write_lock(n); + n->peer_net = NULL; + n->peer_hash_mix = 0; + tipc_node_write_unlock_fast(n); + break; + } + spin_unlock_bh(&tn->node_list_lock); + } + rcu_read_unlock(); +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..154a5bbb0d29 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,12 @@ enum { TIPC_BLOCK_FLOWCTL = (1 << 3), TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7), + TIPC_GAP_ACK_BLOCK = (1 << 8), + TIPC_TUNNEL_ENHANCED = (1 << 9), + TIPC_NAGLE = (1 << 10), + TIPC_NAMED_BCAST = (1 << 11) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,16 +65,33 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128 | \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL | \ + TIPC_GAP_ACK_BLOCK | \ + TIPC_TUNNEL_ENHANCED | \ + TIPC_NAGLE | \ + TIPC_NAMED_BCAST) + #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); bool tipc_node_get_id(struct net *net, u32 addr, u8 *id); u32 tipc_node_get_addr(struct tipc_node *node); +char *tipc_node_get_id_str(struct tipc_node *node); +void tipc_node_put(struct tipc_node *node); +void tipc_node_get(struct tipc_node *node); +struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, + u16 capabilities, u32 hash_mixes, + bool preliminary); +#ifdef CONFIG_TIPC_CRYPTO +struct 
tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n); +struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos); +struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr); +#endif u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr); void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, struct tipc_bearer *bearer, - u16 capabilities, u32 signature, + u16 capabilities, u32 signature, u32 hash_mixes, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); @@ -83,10 +105,10 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); -void tipc_node_broadcast(struct net *net, struct sk_buff *skb); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); -int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected); bool tipc_node_is_up(struct net *net, u32 addr); u16 tipc_node_get_capabilities(struct net *net, u32 addr); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); @@ -101,4 +123,9 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, struct netlink_callback *cb); +#ifdef CONFIG_TIPC_CRYPTO +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info); +#endif +void tipc_node_pre_cleanup_net(struct net *exit_net); #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8fc5acd4820d..817b07d95a91 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,8 +1,9 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2017, Ericsson AB + * Copyright (c) 2001-2007, 2012-2019, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -36,6 +37,7 @@ #include <linux/rhashtable.h> #include <linux/sched/signal.h> +#include <trace/events/sock.h> #include "core.h" #include "name_table.h" @@ -48,12 +50,13 @@ #include "group.h" #include "trace.h" +#define NAGLE_START_INIT 4 +#define NAGLE_START_MAX 1024 #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ -#define TIPC_FWD_MSG 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 -#define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */ +#define TIPC_ACK_RATE 4 /* ACK at 1/4 of rcv window size */ enum { TIPC_LISTEN = TCP_LISTEN, @@ -71,32 +74,42 @@ struct sockaddr_pair { /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API - * @conn_type: TIPC type used when connection was established - * @conn_instance: TIPC instance used when connection was established - * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @maxnagle: maximum size of msg which can be subject to nagle * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages - * #cong_links: list of congested links + * @cong_links: list of congested links * @publications: list of publications for port - * @blocking_link: address of the congested link we are currently sleeping on * @pub_count: total # of publications port has made during its lifetime * @conn_timeout: the time we can wait for an unresponded setup request + * @probe_unacked: probe has not received ack yet * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links * @snt_unacked: # messages sent by socket, and not yet acked by peer + * @snd_win: send window size + * @peer_caps: peer capabilities mask * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @rcv_win: receive window size * @peer: 'connected' peer for dgram/rdm * @node: hash table node * @mc_method: cookie for use between socket and broadcast layer * @rcu: rcu struct for tipc_sock + * @group: TIPC communications group + * @oneway: message count in one direction (FIXME) + * @nagle_start: current nagle value + * @snd_backlog: send backlog count + * @msg_acc: messages accepted; used in managing backlog and nagle + * @pkt_cnt: TIPC socket packet count + * @expect_ack: whether this TIPC socket is expecting an ack + * @nodelay: setsockopt() TIPC_NODELAY setting + * @group_is_open: TIPC socket group is fully open (FIXME) + * @published: true if port has one or more associated names + * @conn_addrtype: address type used when establishing connection */ struct tipc_sock { struct sock sk; - u32 conn_type; - u32 conn_instance; - int published; u32 max_pkt; + u32 maxnagle; u32 portid; struct tipc_msg phdr; struct list_head cong_links; @@ -116,7 +129,16 @@ struct tipc_sock { struct tipc_mc_method mc_method; struct rcu_head rcu; struct tipc_group *group; + u32 oneway; + u32 nagle_start; + u16 snd_backlog; + u16 msg_acc; + u16 pkt_cnt; + bool expect_ack; + bool nodelay; bool group_is_open; + bool published; + u8 conn_addrtype; }; static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -124,19 +146,17 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct 
sock *sk); static int tipc_release(struct socket *sock); -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, - bool kern); static void tipc_sk_timeout(struct timer_list *t); -static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq); -static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq); +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); +static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -184,17 +204,17 @@ static int tsk_importance(struct tipc_sock *tsk) return msg_importance(&tsk->phdr); } -static int tsk_set_importance(struct tipc_sock *tsk, int imp) +static struct tipc_sock *tipc_sk(const struct sock *sk) { - if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&tsk->phdr, (u32)imp); - return 0; + return container_of(sk, struct tipc_sock, sk); } -static struct tipc_sock *tipc_sk(const struct sock *sk) +int tsk_set_importance(struct sock *sk, int imp) { - return container_of(sk, struct tipc_sock, sk); + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp); + return 0; } static bool tsk_conn_cong(struct tipc_sock *tsk) @@ -227,8 +247,29 @@ static u16 tsk_inc(struct tipc_sock *tsk, int msglen) return 1; } +/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle + */ +static void tsk_set_nagle(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + + tsk->maxnagle = 0; + if (sk->sk_type != SOCK_STREAM) + return; + if (tsk->nodelay) + return; + if (!(tsk->peer_caps & TIPC_NAGLE)) + return; + /* Limit node local buffer size to avoid receive queue overflow */ + if (tsk->max_pkt == MAX_MSG_SIZE) + tsk->maxnagle = 1500; + else + tsk->maxnagle = tsk->max_pkt; +} + /** * tsk_advance_rx_queue - discard first buffer in socket receive queue + * @sk: network socket * * Caller must hold socket lock */ @@ -257,20 +298,22 @@ static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) /** * tsk_rej_rx_queue - reject all buffers in socket receive queue + * @sk: network socket + * @error: response error code * * Caller must hold socket lock */ -static void tsk_rej_rx_queue(struct sock *sk) +static void tsk_rej_rx_queue(struct sock *sk, int error) { struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_receive_queue))) - tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); + tipc_sk_respond(sk, skb, error); } -static bool tipc_sk_connected(struct sock *sk) +static bool tipc_sk_connected(const struct sock *sk) { - return sk->sk_state == TIPC_ESTABLISHED; + return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED; } /* tipc_sk_type_connectionless - check if the socket is datagram socket @@ -379,16 +422,18 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) #define tipc_wait_for_cond(sock_, timeo_, condition_) \ ({ \ + DEFINE_WAIT_FUNC(wait_, 
woken_wake_function); \ struct sock *sk_; \ int rc_; \ \ while ((rc_ = !(condition_))) { \ - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \ + smp_rmb(); \ sk_ = (sock_)->sk; \ rc_ = tipc_sk_sock_err((sock_), timeo_); \ if (rc_) \ break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + add_wait_queue(sk_sleep(sk_), &wait_); \ release_sock(sk_); \ *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ sched_annotate_sleep(); \ @@ -408,7 +453,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) * This routine creates additional data structures used by the TIPC socket, * initializes them, and links them together. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int kern) @@ -444,6 +489,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk = tipc_sk(sk); tsk->max_pkt = MAX_PKT_DEFAULT; + tsk->maxnagle = 0; + tsk->nagle_start = NAGLE_START_INIT; INIT_LIST_HEAD(&tsk->publications); INIT_LIST_HEAD(&tsk->cong_links); msg = &tsk->phdr; @@ -453,6 +500,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } @@ -467,7 +515,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_sk_backlog_rcv; - sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]); sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; @@ -484,7 +532,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); } - + __skb_queue_head_init(&tsk->mc_method.deferredq); trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); return 0; } @@ -502,7 +550,7 @@ static void __tipc_shutdown(struct socket *sock, int error) struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); - long timeout = CONN_TIMEOUT_DEFAULT; + long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); u32 dnode = tsk_peer_node(tsk); struct sk_buff *skb; @@ -510,37 +558,50 @@ static void __tipc_shutdown(struct socket *sock, int error) tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))); - /* Remove any pending SYN message */ + /* Push out delayed messages if in Nagle mode */ + tipc_sk_push_backlog(tsk, false); + /* Remove pending SYN */ __skb_queue_purge(&sk->sk_write_queue); - /* Reject all unreceived messages, except on an active connection - * (which disconnects locally & sends a 'FIN+' to peer). 
- */ - while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (TIPC_SKB_CB(skb)->bytes_read) { - kfree_skb(skb); - continue; - } - if (!tipc_sk_type_connectionless(sk) && - sk->sk_state != TIPC_DISCONNECTING) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, error); + /* Remove partially received buffer if any */ + skb = skb_peek(&sk->sk_receive_queue); + if (skb && TIPC_SKB_CB(skb)->bytes_read) { + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); } - if (tipc_sk_type_connectionless(sk)) + /* Reject all unreceived messages if connectionless */ + if (tipc_sk_type_connectionless(sk)) { + tsk_rej_rx_queue(sk, error); return; + } - if (sk->sk_state != TIPC_DISCONNECTING) { + switch (sk->sk_state) { + case TIPC_CONNECTING: + case TIPC_ESTABLISHED: + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + /* Send a FIN+/- to its peer */ + skb = __skb_dequeue(&sk->sk_receive_queue); + if (skb) { + __skb_queue_purge(&sk->sk_receive_queue); + tipc_sk_respond(sk, skb, error); + break; + } skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, error); if (skb) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); - tipc_set_sk_state(sk, TIPC_DISCONNECTING); + break; + case TIPC_LISTEN: + /* Reject all SYN messages */ + tsk_rej_rx_queue(sk, error); + break; + default: + __skb_queue_purge(&sk->sk_receive_queue); + break; } } @@ -558,7 +619,7 @@ static void __tipc_shutdown(struct socket *sock, int error) * are returned or discarded according to the "destination droppable" setting * specified for the message by the sender. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_release(struct socket *sock) { @@ -579,7 +640,8 @@ static int tipc_release(struct socket *sock) __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); - tipc_sk_withdraw(tsk, 0, NULL); + tipc_sk_withdraw(tsk, NULL); + __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@ -595,76 +657,85 @@ static int tipc_release(struct socket *sock) } /** - * tipc_bind - associate or disassocate TIPC name(s) with a socket + * __tipc_bind - associate or disassociate TIPC name(s) with a socket * @sock: socket structure - * @uaddr: socket address describing name(s) and desired operation - * @uaddr_len: size of socket address data structure + * @skaddr: socket address describing name(s) and desired operation + * @alen: size of socket address data structure * - * Name and name sequence binding is indicated using a positive scope value; + * Name and name sequence binding are indicated using a positive scope value; * a negative scope value unbinds the specified name. Specifying no name * (i.e. a socket address length of 0) unbinds all names from the socket. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise * * NOTE: This routine doesn't need to take the socket lock since it doesn't * access any non-constant socket information. 
*/ -static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, - int uaddr_len) +static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sock *sk = sock->sk; - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsk = tipc_sk(sk); - int res = -EINVAL; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + struct tipc_sock *tsk = tipc_sk(sock->sk); + bool unbind = false; - lock_sock(sk); - if (unlikely(!uaddr_len)) { - res = tipc_sk_withdraw(tsk, 0, NULL); - goto exit; - } - if (tsk->group) { - res = -EACCES; - goto exit; - } - if (uaddr_len < sizeof(struct sockaddr_tipc)) { - res = -EINVAL; - goto exit; + if (unlikely(!alen)) + return tipc_sk_withdraw(tsk, NULL); + + if (ua->addrtype == TIPC_SERVICE_ADDR) { + ua->addrtype = TIPC_SERVICE_RANGE; + ua->sr.upper = ua->sr.lower; } - if (addr->family != AF_TIPC) { - res = -EAFNOSUPPORT; - goto exit; + if (ua->scope < 0) { + unbind = true; + ua->scope = -ua->scope; } + /* Users may still use deprecated TIPC_ZONE_SCOPE */ + if (ua->scope != TIPC_NODE_SCOPE) + ua->scope = TIPC_CLUSTER_SCOPE; - if (addr->addrtype == TIPC_ADDR_NAME) - addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { - res = -EAFNOSUPPORT; - goto exit; - } + if (tsk->group) + return -EACCES; - if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && - (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) { - res = -EACCES; - goto exit; - } + if (unbind) + return tipc_sk_withdraw(tsk, ua); + return tipc_sk_publish(tsk, ua); +} - res = (addr->scope >= 0) ? - tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : - tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); -exit: - release_sock(sk); +int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) +{ + int res; + + lock_sock(sock->sk); + res = __tipc_bind(sock, skaddr, alen); + release_sock(sock->sk); return res; } +static int tipc_bind(struct socket *sock, struct sockaddr_unsized *skaddr, int alen) +{ + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + u32 atype = ua->addrtype; + + if (alen) { + if (!tipc_uaddr_valid(ua, alen)) + return -EINVAL; + if (atype == TIPC_SOCKET_ADDR) + return -EAFNOSUPPORT; + if (ua->sr.type < TIPC_RESERVED_TYPES) { + pr_warn_once("Can't bind to reserved service type %u\n", + ua->sr.type); + return -EACCES; + } + } + return tipc_sk_bind(sock, (struct sockaddr *)skaddr, alen); +} + /** * tipc_getname - get port ID of socket or peer socket * @sock: socket structure * @uaddr: area for returned socket address - * @uaddr_len: area for returned length of socket address * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise * * NOTE: This routine doesn't need to take the socket lock since it only * accesses socket information that is unchanging (or which changes in @@ -689,7 +760,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tipc_own_addr(sock_net(sk)); } - addr->addrtype = TIPC_ADDR_ID; + addr->addrtype = TIPC_SOCKET_ADDR; addr->family = AF_TIPC; addr->scope = 0; addr->addr.name.domain = 0; @@ -703,7 +774,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * @sock: socket for which to calculate the poll bits * @wait: ??? 
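For reference, the readiness bits computed in tipc_poll() surface through the ordinary poll interface; note that after this patch a TIPC_CONNECTING socket also reports readability when its receive queue is non-empty. Illustrative sketch:

        #include <poll.h>

        static short wait_ready(int sd)
        {
                struct pollfd pfd = { .fd = sd, .events = POLLIN | POLLOUT };

                if (poll(&pfd, 1, -1) < 0)
                        return 0;
                /* POLLIN:  receive queue non-empty (data, SYN or ACK) */
                /* POLLOUT: writable, i.e. no link or peer congestion  */
                return pfd.revents;
        }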
* - * Returns pollmask value + * Return: pollmask value * * COMMENTARY: * It appears that the usual socket locking mechanisms are not useful here @@ -732,12 +803,12 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: - case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= EPOLLOUT; - /* fall through */ + fallthrough; case TIPC_LISTEN: - if (!skb_queue_empty(&sk->sk_receive_queue)) + case TIPC_CONNECTING: + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) revents |= EPOLLIN | EPOLLRDNORM; break; case TIPC_OPEN: @@ -745,7 +816,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, revents |= EPOLLOUT; if (!tipc_sk_type_connectionless(sk)) break; - if (skb_queue_empty(&sk->sk_receive_queue)) + if (skb_queue_empty_lockless(&sk->sk_receive_queue)) break; revents |= EPOLLIN | EPOLLRDNORM; break; @@ -759,15 +830,15 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, /** * tipc_sendmcast - send multicast message * @sock: socket structure - * @seq: destination address + * @ua: destination address struct * @msg: message to send * @dlen: length of data to send * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ -static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, +static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua, struct msghdr *msg, size_t dlen, long timeout) { struct sock *sk = sock->sk; @@ -775,7 +846,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); - struct tipc_mc_method *method = &tsk->mc_method; struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@ -790,8 +860,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, /* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); - tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, &dsts); + tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH; @@ -801,19 +870,19 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); + msg_set_nametype(hdr, ua->sr.type); + msg_set_namelower(hdr, ua->sr.lower); + msg_set_nameupper(hdr, ua->sr.upper); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts); /* Send message if build was successful */ if (unlikely(rc == dlen)) { trace_tipc_sk_sendmcast(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_mcast_xmit(net, &pkts, method, &dsts, + rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts, &tsk->cong_link_cnt); } @@ -825,6 +894,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, /** * tipc_send_group_msg - send a message to a member in the group * @net: network namespace + * @tsk: tipc socket * @m: message to send * @mb: group member * @dnode: destination node @@ -850,8 +920,8 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, 
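Multicast as implemented by tipc_sendmcast() is reached from user space by addressing a datagram (SOCK_RDM or SOCK_DGRAM) socket with a service range; one copy is delivered to every socket bound inside that range. Sketch with arbitrary values:

        #include <sys/socket.h>
        #include <linux/tipc.h>

        static ssize_t mcast_example(int sd, const void *buf, size_t len)
        {
                struct sockaddr_tipc a = {
                        .family = AF_TIPC,
                        .addrtype = TIPC_SERVICE_RANGE, /* a.k.a. TIPC_ADDR_MCAST */
                        .addr.nameseq = { .type = 18888, .lower = 0, .upper = 99 },
                };

                return sendto(sd, buf, len, 0,
                              (struct sockaddr *)&a, sizeof(a));
        }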
msg_set_grp_bc_seqno(hdr, bc_snd_nxt); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); - mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + __skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@ -880,13 +950,13 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); @@ -894,8 +964,8 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, u32 node, port; int rc; - node = dest->addr.id.node; - port = dest->addr.id.ref; + node = ua->sk.node; + port = ua->sk.ref; if (!port && !node) return -EHOSTUNREACH; @@ -924,12 +994,12 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct list_head *cong_links = &tsk->cong_links; @@ -939,17 +1009,13 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, struct tipc_member *mbr = NULL; struct net *net = sock_net(sk); u32 node, port, exclude; - struct list_head dsts; - u32 type, inst, scope; + LIST_HEAD(dsts); int lookups = 0; int dstcnt, rc; bool cong; - INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); while (++lookups < 4) { exclude = tipc_group_exclude(tsk->group); @@ -958,8 +1024,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, false)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, + exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); cong = tipc_group_cong(tsk->group, node, port, blks, @@ -1003,18 +1069,18 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /** * tipc_send_group_bcast - send message to all members in communication group - * @sk: socket structure + * @sock: socket structure * @m: message to send * @dlen: total length of message data * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, 
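As the dispatch in __tipc_sendmsg() (further down) shows, a group member selects between these send flavours purely by the destination address, or its absence. A sketch, assuming sd has already joined a group via TIPC_GROUP_JOIN:

        #include <sys/socket.h>
        #include <linux/tipc.h>

        static void group_send_flavours(int sd,
                                        struct sockaddr_tipc *member,   /* TIPC_SOCKET_ADDR  */
                                        struct sockaddr_tipc *service,  /* TIPC_SERVICE_ADDR */
                                        const void *buf, size_t len)
        {
                send(sd, buf, len, 0);          /* no address: group broadcast   */
                sendto(sd, buf, len, 0,         /* socket address: group unicast */
                       (struct sockaddr *)member, sizeof(*member));
                sendto(sd, buf, len, 0,         /* service address: group anycast */
                       (struct sockaddr *)service, sizeof(*service));
                /* a TIPC_SERVICE_RANGE address selects group multicast */
        }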
int dlen, long timeout) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); @@ -1039,9 +1105,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, return -EHOSTUNREACH; /* Complete message header */ - if (dest) { + if (ua) { msg_set_type(hdr, TIPC_GRP_MCAST_MSG); - msg_set_nameinst(hdr, dest->addr.name.name.instance); + msg_set_nameinst(hdr, ua->sa.instance); } else { msg_set_type(hdr, TIPC_GRP_BCAST_MSG); msg_set_nameinst(hdr, 0); @@ -1055,7 +1121,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, msg_set_grp_bc_ack_req(hdr, ack); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@ -1083,34 +1149,29 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, * @timeout: timeout to wait for wakeup * * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno + * Return: the number of bytes sent on success, or errno */ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, int dlen, long timeout) { + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 type, inst, scope, exclude; - struct list_head dsts; - u32 dstcnt; + u32 dstcnt, exclude; + LIST_HEAD(dsts); - INIT_LIST_HEAD(&dsts); - - type = msg_nametype(hdr); - inst = dest->addr.name.name.instance; - scope = msg_lookup_scope(hdr); + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, - &dstcnt, exclude, true)) + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true)) return -EHOSTUNREACH; if (dstcnt == 1) { - tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref); return tipc_send_group_unicast(sock, m, dlen, timeout); } @@ -1120,6 +1181,7 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, /** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @net: the associated network namespace * @arrvq: queue with arriving messages, to be cloned after destination lookup * @inputq: queue with cloned messages, delivered to socket after dest lookup * @@ -1129,18 +1191,19 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { u32 self = tipc_own_addr(net); - u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; u32 portid, onode; struct sk_buff_head tmpq; struct list_head dports; struct tipc_msg *hdr; + struct tipc_uaddr ua; int user, mtyp, hlen; - bool exact; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); + ua.addrtype = TIPC_SERVICE_RANGE; + /* tipc_skb_peek() increments the head skb's reference counter */ skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { hdr = buf_msg(skb); @@ -1148,7 +1211,13 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, mtyp = msg_type(hdr); hlen = skb_headroom(skb) + 
msg_hdr_sz(hdr); onode = msg_orignode(hdr); - type = msg_nametype(hdr); + ua.sr.type = msg_nametype(hdr); + ua.sr.lower = msg_namelower(hdr); + ua.sr.upper = msg_nameupper(hdr); + if (onode == self) + ua.scope = TIPC_ANY_SCOPE; + else + ua.scope = TIPC_CLUSTER_SCOPE; if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); @@ -1163,24 +1232,13 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, /* Group messages require exact scope match */ if (msg_in_group(hdr)) { - lower = 0; - upper = ~0; - scope = msg_lookup_scope(hdr); - exact = true; - } else { - /* TIPC_NODE_SCOPE means "any scope" in this context */ - if (onode == self) - scope = TIPC_NODE_SCOPE; - else - scope = TIPC_CLUSTER_SCOPE; - exact = false; - lower = msg_namelower(hdr); - upper = msg_nameupper(hdr); + ua.sr.lower = 0; + ua.sr.upper = ~0; + ua.scope = msg_lookup_scope(hdr); } /* Create destination port list: */ - tipc_nametbl_mc_lookup(net, type, lower, upper, - scope, exact, &dports); + tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports); /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { @@ -1192,10 +1250,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, } pr_warn("Failed to clone mcast rcv buffer\n"); } - /* Append to inputq if not already done by other thread */ + /* Append clones to inputq only if skb is still head of arrvq */ spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { skb_queue_splice_tail_init(&tmpq, inputq); + /* Decrement the skb's refcnt */ kfree_skb(__skb_dequeue(arrvq)); } spin_unlock_bh(&inputq->lock); @@ -1205,10 +1264,62 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, tipc_sk_rcv(net, inputq); } +/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue + * when socket is in Nagle mode + */ +static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack) +{ + struct sk_buff_head *txq = &tsk->sk.sk_write_queue; + struct sk_buff *skb = skb_peek_tail(txq); + struct net *net = sock_net(&tsk->sk); + u32 dnode = tsk_peer_node(tsk); + int rc; + + if (nagle_ack) { + tsk->pkt_cnt += skb_queue_len(txq); + if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) { + tsk->oneway = 0; + if (tsk->nagle_start < NAGLE_START_MAX) + tsk->nagle_start *= 2; + tsk->expect_ack = false; + pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n", + tsk->portid, tsk->msg_acc, tsk->pkt_cnt, + tsk->nagle_start); + } else { + tsk->nagle_start = NAGLE_START_INIT; + if (skb) { + msg_set_ack_required(buf_msg(skb)); + tsk->expect_ack = true; + } else { + tsk->expect_ack = false; + } + } + tsk->msg_acc = 0; + tsk->pkt_cnt = 0; + } + + if (!skb || tsk->cong_link_cnt) + return; + + /* Do not send SYN again after congestion */ + if (msg_is_syn(buf_msg(skb))) + return; + + if (tsk->msg_acc) + tsk->pkt_cnt += skb_queue_len(txq); + tsk->snt_unacked += tsk->snd_backlog; + tsk->snd_backlog = 0; + rc = tipc_node_xmit(net, txq, dnode, tsk->portid); + if (rc == -ELINKCONG) + tsk->cong_link_cnt = 1; +} + /** * tipc_sk_conn_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket * @skb: pointer to message buffer. 
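The heuristic above is clearer with numbers: bundling is kept only if, since the last ACK, at least two user messages were packed per wire packet on average. For example, msg_acc = 12 over pkt_cnt = 4 packets (ratio 3) keeps Nagle armed, while msg_acc = 5 over pkt_cnt = 4 (ratio 1) backs off and doubles the start threshold, from NAGLE_START_INIT up to at most NAGLE_START_MAX (4 and 1024 where defined earlier in this file). A restated sketch of the test:

        static bool nagle_paid_off(u32 msg_acc, u32 pkt_cnt)
        {
                /* >= 2 messages per packet on average since last ACK */
                return pkt_cnt && msg_acc / pkt_cnt >= 2;
        }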
+ * @inputq: buffer list containing the buffers + * @xmitq: output message area */ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, struct sk_buff_head *inputq, @@ -1218,7 +1329,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, u32 onode = tsk_own_node(tsk); struct sock *sk = &tsk->sk; int mtyp = msg_type(hdr); - bool conn_cong; + bool was_cong; /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, hdr)) { @@ -1251,11 +1362,12 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { - conn_cong = tsk_conn_cong(tsk); + was_cong = tsk_conn_cong(tsk); + tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr)); tsk->snt_unacked -= msg_conn_ack(hdr); if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) tsk->snd_win = msg_adv_win(hdr); - if (conn_cong) + if (was_cong && !tsk_conn_cong(tsk)) sk->sk_write_space(sk); } else if (mtyp != CONN_PROBE_REPLY) { pr_warn("Received unknown CONN_PROTO msg\n"); @@ -1275,7 +1387,7 @@ exit: * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. * (Note: 'SYN+' is prohibited on SOCK_STREAM.) * - * Returns the number of bytes sent on success, or errno otherwise + * Return: the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) @@ -1295,44 +1407,43 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_name_seq *seq; + struct tipc_socket_addr skaddr; struct sk_buff_head pkts; - u32 dport, dnode = 0; - u32 type, inst; - int mtu, rc; + int atype, mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; - if (likely(dest)) { - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - if (unlikely(dest->family != AF_TIPC)) + if (ua) { + if (!tipc_uaddr_valid(ua, m->msg_namelen)) return -EINVAL; + atype = ua->addrtype; } + /* If socket belongs to a communication group follow other paths */ if (grp) { - if (!dest) + if (!ua) return tipc_send_group_bcast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_NAME) + if (atype == TIPC_SERVICE_ADDR) return tipc_send_group_anycast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_ID) + if (atype == TIPC_SOCKET_ADDR) return tipc_send_group_unicast(sock, m, dlen, timeout); - if (dest->addrtype == TIPC_ADDR_MCAST) + if (atype == TIPC_SERVICE_RANGE) return tipc_send_group_mcast(sock, m, dlen, timeout); return -EINVAL; } - if (unlikely(!dest)) { - dest = &tsk->peer; - if (!syn || dest->family != AF_TIPC) + if (!ua) { + ua = (struct tipc_uaddr *)&tsk->peer; + if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; + atype = ua->addrtype; } if (unlikely(syn)) { @@ -1342,66 +1453,75 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) return -EISCONN; if (tsk->published) return -EOPNOTSUPP; - if (dest->addrtype == TIPC_ADDR_NAME) { - tsk->conn_type = dest->addr.name.name.type; - tsk->conn_instance = dest->addr.name.name.instance; - } + if (atype == 
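The lookup_node consulted in the anycast path below comes from sockaddr_tipc.addr.name.domain in user space: 0 asks for a cluster-wide lookup, while a concrete node number restricts the lookup to bindings on that node. Sketch (type and instance arbitrary):

        #include <sys/socket.h>
        #include <linux/tipc.h>

        static ssize_t anycast_example(int sd, const void *buf, size_t len)
        {
                struct sockaddr_tipc a = {
                        .family = AF_TIPC,
                        .addrtype = TIPC_SERVICE_ADDR,
                        .addr.name.name = { .type = 18888, .instance = 17 },
                        .addr.name.domain = 0,  /* 0: look up cluster-wide */
                };

                return sendto(sd, buf, len, 0,
                              (struct sockaddr *)&a, sizeof(a));
        }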
TIPC_SERVICE_ADDR) + tsk->conn_addrtype = atype; msg_set_syn(hdr, 1); } - seq = &dest->addr.nameseq; - if (dest->addrtype == TIPC_ADDR_MCAST) - return tipc_sendmcast(sock, seq, m, dlen, timeout); + memset(&skaddr, 0, sizeof(skaddr)); - if (dest->addrtype == TIPC_ADDR_NAME) { - type = dest->addr.name.name.type; - inst = dest->addr.name.name.instance; - dnode = dest->addr.name.domain; - msg_set_type(hdr, TIPC_NAMED_MSG); - msg_set_hdr_sz(hdr, NAMED_H_SIZE); - msg_set_nametype(hdr, type); - msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); - dport = tipc_nametbl_translate(net, type, inst, &dnode); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dport); - if (unlikely(!dport && !dnode)) + /* Determine destination */ + if (atype == TIPC_SERVICE_RANGE) { + return tipc_sendmcast(sock, ua, m, dlen, timeout); + } else if (atype == TIPC_SERVICE_ADDR) { + skaddr.node = ua->lookup_node; + ua->scope = tipc_node2scope(skaddr.node); + if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_ADDR_ID) { - dnode = dest->addr.id.node; - msg_set_type(hdr, TIPC_DIRECT_MSG); - msg_set_lookup_scope(hdr, 0); - msg_set_destnode(hdr, dnode); - msg_set_destport(hdr, dest->addr.id.ref); - msg_set_hdr_sz(hdr, BASIC_H_SIZE); + } else if (atype == TIPC_SOCKET_ADDR) { + skaddr = ua->sk; } else { return -EINVAL; } /* Block or return if destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, - !tipc_dest_find(clinks, dnode, 0)); + !tipc_dest_find(clinks, skaddr.node, 0)); if (unlikely(rc)) return rc; - skb_queue_head_init(&pkts); - mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + /* Finally build message header */ + msg_set_destnode(hdr, skaddr.node); + msg_set_destport(hdr, skaddr.ref); + if (atype == TIPC_SERVICE_ADDR) { + msg_set_type(hdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(hdr, NAMED_H_SIZE); + msg_set_nametype(hdr, ua->sa.type); + msg_set_nameinst(hdr, ua->sa.instance); + msg_set_lookup_scope(hdr, ua->scope); + } else { /* TIPC_SOCKET_ADDR */ + msg_set_type(hdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(hdr, 0); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + } + + /* Add message body */ + __skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; - if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) + if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) { + __skb_queue_purge(&pkts); return -ENOMEM; + } + /* Send message */ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - tipc_dest_push(clinks, dnode, 0); + tipc_dest_push(clinks, skaddr.node, 0); tsk->cong_link_cnt++; rc = 0; } - if (unlikely(syn && !rc)) + if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); + if (dlen && timeout) { + timeout = msecs_to_jiffies(timeout); + tipc_wait_for_connect(sock, &timeout); + } + } return rc ? rc : dlen; } @@ -1414,7 +1534,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) * * Used for SOCK_STREAM data. 
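Since __tipc_sendstream() can legitimately return a short count (for instance when a congestion wait times out after partial progress), SOCK_STREAM callers should use the usual retry loop:

        #include <sys/socket.h>

        static int send_all(int sd, const char *buf, size_t len)
        {
                size_t off = 0;

                while (off < len) {
                        ssize_t n = send(sd, buf + off, len - off, 0);

                        if (n < 0)
                                return -1;      /* inspect errno */
                        off += n;
                }
                return 0;
        }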
* - * Returns the number of bytes sent on success (or partial success), + * Return: the number of bytes sent on success (or partial success), * or errno if no data sent */ static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz) @@ -1434,21 +1554,22 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + struct sk_buff_head *txq = &sk->sk_write_queue; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - struct sk_buff_head pkts; + struct sk_buff *skb; u32 dnode = tsk_peer_node(tsk); + int maxnagle = tsk->maxnagle; + int maxpkt = tsk->max_pkt; int send, sent = 0; - int rc = 0; - - skb_queue_head_init(&pkts); + int blocks, rc = 0; if (unlikely(dlen > INT_MAX)) return -EMSGSIZE; /* Handle implicit connection setup */ - if (unlikely(dest)) { + if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { rc = __tipc_sendmsg(sock, m, dlen); if (dlen && dlen == rc) { tsk->peer_caps = tipc_node_get_capabilities(net, dnode); @@ -1464,21 +1585,48 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) tipc_sk_connected(sk))); if (unlikely(rc)) break; - send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); - rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts); - if (unlikely(rc != send)) - break; - - trace_tipc_sk_sendstream(sk, skb_peek(&pkts), + blocks = tsk->snd_backlog; + if (tsk->oneway++ >= tsk->nagle_start && maxnagle && + send <= maxnagle) { + rc = tipc_msg_append(hdr, m, send, maxnagle, txq); + if (unlikely(rc < 0)) + break; + blocks += rc; + tsk->msg_acc++; + if (blocks <= 64 && tsk->expect_ack) { + tsk->snd_backlog = blocks; + sent += send; + break; + } else if (blocks > 64) { + tsk->pkt_cnt += skb_queue_len(txq); + } else { + skb = skb_peek_tail(txq); + if (skb) { + msg_set_ack_required(buf_msg(skb)); + tsk->expect_ack = true; + } else { + tsk->expect_ack = false; + } + tsk->msg_acc = 0; + tsk->pkt_cnt = 0; + } + } else { + rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq); + if (unlikely(rc != send)) + break; + blocks += tsk_inc(tsk, send + MIN_H_SIZE); + } + trace_tipc_sk_sendstream(sk, skb_peek(txq), TIPC_DUMP_SK_SNDQ, " "); - rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + rc = tipc_node_xmit(net, txq, dnode, tsk->portid); if (unlikely(rc == -ELINKCONG)) { tsk->cong_link_cnt = 1; rc = 0; } if (likely(!rc)) { - tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE); + tsk->snt_unacked += blocks; + tsk->snd_backlog = 0; sent += send; } } while (sent < dlen && !rc); @@ -1494,7 +1642,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) * * Used for SOCK_SEQPACKET messages. 
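The 'blocks' bookkeeping above counts advertised buffer units rather than bytes when the peer supports block flow control. For reference, tsk_inc() earlier in this file is essentially the following (FLOWCTL_BLK_SZ is 1024):

        static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
        {
                if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
                        return msglen / FLOWCTL_BLK_SZ + 1;     /* 1k blocks */
                return 1;       /* legacy peer: one message, one unit */
        }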
* - * Returns the number of bytes sent on success, or errno otherwise + * Return: the number of bytes sent on success, or errno otherwise */ static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { @@ -1523,8 +1671,9 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); - tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); + tsk_set_nagle(tsk); __skb_queue_purge(&sk->sk_write_queue); if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) return; @@ -1537,7 +1686,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, /** * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @hdr: received message header + * @skb: received message * * Note: Address is not captured if not requested by receiver. */ @@ -1550,7 +1699,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) return; srcaddr->sock.family = AF_TIPC; - srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.addrtype = TIPC_SOCKET_ADDR; srcaddr->sock.scope = 0; srcaddr->sock.addr.id.ref = msg_origport(hdr); srcaddr->sock.addr.id.node = msg_orignode(hdr); @@ -1562,7 +1711,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) /* Group message users may also want to know sending member's id */ srcaddr->member.family = AF_TIPC; - srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.addrtype = TIPC_SERVICE_ADDR; srcaddr->member.scope = 0; srcaddr->member.addr.name.name.type = msg_nametype(hdr); srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; @@ -1578,90 +1727,80 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) * * Note: Ancillary data is not captured if not requested by receiver. * - * Returns 0 if successful, otherwise errno + * Return: 0 if successful, otherwise errno */ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, struct tipc_sock *tsk) { - struct tipc_msg *msg; - u32 anc_data[3]; - u32 err; - u32 dest_type; - int has_name; - int res; + struct tipc_msg *hdr; + u32 data[3] = {0,}; + bool has_addr; + int dlen, rc; if (likely(m->msg_controllen == 0)) return 0; - msg = buf_msg(skb); - /* Optionally capture errored message object(s) */ - err = msg ? msg_errcode(msg) : 0; - if (unlikely(err)) { - anc_data[0] = err; - anc_data[1] = msg_data_sz(msg); - res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data); - if (res) - return res; - if (anc_data[1]) { - if (skb_linearize(skb)) - return -ENOMEM; - msg = buf_msg(skb); - res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], - msg_data(msg)); - if (res) - return res; - } + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + + /* Capture errored message object, if any */ + if (msg_errcode(hdr)) { + if (skb_linearize(skb)) + return -ENOMEM; + hdr = buf_msg(skb); + data[0] = msg_errcode(hdr); + data[1] = dlen; + rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data); + if (rc || !dlen) + return rc; + rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr)); + if (rc) + return rc; } - /* Optionally capture message destination object */ - dest_type = msg ? 
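User space retrieves the objects captured by tipc_sk_anc_data_recv() through the control-message interface. A sketch of draining SOL_TIPC ancillary data after recvmsg(); the msghdr is assumed to have been set up with a msg_control buffer:

        #include <sys/socket.h>
        #include <linux/tipc.h>

        static void scan_anc_data(struct msghdr *m)
        {
                struct cmsghdr *c;

                for (c = CMSG_FIRSTHDR(m); c; c = CMSG_NXTHDR(m, c)) {
                        if (c->cmsg_level != SOL_TIPC)
                                continue;
                        switch (c->cmsg_type) {
                        case TIPC_ERRINFO:   /* u32 error code, u32 data length */
                        case TIPC_RETDATA:   /* the bounced payload itself      */
                        case TIPC_DESTNAME:  /* u32 type, lower, upper          */
                                /* payload starts at CMSG_DATA(c) */
                                break;
                        }
                }
        }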
msg_type(msg) : TIPC_DIRECT_MSG; - switch (dest_type) { + /* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */ + switch (msg_type(hdr)) { case TIPC_NAMED_MSG: - has_name = 1; - anc_data[0] = msg_nametype(msg); - anc_data[1] = msg_namelower(msg); - anc_data[2] = msg_namelower(msg); + has_addr = true; + data[0] = msg_nametype(hdr); + data[1] = msg_namelower(hdr); + data[2] = data[1]; break; case TIPC_MCAST_MSG: - has_name = 1; - anc_data[0] = msg_nametype(msg); - anc_data[1] = msg_namelower(msg); - anc_data[2] = msg_nameupper(msg); + has_addr = true; + data[0] = msg_nametype(hdr); + data[1] = msg_namelower(hdr); + data[2] = msg_nameupper(hdr); break; case TIPC_CONN_MSG: - has_name = (tsk->conn_type != 0); - anc_data[0] = tsk->conn_type; - anc_data[1] = tsk->conn_instance; - anc_data[2] = tsk->conn_instance; + has_addr = !!tsk->conn_addrtype; + data[0] = msg_nametype(&tsk->phdr); + data[1] = msg_nameinst(&tsk->phdr); + data[2] = data[1]; break; default: - has_name = 0; + has_addr = false; } - if (has_name) { - res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data); - if (res) - return res; - } - - return 0; + if (!has_addr) + return 0; + return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data); } -static void tipc_sk_send_ack(struct tipc_sock *tsk) +static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; - struct net *net = sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); if (!tipc_sk_connected(sk)) - return; + return NULL; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_port, tsk->portid, TIPC_OK); if (!skb) - return; + return NULL; msg = buf_msg(skb); msg_set_conn_ack(msg, tsk->rcv_unacked); tsk->rcv_unacked = 0; @@ -1671,13 +1810,25 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk) tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); msg_set_adv_win(msg, tsk->rcv_win); } - tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); + return skb; +} + +static void tipc_sk_send_ack(struct tipc_sock *tsk) +{ + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (!skb) + return; + + tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk), + msg_link_selector(buf_msg(skb))); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); long timeo = *timeop; int err = sock_error(sk); @@ -1685,15 +1836,17 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) return err; for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sk->sk_shutdown & RCV_SHUTDOWN) { err = -ENOTCONN; break; } + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + sched_annotate_sleep(); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -1709,13 +1862,13 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) if (err) break; } - finish_wait(sk_sleep(sk), &wait); *timeop = timeo; return err; } /** * tipc_recvmsg - receive packet-oriented message + * @sock: network socket * @m: descriptor for message info * @buflen: length of user buffer area * @flags: receive flags @@ -1723,7 +1876,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * Used for 
SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. * If the complete message doesn't fit in user area, truncate it. * - * Returns size of returned message data, errno otherwise + * Return: size of returned message data, errno otherwise */ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buflen, int flags) @@ -1732,6 +1885,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; @@ -1755,6 +1909,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); @@ -1774,18 +1929,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { - copy = min_t(int, dlen, buflen); - if (unlikely(copy != dlen)) - m->msg_flags |= MSG_TRUNC; - rc = skb_copy_datagram_msg(skb, hlen, m, copy); + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } } else { copy = 0; rc = 0; - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; + goto exit; + } } - if (unlikely(rc)) - goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { @@ -1801,13 +1971,16 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Send group flow control advertisement when applicable */ if (tsk->group && msg_in_group(hdr) && !grp_evt) { - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), msg_orignode(hdr), msg_origport(hdr), &xmitq); tipc_node_distr_xmit(sock_net(sk), &xmitq); } + if (skb_cb->bytes_read) + goto exit; + tsk_advance_rx_queue(sk); if (likely(!connected)) @@ -1824,6 +1997,7 @@ exit: /** * tipc_recvstream - receive stream-oriented data + * @sock: network socket * @m: descriptor for message info * @buflen: total size of user buffer area * @flags: receive flags @@ -1831,7 +2005,7 @@ exit: * Used for SOCK_STREAM messages only. If not enough data is available * will optionally wait for more; never truncates data. 
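With the bytes_read handling above, a datagram larger than the supplied buffer can now be drained in pieces by passing MSG_EOR to recvmsg(); the kernel sets MSG_EOR in msg_flags once the final piece has been returned, and without the flag an oversized datagram is truncated (MSG_TRUNC) as before. Sketch:

        #include <sys/socket.h>

        static void read_in_pieces(int sd)
        {
                char chunk[64];
                struct iovec iov = { .iov_base = chunk, .iov_len = sizeof(chunk) };
                struct msghdr m = { .msg_iov = &iov, .msg_iovlen = 1 };

                do {
                        if (recvmsg(sd, &m, MSG_EOR) < 0)
                                break;
                        /* consume the chunk just copied out */
                } while (!(m.msg_flags & MSG_EOR));     /* set on final piece */
        }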
* - * Returns size of returned message data, errno otherwise + * Return: size of returned message data, errno otherwise */ static int tipc_recvstream(struct socket *sock, struct msghdr *m, size_t buflen, int flags) @@ -1915,7 +2089,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); - if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)) + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); /* Exit if all requested data or FIN/error received */ @@ -1947,12 +2121,13 @@ static void tipc_write_space(struct sock *sk) /** * tipc_data_ready - wake up threads to indicate messages have been received * @sk: socket - * @len: the length of messages */ static void tipc_data_ready(struct sock *sk) { struct socket_wq *wq; + trace_sk_data_ready(sk); + rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) @@ -1982,8 +2157,11 @@ static void tipc_sk_proto_rcv(struct sock *sk, return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); + /* coupled with smp_rmb() in tipc_wait_for_cond() */ + smp_wmb(); tsk->cong_link_cnt--; wakeup = true; + tipc_sk_push_backlog(tsk, false); break; case GROUP_PROTOCOL: tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); @@ -2006,9 +2184,11 @@ static void tipc_sk_proto_rcv(struct sock *sk, * tipc_sk_filter_connect - check incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. - * Returns true if message should be added to receive queue, false otherwise + * @xmitq: for Nagle ACK if any + * Return: true if message should be added to receive queue, false otherwise */ -static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -2023,6 +2203,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (unlikely(msg_mcast(hdr))) return false; + tsk->oneway = 0; switch (sk->sk_state) { case TIPC_CONNECTING: @@ -2036,7 +2217,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (msg_data_sz(hdr)) return true; /* Empty ACK-, - wake up sleeping connect() and drop */ - sk->sk_data_ready(sk); + sk->sk_state_change(sk); msg_set_dest_droppable(hdr, 1); return false; } @@ -2068,9 +2249,22 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; return false; case TIPC_ESTABLISHED: + if (!skb_queue_empty(&sk->sk_write_queue)) + tipc_sk_push_backlog(tsk, false); /* Accept only connection-based messages sent by peer */ - if (likely(con_msg && !err && pport == oport && pnode == onode)) + if (likely(con_msg && !err && pport == oport && + pnode == onode)) { + if (msg_ack_required(hdr)) { + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (skb) { + msg_set_nagle_ack(buf_msg(skb)); + __skb_queue_tail(xmitq, skb); + } + } return true; + } if (!tsk_peer_msg(tsk, hdr)) return false; if (!err) @@ -2105,7 +2299,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) * TIPC_HIGH_IMPORTANCE (8 MB) * TIPC_CRITICAL_IMPORTANCE (16 MB) * - * Returns overload limit according to corresponding message importance + * Return: overload limit according to corresponding message importance */ static unsigned int rcvbuf_limit(struct sock 
*sk, struct sk_buff *skb) { @@ -2113,13 +2307,13 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) struct tipc_msg *hdr = buf_msg(skb); if (unlikely(msg_in_group(hdr))) - return sk->sk_rcvbuf; + return READ_ONCE(sk->sk_rcvbuf); if (unlikely(!msg_connected(hdr))) - return sk->sk_rcvbuf << msg_importance(hdr); + return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr); if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) - return sk->sk_rcvbuf; + return READ_ONCE(sk->sk_rcvbuf); return FLOWCTL_MSG_LIM; } @@ -2128,12 +2322,12 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * tipc_sk_filter_rcv - validate incoming message * @sk: socket * @skb: pointer to message. + * @xmitq: output message area (FIXME) * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * * Called with socket lock already taken - * */ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *xmitq) @@ -2144,6 +2338,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; + int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); @@ -2157,18 +2352,21 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(grp)) tipc_group_filter_msg(grp, &inputq, xmitq); + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) + tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq); + /* Validate and add to receive buffer if there is space */ while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); limit = rcvbuf_limit(sk, skb); - if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) || (!sk_conn && msg_connected(hdr)) || (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload2!"); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); err = TIPC_ERR_OVERLOAD; } @@ -2219,13 +2417,14 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) * @inputq: list of incoming buffers with potentially different destinations * @sk: socket where the buffers should be enqueued * @dport: port number for the socket + * @xmitq: output queue * * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; @@ -2259,7 +2458,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!"); /* Overload => reject message back to sender */ onode = tipc_own_addr(sock_net(sk)); - atomic_inc(&sk->sk_drops); + sk_drops_inc(sk); if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) { trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL, "@sk_enqueue!"); @@ -2271,6 +2470,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /** * tipc_sk_rcv - handle a chain of incoming buffers + * @net: the associated network namespace * @inputq: buffer list containing the buffers * Consumes all buffers in list until inputq is empty * Note: may be called in multiple threads referring to the same queue @@ -2335,15 +2535,27 @@ static int 
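The new receive-budget bound in tipc_sk_enqueue() deserves a note: the old 'jiffies + 2' limit was HZ-dependent, two ticks being 2 ms at HZ=1000 but 20 ms at HZ=100, whereas the replacement always means roughly 20 ms:

        /* 20 ticks at HZ=1000, 2 ticks at HZ=100: ~20 ms either way */
        unsigned long time_limit = jiffies + usecs_to_jiffies(20000);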
tipc_wait_for_connect(struct socket *sock, long *timeo_p) return -ETIMEDOUT; if (signal_pending(current)) return sock_intr_errno(*timeo_p); + if (sk->sk_state == TIPC_DISCONNECTING) + break; add_wait_queue(sk_sleep(sk), &wait); - done = sk_wait_event(sk, timeo_p, - sk->sk_state != TIPC_CONNECTING, &wait); + done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk), + &wait); remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } +static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) +{ + if (addr->family != AF_TIPC) + return false; + if (addr->addrtype == TIPC_SERVICE_RANGE) + return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); + return (addr->addrtype == TIPC_SERVICE_ADDR || + addr->addrtype == TIPC_SOCKET_ADDR); +} + /** * tipc_connect - establish a connection to another TIPC port * @sock: socket structure @@ -2351,9 +2563,9 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) * @destlen: size of socket address data structure * @flags: file-related flags associated with socket * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int tipc_connect(struct socket *sock, struct sockaddr *dest, +static int tipc_connect(struct socket *sock, struct sockaddr_unsized *dest, int destlen, int flags) { struct sock *sk = sock->sk; @@ -2379,18 +2591,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!tipc_sk_type_connectionless(sk)) res = -EINVAL; goto exit; - } else if (dst->family != AF_TIPC) { - res = -EINVAL; } - if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) + if (!tipc_sockaddr_is_sane(dst)) { res = -EINVAL; - if (res) goto exit; - + } /* DGRAM/RDM connect(), just save the destaddr */ if (tipc_sk_type_connectionless(sk)) { memcpy(&tsk->peer, dest, destlen); goto exit; + } else if (dst->addrtype == TIPC_SERVICE_RANGE) { + res = -EINVAL; + goto exit; } previous = sk->sk_state; @@ -2400,6 +2612,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); /* If connect is in non-blocking case, set MSG_DONTWAIT to * indicate send_msg() is never blocked. @@ -2416,7 +2629,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, * case is EINPROGRESS, rather than EALREADY. */ res = -EINPROGRESS; - /* fall through */ + fallthrough; case TIPC_CONNECTING: if (!timeout) { if (previous == TIPC_CONNECTING) @@ -2444,7 +2657,7 @@ exit: * @sock: socket structure * @len: (unused) * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_listen(struct socket *sock, int len) { @@ -2461,7 +2674,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2470,12 +2683,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. 
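On the user-space side, tipc_sockaddr_is_sane() admits service and socket addresses for connect(); a service range is only accepted for datagram sockets, where it is merely stored as the default destination. Sketch with arbitrary service values:

        #include <sys/socket.h>
        #include <linux/tipc.h>

        static int connect_example(void)
        {
                int sd = socket(AF_TIPC, SOCK_SEQPACKET, 0);
                struct sockaddr_tipc a = {
                        .family = AF_TIPC,
                        .addrtype = TIPC_SERVICE_ADDR,  /* or TIPC_SOCKET_ADDR */
                        .addr.name.name = { .type = 18888, .instance = 17 },
                };

                /* TIPC_SERVICE_RANGE here would now yield -EINVAL */
                return connect(sd, (struct sockaddr *)&a, sizeof(a));
        }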
*/ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2487,25 +2700,25 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } /** * tipc_accept - wait for connection request * @sock: listening socket - * @newsock: new socket that is to be connected - * @flags: file-related flags associated with socket + * @new_sock: new socket that is to be connected + * @arg: arguments for accept * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, - bool kern) +static int tipc_accept(struct socket *sock, struct socket *new_sock, + struct proto_accept_arg *arg) { struct sock *new_sk, *sk = sock->sk; - struct sk_buff *buf; struct tipc_sock *new_tsock; + struct msghdr m = {NULL,}; struct tipc_msg *msg; + struct sk_buff *buf; long timeo; int res; @@ -2515,14 +2728,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, res = -EINVAL; goto exit; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); res = tipc_wait_for_accept(sock, timeo); if (res) goto exit; buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); @@ -2538,31 +2751,31 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, * Reject any stray messages received by new socket * before the socket lock was taken (very, very unlikely) */ - tsk_rej_rx_queue(new_sk); + tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT); /* Connect new socket to it's peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); - tsk_set_importance(new_tsock, msg_importance(msg)); + tsk_set_importance(new_sk, msg_importance(msg)); if (msg_named(msg)) { - new_tsock->conn_type = msg_nametype(msg); - new_tsock->conn_instance = msg_nameinst(msg); + new_tsock->conn_addrtype = TIPC_SERVICE_ADDR; + msg_set_nametype(&new_tsock->phdr, msg_nametype(msg)); + msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg)); } /* - * Respond to 'SYN-' by discarding it & returning 'ACK'-. - * Respond to 'SYN+' by queuing it on new socket. + * Respond to 'SYN-' by discarding it & returning 'ACK'. + * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. */ if (!msg_data_sz(msg)) { - struct msghdr m = {NULL,}; - tsk_advance_rx_queue(sk); - __tipc_sendstream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); + __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: release_sock(sk); @@ -2576,7 +2789,7 @@ exit: * * Terminates connection (if necessary), then purges socket's receive queue. 
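Seen from user space this remains the familiar accept() loop; the new socket inherits the peer's importance and, for connections set up by name, records the service address, while the kernel itself emits the 'ACK' on the accepted socket. Sketch:

        #include <sys/socket.h>
        #include <unistd.h>

        static void server_loop(int listener)
        {
                for (;;) {      /* listener: socket() + bind() + listen() */
                        int peer = accept(listener, NULL, NULL);

                        if (peer < 0)
                                continue;
                        /* data carried by a 'SYN+' is already queued here */
                        /* ... serve the connection ... */
                        close(peer);
                }
        }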
* - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_shutdown(struct socket *sock, int how) { @@ -2590,18 +2803,18 @@ static int tipc_shutdown(struct socket *sock, int how) trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - sk->sk_shutdown = SEND_SHUTDOWN; + sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ __skb_queue_purge(&sk->sk_receive_queue); - /* Wake up anyone sleeping in poll */ - sk->sk_state_change(sk); res = 0; } else { res = -ENOTCONN; } + /* Wake up anyone sleeping in poll. */ + sk->sk_state_change(sk); release_sock(sk); return res; @@ -2639,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ @@ -2648,19 +2862,20 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) static void tipc_sk_timeout(struct timer_list *t) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sock *sk = timer_container_of(sk, t, sk_timer); struct tipc_sock *tsk = tipc_sk(sk); u32 pnode = tsk_peer_node(tsk); struct sk_buff_head list; int rc = 0; - skb_queue_head_init(&list); + __skb_queue_head_init(&list); bh_lock_sock(sk); /* Try again later if socket is busy */ if (sock_owned_by_user(sk)) { sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); bh_unlock_sock(sk); + sock_put(sk); return; } @@ -2682,66 +2897,62 @@ static void tipc_sk_timeout(struct timer_list *t) sock_put(sk); } -static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq) +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); - struct publication *publ; + struct tipc_socket_addr skaddr; + struct publication *p; u32 key; - if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; if (key == tsk->portid) return -EADDRINUSE; - - publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, - scope, tsk->portid, key); - if (unlikely(!publ)) + skaddr.ref = tsk->portid; + skaddr.node = tipc_own_addr(net); + p = tipc_nametbl_publish(net, ua, &skaddr, key); + if (unlikely(!p)) return -EINVAL; - list_add(&publ->binding_sock, &tsk->publications); + list_add(&p->binding_sock, &tsk->publications); tsk->pub_count++; - tsk->published = 1; + tsk->published = true; return 0; } -static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, - struct tipc_name_seq const *seq) +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) { struct net *net = sock_net(&tsk->sk); - struct publication *publ; - struct publication *safe; + struct publication *safe, *p; + struct tipc_uaddr _ua; int rc = -EINVAL; - if (scope != TIPC_NODE_SCOPE) - scope = TIPC_CLUSTER_SCOPE; - - list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { - if (seq) { - if (publ->scope != scope) - continue; - if (publ->type != seq->type) - continue; - if (publ->lower != seq->lower) - continue; - if (publ->upper != seq->upper) - break; - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); - rc = 0; - break; + list_for_each_entry_safe(p, safe, 
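The switch from rhashtable_lookup_fast() to rhashtable_lookup() in tipc_sk_lookup() below is safe because the caller already sits inside an RCU read-side critical section; the _fast() variant merely wraps the same lookup in its own lock pair, essentially (per include/linux/rhashtable.h):

        static inline void *rhashtable_lookup_fast(
                struct rhashtable *ht, const void *key,
                const struct rhashtable_params params)
        {
                void *obj;

                rcu_read_lock();
                obj = rhashtable_lookup(ht, key, params);
                rcu_read_unlock();

                return obj;
        }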
&tsk->publications, binding_sock) { + if (!ua) { + tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope, + p->sr.type, p->sr.lower, p->sr.upper); + tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key); + continue; } - tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->upper, publ->key); + /* Unbind specific publication */ + if (p->scope != ua->scope) + continue; + if (p->sr.type != ua->sr.type) + continue; + if (p->sr.lower != ua->sr.lower) + continue; + if (p->sr.upper != ua->sr.upper) + break; + tipc_nametbl_withdraw(net, ua, &p->sk, p->key); rc = 0; + break; } - if (list_empty(&tsk->publications)) + if (list_empty(&tsk->publications)) { tsk->published = 0; + rc = 0; + } return rc; } @@ -2784,7 +2995,7 @@ static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) struct tipc_sock *tsk; rcu_read_lock(); - tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); + tsk = rhashtable_lookup(&tn->sk_rht, &portid, tsk_rht_params); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); @@ -2798,7 +3009,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk) struct net *net = sock_net(sk); struct tipc_net *tn = net_generic(net, tipc_net_id); u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; - u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + u32 portid = get_random_u32_below(remaining) + TIPC_MIN_PORT; while (remaining--) { portid++; @@ -2820,10 +3031,8 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct sock *sk = &tsk->sk; struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) __sock_put(sk); - } } static const struct rhashtable_params tsk_rht_params = { @@ -2858,13 +3067,15 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; - struct tipc_name_seq seq; + struct tipc_uaddr ua; int rc; if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; if (mreq->scope > TIPC_NODE_SCOPE) return -EINVAL; + if (mreq->scope != TIPC_NODE_SCOPE) + mreq->scope = TIPC_CLUSTER_SCOPE; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); @@ -2874,11 +3085,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) msg_set_lookup_scope(hdr, mreq->scope); msg_set_nametype(hdr, mreq->type); msg_set_dest_droppable(hdr, true); - seq.type = mreq->type; - seq.lower = mreq->instance; - seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); - rc = tipc_sk_publish(tsk, mreq->scope, &seq); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope, + mreq->type, mreq->instance, mreq->instance); + tipc_nametbl_build_group(net, grp, &ua); + rc = tipc_sk_publish(tsk, &ua); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; @@ -2895,15 +3105,17 @@ static int tipc_sk_leave(struct tipc_sock *tsk) { struct net *net = sock_net(&tsk->sk); struct tipc_group *grp = tsk->group; - struct tipc_name_seq seq; + struct tipc_uaddr ua; int scope; if (!grp) return -EINVAL; - tipc_group_self(grp, &seq, &scope); + ua.addrtype = TIPC_SERVICE_RANGE; + tipc_group_self(grp, &ua.sr, &scope); + ua.scope = scope; tipc_group_delete(net, grp); tsk->group = NULL; - tipc_sk_withdraw(tsk, scope, &seq); + tipc_sk_withdraw(tsk, &ua); return 0; } @@ -2918,10 +3130,10 @@ static int tipc_sk_leave(struct tipc_sock 
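From user space, joining a group is a single socket option taking a struct tipc_group_req; the type and instance below are arbitrary, and per the checks above the type must not be reserved (below 64) while any scope other than TIPC_NODE_SCOPE is forced to cluster scope:

        #include <sys/socket.h>
        #include <linux/tipc.h>

        static int join_group(int sd)
        {
                struct tipc_group_req req = {
                        .type = 18888,                   /* group identity  */
                        .instance = 17,                  /* member identity */
                        .scope = TIPC_CLUSTER_SCOPE,
                        .flags = TIPC_GROUP_MEMBER_EVTS, /* optional events */
                };

                return setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN,
                                  &req, sizeof(req));
        }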
*tsk) * For stream sockets only, accepts and ignores all IPPROTO_TCP options * (to ease compatibility). * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, - char __user *ov, unsigned int ol) + sockptr_t ov, unsigned int ol) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -2939,19 +3151,20 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, case TIPC_SRC_DROPPABLE: case TIPC_DEST_DROPPABLE: case TIPC_CONN_TIMEOUT: + case TIPC_NODELAY: if (ol < sizeof(value)) return -EINVAL; - if (get_user(value, (u32 __user *)ov)) + if (copy_from_sockptr(&value, ov, sizeof(u32))) return -EFAULT; break; case TIPC_GROUP_JOIN: if (ol < sizeof(mreq)) return -EINVAL; - if (copy_from_user(&mreq, ov, sizeof(mreq))) + if (copy_from_sockptr(&mreq, ov, sizeof(mreq))) return -EFAULT; break; default: - if (ov || ol) + if (!sockptr_is_null(ov) || ol) return -EINVAL; } @@ -2959,7 +3172,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, switch (opt) { case TIPC_IMPORTANCE: - res = tsk_set_importance(tsk, value); + res = tsk_set_importance(sk, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) @@ -2987,6 +3200,10 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, case TIPC_GROUP_LEAVE: res = tipc_sk_leave(tsk); break; + case TIPC_NODELAY: + tsk->nodelay = !!value; + tsk_set_nagle(tsk); + break; default: res = -EINVAL; } @@ -3007,14 +3224,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, * For stream sockets only, returns 0 length result for all IPPROTO_TCP options * (to ease compatibility). * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, int __user *ol) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_name_seq seq; + struct tipc_service_range seq; int len, scope; u32 value; int res; @@ -3049,6 +3266,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; + case TIPC_SOCK_RECVQ_USED: + value = sk_rmem_alloc_get(sk); + break; case TIPC_GROUP_JOIN: seq.type = 0; if (tsk->group) @@ -3112,12 +3332,12 @@ static int tipc_socketpair(struct socket *sock1, struct socket *sock2) u32 onode = tipc_own_addr(sock_net(sock1->sk)); tsk1->peer.family = AF_TIPC; - tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.addrtype = TIPC_SOCKET_ADDR; tsk1->peer.scope = TIPC_NODE_SCOPE; tsk1->peer.addr.id.ref = tsk2->portid; tsk1->peer.addr.id.node = onode; tsk2->peer.family = AF_TIPC; - tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.addrtype = TIPC_SOCKET_ADDR; tsk2->peer.scope = TIPC_NODE_SCOPE; tsk2->peer.addr.id.ref = tsk1->portid; tsk2->peer.addr.id.node = onode; @@ -3147,7 +3367,6 @@ static const struct proto_ops msg_ops = { .sendmsg = tipc_sendmsg, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops packet_ops = { @@ -3168,7 +3387,6 @@ static const struct proto_ops packet_ops = { .sendmsg = tipc_send_packet, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops stream_ops = { @@ -3189,7 +3407,6 @@ static const struct proto_ops stream_ops = { .sendmsg = tipc_sendstream, .recvmsg = tipc_recvstream, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const 
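/*
 * A sketch of the sockptr_t conversion above, not part of this diff: a
 * setsockopt handler now receives a sockptr_t that may wrap a user *or* a
 * kernel pointer, so copy_from_sockptr() replaces the old get_user()/
 * copy_from_user() calls. The helper name is hypothetical.
 */
#include <linux/sockptr.h>
#include <linux/errno.h>
#include <linux/types.h>

static int example_get_u32_opt(sockptr_t ov, unsigned int ol, u32 *value)
{
	if (ol < sizeof(*value))
		return -EINVAL;
	if (copy_from_sockptr(value, ov, sizeof(*value)))
		return -EFAULT;
	return 0;
}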
struct net_proto_family tipc_family_ops = { @@ -3208,7 +3425,7 @@ static struct proto tipc_proto = { /** * tipc_socket_init - initialize TIPC socket interface * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ int tipc_socket_init(void) { @@ -3242,26 +3459,29 @@ void tipc_socket_stop(void) /* Caller should hold socket lock for the passed tipc socket. */ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) { - u32 peer_node; - u32 peer_port; + u32 peer_node, peer_port; + u32 conn_type, conn_instance; struct nlattr *nest; peer_node = tsk_peer_node(tsk); peer_port = tsk_peer_port(tsk); - - nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + conn_type = msg_nametype(&tsk->phdr); + conn_instance = msg_nameinst(&tsk->phdr); + nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON); + if (!nest) + return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) goto msg_full; if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) goto msg_full; - if (tsk->conn_type != 0) { + if (tsk->conn_addrtype != 0) { if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) goto msg_full; - if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) + if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type)) goto msg_full; - if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) + if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance)) goto msg_full; } nla_nest_end(skb, nest); @@ -3306,7 +3526,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, if (!hdr) goto msg_cancel; - attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; @@ -3338,11 +3558,8 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, rhashtable_walk_start(iter); while ((tsk = rhashtable_walk_next(iter)) != NULL) { if (IS_ERR(tsk)) { - err = PTR_ERR(tsk); - if (err == -EAGAIN) { - err = 0; + if (PTR_ERR(tsk) == -EAGAIN) continue; - } break; } @@ -3411,7 +3628,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, if (!(sk_filter_state & (1 << sk->sk_state))) return 0; - attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto msg_cancel; @@ -3423,13 +3640,13 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || nla_put_u32(skb, TIPC_NLA_SOCK_UID, from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk), - sock_i_uid(sk))) || + sk_uid(sk))) || nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, tipc_diag_gen_cookie(sk), TIPC_NLA_SOCK_PAD)) goto attr_msg_cancel; - stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT); + stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT); if (!stat) goto attr_msg_cancel; @@ -3438,7 +3655,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, skb_queue_len(&sk->sk_write_queue)) || nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, - atomic_read(&sk->sk_drops))) + sk_drops_read(sk))) goto stat_msg_cancel; if (tsk->cong_link_cnt && @@ -3486,17 +3703,17 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, if (!hdr) goto msg_cancel; - attrs = nla_nest_start(skb, TIPC_NLA_PUBL); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL); if (!attrs) goto genlmsg_cancel; if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type)) 
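/*
 * A sketch of the netlink nesting pattern used above, not part of this
 * diff: nla_nest_start_noflag() opens a container attribute, any
 * nla_put_u32() failure unwinds via nla_nest_cancel(), and nla_nest_end()
 * seals the nest. The numeric attribute IDs are placeholders.
 */
#include <net/netlink.h>

static int example_fill_peer(struct sk_buff *skb, u32 node, u32 port)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, 1 /* e.g. TIPC_NLA_SOCK_CON */);
	if (!nest)
		return -EMSGSIZE;
	if (nla_put_u32(skb, 2 /* node attr, placeholder id */, node) ||
	    nla_put_u32(skb, 3 /* sock attr, placeholder id */, port)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}
	nla_nest_end(skb, nest);
	return 0;
}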
goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper)) goto attr_msg_cancel; nla_nest_end(skb, attrs); @@ -3525,7 +3742,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, if (p->key == *last_publ) break; } - if (p->key != *last_publ) { + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback @@ -3563,19 +3780,15 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_sock *tsk; if (!tsk_portid) { - struct nlattr **attrs; + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; - err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, - attrs[TIPC_NLA_SOCK], - tipc_nl_sock_policy, NULL); + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy, NULL); if (err) return err; @@ -3609,10 +3822,11 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) /** * tipc_sk_filtering - check if a socket should be traced * @sk: the socket to be examined - * @sysctl_tipc_sk_filter[]: the socket tuple for filtering, - * (portid, sock type, name type, name lower, name upper) * - * Returns true if the socket meets the socket tuple data + * @sysctl_tipc_sk_filter is used as the socket tuple for filtering: + * (portid, sock type, name type, name lower, name upper) + * + * Return: true if the socket meets the socket tuple data * (value 0 = 'any') or when there is no tuple set (all = 0), * otherwise false */ @@ -3647,16 +3861,16 @@ bool tipc_sk_filtering(struct sock *sk) p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); if (p) { - type = p->type; - lower = p->lower; - upper = p->upper; + type = p->sr.type; + lower = p->sr.lower; + upper = p->sr.upper; } } if (!tipc_sk_type_connectionless(sk)) { - type = tsk->conn_type; - lower = tsk->conn_instance; - upper = tsk->conn_instance; + type = msg_nametype(&tsk->phdr); + lower = msg_nameinst(&tsk->phdr); + upper = lower; } if ((_type && _type != type) || (_lower && _lower != lower) || @@ -3677,7 +3891,7 @@ u32 tipc_sock_get_portid(struct sock *sk) * @sk: tipc sk to be checked * @skb: tipc msg to be checked * - * Returns true if the socket rx queue allocation is > 90%, otherwise false + * Return: true if the socket rx queue allocation is > 90%, otherwise false */ bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) @@ -3695,7 +3909,7 @@ bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) * @sk: tipc sk to be checked * @skb: tipc msg to be checked * - * Returns true if the socket rx queue allocation is > 90%, otherwise false + * Return: true if the socket rx queue allocation is > 90%, otherwise false */ bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb) @@ -3721,6 +3935,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) { int i = 0; size_t sz = (dqueues) ? 
SK_LMAX : SK_LMIN; + u32 conn_type, conn_instance; struct tipc_sock *tsk; struct publication *p; bool tsk_connected; @@ -3741,16 +3956,18 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) if (tsk_connected) { i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk)); i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk)); - i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type); - i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance); + conn_type = msg_nametype(&tsk->phdr); + conn_instance = msg_nameinst(&tsk->phdr); + i += scnprintf(buf + i, sz - i, " %u", conn_type); + i += scnprintf(buf + i, sz - i, " %u", conn_instance); } i += scnprintf(buf + i, sz - i, " | %u", tsk->published); if (tsk->published) { p = list_first_entry_or_null(&tsk->publications, struct publication, binding_sock); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0); - i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0); } i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); @@ -3765,7 +3982,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf); i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk)); i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf); - i += scnprintf(buf + i, sz - i, " | %d\n", sk->sk_backlog.len); + i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len)); if (dqueues & TIPC_DUMP_SK_SNDQ) { i += scnprintf(buf + i, sz - i, "sk_write_queue: "); diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 235b9679acee..02cdf166807d 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -74,5 +74,7 @@ int tipc_dump_done(struct netlink_callback *cb); u32 tipc_sock_get_portid(struct sock *sk); bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb); bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb); +int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen); +int tsk_set_importance(struct sock *sk, int imp); #endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f340e53da625..f8490d94e323 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -3,6 +3,7 @@ * * Copyright (c) 2000-2017, Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
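/*
 * A sketch of the bounded string building used in tipc_sk_dump() above,
 * not part of this diff: scnprintf() returns the number of characters
 * actually written (never more than the remaining space), so the offset
 * and remaining length can be advanced without overflowing the buffer.
 */
#include <linux/kernel.h>

static int example_dump(char *buf, size_t sz, int a, int b)
{
	int i = 0;

	i += scnprintf(buf + i, sz - i, "a=%d", a);
	i += scnprintf(buf + i, sz - i, " b=%d\n", b);
	return i; /* total bytes written, excluding the trailing NUL */
}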
* * Redistribution and use in source and binary forms, with or without @@ -39,75 +40,75 @@ #include "subscr.h" static void tipc_sub_send_event(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port, u32 node) + struct publication *p, + u32 event) { + struct tipc_subscr *s = &sub->evt.s; struct tipc_event *evt = &sub->evt; if (sub->inactive) return; tipc_evt_write(evt, event, event); - tipc_evt_write(evt, found_lower, found_lower); - tipc_evt_write(evt, found_upper, found_upper); - tipc_evt_write(evt, port.ref, port); - tipc_evt_write(evt, port.node, node); + if (p) { + tipc_evt_write(evt, found_lower, p->sr.lower); + tipc_evt_write(evt, found_upper, p->sr.upper); + tipc_evt_write(evt, port.ref, p->sk.ref); + tipc_evt_write(evt, port.node, p->sk.node); + } else { + tipc_evt_write(evt, found_lower, s->seq.lower); + tipc_evt_write(evt, found_upper, s->seq.upper); + tipc_evt_write(evt, port.ref, 0); + tipc_evt_write(evt, port.node, 0); + } tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt); } /** - * tipc_sub_check_overlap - test for subscription overlap with the - * given values + * tipc_sub_check_overlap - test for subscription overlap with the given values + * @subscribed: the service range subscribed for + * @found: the service range we are checking for match * - * Returns 1 if there is overlap, otherwise 0. + * Returns true if there is overlap, otherwise false. */ -int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper) +static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed, + struct tipc_service_range *found) { - if (found_lower < seq->lower) - found_lower = seq->lower; - if (found_upper > seq->upper) - found_upper = seq->upper; - if (found_lower > found_upper) - return 0; - return 1; + u32 found_lower = found->lower; + u32 found_upper = found->upper; + + if (found_lower < subscribed->lower) + found_lower = subscribed->lower; + if (found_upper > subscribed->upper) + found_upper = subscribed->upper; + return found_lower <= found_upper; } void tipc_sub_report_overlap(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port, u32 node, - u32 scope, int must) + struct publication *p, + u32 event, bool must) { - struct tipc_subscr *s = &sub->evt.s; - u32 filter = tipc_sub_read(s, filter); - struct tipc_name_seq seq; + struct tipc_service_range *sr = &sub->s.seq; + u32 filter = sub->s.filter; - seq.type = tipc_sub_read(s, seq.type); - seq.lower = tipc_sub_read(s, seq.lower); - seq.upper = tipc_sub_read(s, seq.upper); - - if (!tipc_sub_check_overlap(&seq, found_lower, found_upper)) + if (!tipc_sub_check_overlap(sr, &p->sr)) return; - if (!must && !(filter & TIPC_SUB_PORTS)) return; - if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE) return; - if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) + if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE) return; spin_lock(&sub->lock); - tipc_sub_send_event(sub, found_lower, found_upper, - event, port, node); + tipc_sub_send_event(sub, p, event); spin_unlock(&sub->lock); } static void tipc_sub_timeout(struct timer_list *t) { - struct tipc_subscription *sub = from_timer(sub, t, timer); - struct tipc_subscr *s = &sub->evt.s; + struct tipc_subscription *sub = timer_container_of(sub, t, timer); spin_lock(&sub->lock); - tipc_sub_send_event(sub, s->seq.lower, s->seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); + tipc_sub_send_event(sub, 
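/*
 * A standalone sketch of the rewritten overlap test above, not part of
 * this diff: two inclusive ranges intersect iff clamping one into the
 * other leaves a non-empty range. Type and function names are
 * illustrative.
 */
#include <linux/types.h>
#include <linux/minmax.h>

struct ex_range { u32 lower, upper; };

static bool ex_ranges_overlap(const struct ex_range *subscribed,
			      const struct ex_range *found)
{
	u32 lo = max(found->lower, subscribed->lower);
	u32 hi = min(found->upper, subscribed->upper);

	return lo <= hi;
}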
NULL, TIPC_SUBSCR_TIMEOUT); sub->inactive = true; spin_unlock(&sub->lock); } @@ -131,12 +132,14 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, struct tipc_subscr *s, int conid) { + u32 lower = tipc_sub_read(s, seq.lower); + u32 upper = tipc_sub_read(s, seq.upper); u32 filter = tipc_sub_read(s, filter); struct tipc_subscription *sub; u32 timeout; if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) || - (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) { + lower > upper) { pr_warn("Subscription rejected, illegal request\n"); return NULL; } @@ -151,6 +154,12 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net, sub->conid = conid; sub->inactive = false; memcpy(&sub->evt.s, s, sizeof(*s)); + sub->s.seq.type = tipc_sub_read(s, seq.type); + sub->s.seq.lower = lower; + sub->s.seq.upper = upper; + sub->s.filter = filter; + sub->s.timeout = tipc_sub_read(s, timeout); + memcpy(sub->s.usr_handle, s->usr_handle, 8); spin_lock_init(&sub->lock); kref_init(&sub->kref); if (!tipc_nametbl_subscribe(sub)) { @@ -168,7 +177,7 @@ void tipc_sub_unsubscribe(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); if (sub->evt.s.timeout != TIPC_WAIT_FOREVER) - del_timer_sync(&sub->timer); + timer_delete_sync(&sub->timer); list_del(&sub->sub_list); tipc_sub_put(sub); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index d793b4343885..60b877531b66 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -3,6 +3,7 @@ * * Copyright (c) 2003-2017, Ericsson AB * Copyright (c) 2005-2007, 2012-2013, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,43 +43,46 @@ #define TIPC_MAX_SUBSCR 65535 #define TIPC_MAX_PUBL 65535 +struct publication; struct tipc_subscription; struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object - * @subscriber: pointer to its subscriber - * @seq: name sequence associated with subscription + * @s: host-endian copy of the user subscription + * @evt: template for events generated by subscription + * @kref: reference count for this subscription + * @net: network namespace associated with subscription * @timer: timer governing subscription duration (optional) - * @nameseq_list: adjacent subscriptions in name sequence's subscription list + * @service_list: adjacent subscriptions in name sequence's subscription list * @sub_list: adjacent subscriptions in subscriber's subscription list - * @evt: template for events generated by subscription + * @conid: connection identifier of topology server + * @inactive: true if this subscription is inactive + * @lock: serialize up/down and timer events */ struct tipc_subscription { + struct tipc_subscr s; + struct tipc_event evt; struct kref kref; struct net *net; struct timer_list timer; struct list_head service_list; struct list_head sub_list; - struct tipc_event evt; int conid; bool inactive; - spinlock_t lock; /* serialize up/down and timer events */ + spinlock_t lock; }; struct tipc_subscription *tipc_sub_subscribe(struct net *net, struct tipc_subscr *s, int conid); void tipc_sub_unsubscribe(struct tipc_subscription *sub); - -int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper); void tipc_sub_report_overlap(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port, u32 node, - u32 scope, int must); -int tipc_topsrv_start(struct net *net); -void tipc_topsrv_stop(struct net *net); + struct 
publication *p, + u32 event, bool must); + +int __net_init tipc_topsrv_init_net(struct net *net); +void __net_exit tipc_topsrv_exit_net(struct net *net); void tipc_sub_put(struct tipc_subscription *subscription); void tipc_sub_get(struct tipc_subscription *subscription); @@ -95,6 +99,16 @@ void tipc_sub_get(struct tipc_subscription *subscription); (swap_ ? swab32(val__) : val__); \ }) +/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format + */ +#define tipc_sub_write(sub_, field_, val_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = val_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (sub__)->field_ = swap_ ? swab32(val__) : val__; \ + }) + /* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format */ #define tipc_evt_write(evt_, field_, val_) \ diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 3481e4906bd6..30d2e06e3d8c 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -35,7 +35,8 @@ #include "core.h" #include "trace.h" - +#include "crypto.h" +#include "bcast.h" #include <linux/sysctl.h> static struct ctl_table_header *tipc_ctl_hdr; @@ -46,14 +47,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, }, { .procname = "sk_filter", @@ -62,7 +65,32 @@ static struct ctl_table tipc_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - {} +#ifdef CONFIG_TIPC_CRYPTO + { + .procname = "max_tfms", + .data = &sysctl_tipc_max_tfms, + .maxlen = sizeof(sysctl_tipc_max_tfms), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, + { + .procname = "key_exchange_enabled", + .data = &sysctl_tipc_key_exchange_enabled, + .maxlen = sizeof(sysctl_tipc_key_exchange_enabled), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { + .procname = "bc_retruni", + .data = &sysctl_tipc_bc_retruni, + .maxlen = sizeof(sysctl_tipc_bc_retruni), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, }; int tipc_register_sysctl(void) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 4a708a4e8583..aad7f96b6009 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -40,14 +40,15 @@ #include "socket.h" #include "addr.h" #include "msg.h" +#include "bearer.h" #include <net/sock.h> #include <linux/module.h> +#include <trace/events/sock.h> /* Number of messages to send before rescheduling */ #define MAX_SEND_MSG_COUNT 25 #define MAX_RECV_MSG_COUNT 25 #define CF_CONNECTED 1 -#define CF_SERVER 2 #define TIPC_SERVER_NAME_LEN 32 @@ -56,7 +57,7 @@ * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * @idr_in_use: amount of allocated identifier entry - * @net: network namspace instance + * @net: network namespace instance * @awork: accept work item * @rcv_wq: receive workqueue * @send_wq: send workqueue @@ -82,7 +83,7 @@ struct tipc_topsrv { * @sock: socket handler associated with connection * @flags: indicates connection state * @server: pointer to connected server - * @sub_list: lsit to all pertaing subscriptions + * @sub_list: list to all pertaining subscriptions * @sub_lock: lock protecting 
the subscription list * @rwork: receive work item * @outqueue: pointer to first outbound message in queue @@ -176,7 +177,7 @@ static void tipc_conn_close(struct tipc_conn *con) conn_put(con); } -static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; @@ -202,10 +203,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) } con->conid = ret; s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); set_bit(CF_CONNECTED, &con->flags); con->server = s; + con->sock = sock; + conn_get(con); + spin_unlock_bh(&s->idr_lock); return con; } @@ -236,8 +239,8 @@ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { tipc_sub_unsubscribe(sub); atomic_dec(&tn->subscription_count); - } else if (s) { - break; + if (s) + break; } } spin_unlock_bh(&con->sub_lock); @@ -361,8 +364,10 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, { struct tipc_net *tn = tipc_net(srv->net); struct tipc_subscription *sub; + u32 s_filter = tipc_sub_read(s, filter); - if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + if (s_filter & TIPC_SUB_CANCEL) { + tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } @@ -392,18 +397,21 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) iov.iov_base = &s; iov.iov_len = sizeof(s); msg.msg_name = NULL; - iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, iov.iov_len); ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); - ret = tipc_conn_rcv_sub(srv, con, &s); + /* RACE: the connection can be closed in the meantime */ + if (likely(connected(con))) + ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); + if (!ret) + return 0; } - if (ret < 0) - tipc_conn_close(con); + tipc_conn_close(con); return ret; } @@ -432,6 +440,8 @@ static void tipc_conn_data_ready(struct sock *sk) { struct tipc_conn *con; + trace_sk_data_ready(sk); + read_lock_bh(&sk->sk_callback_lock); con = sk->sk_user_data; if (connected(con)) { @@ -445,17 +455,24 @@ static void tipc_conn_data_ready(struct sock *sk) static void tipc_topsrv_accept(struct work_struct *work) { struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); - struct socket *lsock = srv->listener; - struct socket *newsock; + struct socket *newsock, *lsock; struct tipc_conn *con; struct sock *newsk; int ret; + spin_lock_bh(&srv->idr_lock); + if (!srv->listener) { + spin_unlock_bh(&srv->idr_lock); + return; + } + lsock = srv->listener; + spin_unlock_bh(&srv->idr_lock); + while (1) { ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; - con = tipc_conn_alloc(srv); + con = tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); @@ -467,31 +484,32 @@ static void tipc_topsrv_accept(struct work_struct *work) newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; - con->sock = newsock; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); + conn_put(con); } } -/* tipc_toprsv_listener_data_ready - interrupt callback with connection request +/* tipc_topsrv_listener_data_ready - interrupt callback with connection 
request * The queued job is launched into tipc_topsrv_accept() */ static void tipc_topsrv_listener_data_ready(struct sock *sk) { struct tipc_topsrv *srv; + trace_sk_data_ready(sk); + read_lock_bh(&sk->sk_callback_lock); srv = sk->sk_user_data; - if (srv->listener) + if (srv) queue_work(srv->rcv_wq, &srv->awork); read_unlock_bh(&sk->sk_callback_lock); } static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) { - int imp = TIPC_CRITICAL_IMPORTANCE; struct socket *lsock = NULL; struct sockaddr_tipc saddr; struct sock *sk; @@ -508,19 +526,20 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) sk->sk_user_data = srv; write_unlock_bh(&sk->sk_callback_lock); - rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE, - (char *)&imp, sizeof(imp)); + lock_sock(sk); + rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE); + release_sock(sk); if (rc < 0) goto err; saddr.family = AF_TIPC; - saddr.addrtype = TIPC_ADDR_NAMESEQ; - saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addrtype = TIPC_SERVICE_RANGE; + saddr.addr.nameseq.type = TIPC_TOP_SRV; saddr.addr.nameseq.lower = TIPC_TOP_SRV; saddr.addr.nameseq.upper = TIPC_TOP_SRV; saddr.scope = TIPC_NODE_SCOPE; - rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); + rc = tipc_sk_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); if (rc < 0) goto err; rc = kernel_listen(lsock, 0); @@ -563,19 +582,19 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; sub.filter = filter; - *(u32 *)&sub.usr_handle = port; + *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); + con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; - con->sock = NULL; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); - if (rc >= 0) - return true; + if (rc) + conn_put(con); + conn_put(con); - return false; + return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) @@ -607,6 +626,7 @@ static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); skb_queue_head_init(&evtq); __skb_queue_tail(&evtq, skb); + tipc_loopback_trace(net, &evtq); tipc_sk_rcv(net, &evtq); } @@ -634,7 +654,7 @@ static void tipc_topsrv_work_stop(struct tipc_topsrv *s) destroy_workqueue(s->send_wq); } -int tipc_topsrv_start(struct net *net) +static int tipc_topsrv_start(struct net *net) { struct tipc_net *tn = tipc_net(net); const char name[] = "topology_server"; @@ -658,16 +678,22 @@ int tipc_topsrv_start(struct net *net) ret = tipc_topsrv_work_start(srv); if (ret < 0) - return ret; + goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) - tipc_topsrv_work_stop(srv); + goto err_create; + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); return ret; } -void tipc_topsrv_stop(struct net *net) +static void tipc_topsrv_stop(struct net *net) { struct tipc_topsrv *srv = tipc_topsrv(net); struct socket *lsock = srv->listener; @@ -678,8 +704,10 @@ void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } @@ -687,8 +715,19 @@ void tipc_topsrv_stop(struct net *net) __module_get(lsock->sk->sk_prot_creator->owner); srv->listener = NULL; spin_unlock_bh(&srv->idr_lock); - sock_release(lsock); + tipc_topsrv_work_stop(srv); + 
sock_release(lsock); idr_destroy(&srv->conn_idr); kfree(srv); } + +int __net_init tipc_topsrv_init_net(struct net *net) +{ + return tipc_topsrv_start(net); +} + +void __net_exit tipc_topsrv_exit_net(struct net *net) +{ + tipc_topsrv_stop(net); +} diff --git a/net/tipc/trace.c b/net/tipc/trace.c index 265f6a26aa3d..7d2931521e0e 100644 --- a/net/tipc/trace.c +++ b/net/tipc/trace.c @@ -36,7 +36,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -/** +/* * socket tuples for filtering in socket traces: * (portid, sock type, name type, name lower, name upper) */ diff --git a/net/tipc/trace.h b/net/tipc/trace.h index 4d8e00483afc..865142ed0ab4 100644 --- a/net/tipc/trace.h +++ b/net/tipc/trace.h @@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(tipc_skb_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); tipc_skb_dump(skb, more, __get_str(buf)); ), @@ -172,7 +172,7 @@ DECLARE_EVENT_CLASS(tipc_list_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); tipc_list_dump(list, more, __get_str(buf)); ), @@ -200,7 +200,7 @@ DECLARE_EVENT_CLASS(tipc_sk_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); __entry->portid = tipc_sock_get_portid(sk); tipc_sk_dump(sk, dqueues, __get_str(buf)); if (skb) @@ -254,8 +254,8 @@ DECLARE_EVENT_CLASS(tipc_link_class, ), TP_fast_assign( - __assign_str(header, header); - tipc_link_name_ext(l, __entry->name); + __assign_str(header); + memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME); tipc_link_dump(l, dqueues, __get_str(buf)); ), @@ -295,12 +295,14 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class, ), TP_fast_assign( - tipc_link_name_ext(r, __entry->name); + memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME); __entry->from = f; __entry->to = t; __entry->len = skb_queue_len(tq); - __entry->fseqno = msg_seqno(buf_msg(skb_peek(tq))); - __entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq))); + __entry->fseqno = __entry->len ? + msg_seqno(buf_msg(skb_peek(tq))) : 0; + __entry->lseqno = __entry->len ? 
+ msg_seqno(buf_msg(skb_peek_tail(tq))) : 0; ), TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n", @@ -308,15 +310,16 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class, __entry->len, __entry->fseqno, __entry->lseqno) ); -DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans, +DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans, TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), - TP_ARGS(r, f, t, tq) + TP_ARGS(r, f, t, tq), + TP_CONDITION(less_eq(f, t)) ); DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack, TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), TP_ARGS(r, f, t, tq), - TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n", + TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n", __entry->name, __entry->from, __entry->to, __entry->len, __entry->fseqno, __entry->lseqno) ); @@ -334,7 +337,7 @@ DECLARE_EVENT_CLASS(tipc_node_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); __entry->addr = tipc_node_get_addr(n); tipc_node_dump(n, more, __get_str(buf)); ), @@ -371,7 +374,7 @@ DECLARE_EVENT_CLASS(tipc_fsm_class, ), TP_fast_assign( - __assign_str(name, name); + __assign_str(name); __entry->os = os; __entry->ns = ns; __entry->evt = evt; @@ -406,8 +409,8 @@ TRACE_EVENT(tipc_l2_device_event, ), TP_fast_assign( - __assign_str(dev_name, dev->name); - __assign_str(b_name, b->name); + __assign_str(dev_name); + __assign_str(b_name); __entry->evt = evt; __entry->b_up = test_bit(0, &b->up); __entry->carrier = netif_carrier_ok(dev); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 4d85d71f16e2..b85ab0fb3b8c 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -44,7 +44,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/udp_tunnel.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #include <linux/tipc_netlink.h> #include "core.h" #include "addr.h" @@ -52,6 +52,7 @@ #include "bearer.h" #include "netlink.h" #include "msg.h" +#include "udp_media.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -63,6 +64,11 @@ * * This is the bearer level originating address used in neighbor discovery * messages, and all fields should be in network byte order + * + * @proto: Ethernet protocol in use + * @port: port being used + * @ipv4: IPv4 address of neighbor + * @ipv6: IPv6 address of neighbor */ struct udp_media_addr { __be16 proto; @@ -76,6 +82,7 @@ struct udp_media_addr { /* struct udp_replicast - container for UDP remote addresses */ struct udp_replicast { struct udp_media_addr addr; + struct dst_cache dst_cache; struct rcu_head rcu; struct list_head list; }; @@ -86,6 +93,7 @@ struct udp_replicast { * @ubsock: bearer associated socket * @ifindex: local address scope * @work: used to schedule deferred work on a bearer + * @rcast: associated udp_replicast container */ struct udp_bearer { struct tipc_bearer __rcu *bearer; @@ -127,8 +135,11 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port)); else if (ntohs(ua->proto) == ETH_P_IPV6) snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port)); - else + else { pr_err("Invalid UDP media address\n"); + return 1; + } + return 0; } @@ -158,52 +169,64 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) /* tipc_send_msg - enqueue a send request */ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, struct udp_bearer *ub, struct udp_media_addr *src, - struct udp_media_addr *dst) + struct 
udp_media_addr *dst, struct dst_cache *cache) { - int ttl, err = 0; - struct rtable *rt; + struct dst_entry *ndst; + int ttl, err; + local_bh_disable(); + ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { - struct flowi4 fl = { - .daddr = dst->ipv4.s_addr, - .saddr = src->ipv4.s_addr, - .flowi4_mark = skb->mark, - .flowi4_proto = IPPROTO_UDP - }; - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto tx_error; + struct rtable *rt = dst_rtable(ndst); + + if (!rt) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = skb->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); } - skb->dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->port, - dst->port, false, true); + dst->port, false, true, 0); #if IS_ENABLED(CONFIG_IPV6) } else { - struct dst_entry *ndst; - struct flowi6 fl6 = { - .flowi6_oif = ub->ifindex, - .daddr = dst->ipv6, - .saddr = src->ipv6, - .flowi6_proto = IPPROTO_UDP - }; - err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst, - &fl6); - if (err) - goto tx_error; + if (!ndst) { + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + ndst = ipv6_stub->ipv6_dst_lookup_flow(net, + ub->ubsock->sk, + &fl6, NULL); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + goto tx_error; + } + dst_cache_set_ip6(cache, ndst, &fl6.saddr); + } ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, - ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, 0, src->port, - dst->port, false); + udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false, 0); #endif } - return err; + local_bh_enable(); + return 0; tx_error: + local_bh_enable(); kfree_skb(skb); return err; } @@ -225,14 +248,15 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, } skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); - ub = rcu_dereference_rtnl(b->media_ptr); + ub = rcu_dereference(b->media_ptr); if (!ub) { err = -ENODEV; goto out; } if (addr->broadcast != TIPC_REPLICAST_SUPPORT) - return tipc_udp_xmit(net, skb, ub, src, dst); + return tipc_udp_xmit(net, skb, ub, src, dst, + &ub->rcast.dst_cache); /* Replicast, send an skb to each configured IP address */ list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { @@ -244,7 +268,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto out; } - err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr, + &rcast->dst_cache); if (err) goto out; } @@ -288,6 +313,11 @@ static int tipc_udp_rcast_add(struct tipc_bearer *b, if (!rcast) return -ENOMEM; + if (dst_cache_init(&rcast->dst_cache, GFP_ATOMIC)) { + kfree(rcast); + return -ENOMEM; + } + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); if (ntohs(addr->proto) == ETH_P_IP) @@ -354,25 +384,22 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct udphdr)); hdr = buf_msg(skb); - rcu_read_lock(); - b = rcu_dereference_rtnl(ub->bearer); + b = rcu_dereference(ub->bearer); if (!b) - goto rcu_out; + goto out; if (b && test_bit(0, &b->up)) { + TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(sock_net(sk), skb, b); - 
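/*
 * A sketch of the IPv4 dst_cache fast path introduced above, not part of
 * this diff: try the cached route first and fall back to a full
 * ip_route_output_key() lookup only on a miss, priming the cache for the
 * next packet. Assumes the caller has BHs disabled, as the hunk does via
 * local_bh_disable(). The function name is hypothetical.
 */
#include <net/dst_cache.h>
#include <net/route.h>

static struct rtable *example_get_rt(struct net *net, struct dst_cache *cache,
				     __be32 daddr, __be32 saddr)
{
	struct flowi4 fl4 = {
		.daddr = daddr,
		.saddr = saddr,
		.flowi4_proto = IPPROTO_UDP,
	};
	struct rtable *rt;

	rt = dst_rtable(dst_cache_get(cache));
	if (rt)
		return rt;	/* cache hit: skip the FIB lookup */
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return rt;
	dst_cache_set_ip4(cache, &rt->dst, fl4.saddr);
	return rt;
}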
rcu_read_unlock(); return 0; } if (unlikely(msg_user(hdr) == LINK_CONFIG)) { err = tipc_udp_rcast_disc(b, skb); if (err) - goto rcu_out; + goto out; } -rcu_out: - rcu_read_unlock(); out: kfree_skb(skb); return 0; @@ -390,8 +417,10 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) err = ip_mc_join_group(sk, &mreqn); #if IS_ENABLED(CONFIG_IPV6) } else { + lock_sock(sk); err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + release_sock(sk); #endif } return err; @@ -439,21 +468,17 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) int i; if (!bid && !skip_cnt) { + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct net *net = sock_net(skb->sk); struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; - struct nlattr **attrs; char *bname; - err = tipc_nlmsg_parse(cb->nlh, &attrs); - if (err) - return err; - if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, NULL); + err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, NULL); if (err) return err; @@ -464,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = tipc_bearer_find(net, bname); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } @@ -475,13 +500,13 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = rtnl_dereference(tn->bearer_list[bid]); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } } - ub = rcu_dereference_rtnl(b->media_ptr); + ub = rtnl_dereference(b->media_ptr); if (!ub) { rtnl_unlock(); return -EINVAL; @@ -523,11 +548,11 @@ int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) struct udp_bearer *ub; struct nlattr *nest; - ub = rcu_dereference_rtnl(b->media_ptr); + ub = rtnl_dereference(b->media_ptr); if (!ub) return -ENODEV; - nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + nest = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); if (!nest) goto msg_full; @@ -552,7 +577,7 @@ msg_full: /** * tipc_parse_udp_addr - build udp media address from netlink data - * @nlattr: netlink attribute containing sockaddr storage aligned address + * @nla: netlink attribute containing sockaddr storage aligned address * @addr: tipc media address to fill with address, port and protocol type * @scope_id: IPv6 scope id pointer, not NULL indicates it's required */ @@ -605,8 +630,7 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; struct udp_media_addr *dst; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, - tipc_nl_udp_policy, NULL)) + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy, NULL)) return -EINVAL; if (!opts[TIPC_NLA_UDP_REMOTE]) @@ -648,6 +672,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; u8 node_id[NODE_ID_LEN] = {0,}; + struct net_device *dev; int rmcast = 0; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); @@ -659,9 +684,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) goto err; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, - attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy, 
NULL)) + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], tipc_nl_udp_policy, NULL)) goto err; if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { @@ -704,8 +727,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, rcu_assign_pointer(ub->bearer, b); tipc_udp_media_addr_set(&b->addr, &local); if (local.proto == htons(ETH_P_IP)) { - struct net_device *dev; - dev = __ip_dev_find(net, local.ipv4.s_addr, false); if (!dev) { err = -ENODEV; @@ -720,14 +741,16 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, udp_conf.local_ip.s_addr = local.ipv4.s_addr; udp_conf.use_udp_checksums = false; ub->ifindex = dev->ifindex; - if (tipc_mtu_bad(dev, sizeof(struct iphdr) + - sizeof(struct udphdr))) { - err = -EINVAL; - goto err; - } + b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr); b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { + dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL; + dev = ipv6_dev_find(net, &local.ipv6, dev); + if (!dev) { + err = -ENODEV; + goto err; + } udp_conf.family = AF_INET6; udp_conf.use_udp6_tx_checksums = true; udp_conf.use_udp6_rx_checksums = true; @@ -735,6 +758,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, udp_conf.local_ip6 = in6addr_any; else udp_conf.local_ip6 = local.ipv6; + ub->ifindex = dev->ifindex; + b->encap_hlen = sizeof(struct ipv6hdr) + sizeof(struct udphdr); b->mtu = 1280; #endif } else { @@ -751,7 +776,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - /** + err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC); + if (err) + goto free; + + /* * The bcast media address port is used for all peers and the ip * is used if it's a multicast address. 
*/ @@ -761,12 +790,14 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, else err = tipc_udp_rcast_add(b, &remote); if (err) - goto err; + goto free; return 0; + +free: + dst_cache_destroy(&ub->rcast.dst_cache); + udp_tunnel_sock_release(ub->ubsock); err: - if (ub->ubsock) - udp_tunnel_sock_release(ub->ubsock); kfree(ub); return err; } @@ -776,15 +807,22 @@ static void cleanup_bearer(struct work_struct *work) { struct udp_bearer *ub = container_of(work, struct udp_bearer, work); struct udp_replicast *rcast, *tmp; + struct tipc_net *tn; list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + dst_cache_destroy(&rcast->dst_cache); list_del_rcu(&rcast->list); kfree_rcu(rcast, rcu); } - if (ub->ubsock) - udp_tunnel_sock_release(ub->ubsock); + tn = tipc_net(sock_net(ub->ubsock->sk)); + + dst_cache_destroy(&ub->rcast.dst_cache); + udp_tunnel_sock_release(ub->ubsock); + + /* Note: could use a call_rcu() to avoid another synchronize_net() */ synchronize_net(); + atomic_dec(&tn->wq_count); kfree(ub); } @@ -793,16 +831,16 @@ static void tipc_udp_disable(struct tipc_bearer *b) { struct udp_bearer *ub; - ub = rcu_dereference_rtnl(b->media_ptr); + ub = rtnl_dereference(b->media_ptr); if (!ub) { pr_err("UDP bearer instance not found\n"); return; } - if (ub->ubsock) - sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); RCU_INIT_POINTER(ub->bearer, NULL); /* sock_release need to be done outside of rtnl lock */ + atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); INIT_WORK(&ub->work, cleanup_bearer); schedule_work(&ub->work); } @@ -816,7 +854,8 @@ struct tipc_media udp_media_info = { .msg2addr = tipc_udp_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_DEF_LINK_WIN, .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, |
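/*
 * A sketch of the deferred teardown pattern in tipc_udp_disable()/
 * cleanup_bearer() above, not part of this diff: sock_release() must not
 * run under rtnl_lock, so the disable path marks the socket dead and
 * hands the actual release to a work item. Struct and function names are
 * illustrative.
 */
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/udp_tunnel.h>

struct ex_bearer {
	struct socket *ubsock;
	struct work_struct work;
};

static void ex_cleanup(struct work_struct *work)
{
	struct ex_bearer *ub = container_of(work, struct ex_bearer, work);

	udp_tunnel_sock_release(ub->ubsock);	/* safe: rtnl not held here */
	synchronize_net();			/* let RCU readers drain */
	kfree(ub);
}

static void ex_disable(struct ex_bearer *ub)
{
	sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
	INIT_WORK(&ub->work, ex_cleanup);
	schedule_work(&ub->work);
}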
