diff options
Diffstat (limited to 'net/tipc')
56 files changed, 23952 insertions, 11347 deletions
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c890848f9d56..bb0d71eb02a6 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # TIPC configuration # @@ -5,7 +6,8 @@ menuconfig TIPC tristate "The TIPC Protocol" depends on INET - ---help--- + depends on IPV6 || IPV6=n + help The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. This protocol originates from Ericsson where it has been used in carrier grade @@ -16,25 +18,43 @@ menuconfig TIPC This protocol support is also available as a module ( = code which can be inserted in and removed from the running kernel whenever you want). The module will be called tipc. If you want to compile it - as a module, say M here and read <file:Documentation/kbuild/modules.txt>. + as a module, say M here and read <file:Documentation/kbuild/modules.rst>. If in doubt, say N. -config TIPC_PORTS - int "Maximum number of ports in a node" - depends on TIPC - range 127 65535 - default "8191" - help - Specifies how many ports can be supported by a node. - Can range from 127 to 65535 ports; default is 8191. - - Setting this to a smaller value saves some memory, - setting it to higher allows for more ports. - config TIPC_MEDIA_IB bool "InfiniBand media type support" depends on TIPC && INFINIBAND_IPOIB help Saying Y here will enable support for running TIPC on IP-over-InfiniBand devices. +config TIPC_MEDIA_UDP + bool "IP/UDP media type support" + depends on TIPC + select NET_UDP_TUNNEL + default y + help + Saying Y here will enable support for running TIPC over IP/UDP + +config TIPC_CRYPTO + bool "TIPC encryption support" + depends on TIPC + select CRYPTO + select CRYPTO_AES + select CRYPTO_GCM + default y + help + Saying Y here will enable support for TIPC encryption. 
+ All TIPC messages will be encrypted/decrypted by using the currently most + advanced algorithm: AEAD AES-GCM (like IPSec or TLS) before leaving/ + entering the TIPC stack. + Key setting from user-space is performed via netlink by a user program + (e.g. the iproute2 'tipc' tool). + +config TIPC_DIAG + tristate "TIPC: socket monitoring interface" + depends on TIPC + default y + help + Support for TIPC socket monitoring interface used by ss tool. + If unsure, say Y. diff --git a/net/tipc/Makefile b/net/tipc/Makefile index b282f7130d2b..18e1636aa036 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -1,14 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux TIPC layer # obj-$(CONFIG_TIPC) := tipc.o -tipc-y += addr.o bcast.o bearer.o config.o \ - core.o handler.o link.o discover.o msg.o \ - name_distr.o subscr.o name_table.o net.o \ - netlink.o node.o node_subscr.o port.o ref.o \ - socket.o log.o eth_media.o server.o +tipc-y += addr.o bcast.o bearer.o \ + core.o link.o discover.o msg.o \ + name_distr.o subscr.o monitor.o name_table.o net.o \ + netlink.o netlink_compat.o node.o socket.o eth_media.o \ + topsrv.o group.o trace.o +CFLAGS_trace.o += -I$(src) + +tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o +tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o + +obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o +tipc_diag-y += diag.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 357b74b26f9e..6f5c54cbf8d9 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -1,8 +1,9 @@ /* * net/tipc/addr.c: TIPC address utility routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,70 +35,90 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "core.h" #include "addr.h" +#include "core.h" -/** - * tipc_addr_domain_valid - validates a network domain address - * - * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, - * where Z, C, and N are non-zero. - * - * Returns 1 if domain address is valid, otherwise 0 - */ -int tipc_addr_domain_valid(u32 addr) +bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr) { - u32 n = tipc_node(addr); - u32 c = tipc_cluster(addr); - u32 z = tipc_zone(addr); - - if (n && (!z || !c)) - return 0; - if (c && !z) - return 0; - return 1; + if (!domain || (domain == addr)) + return true; + if (!legacy_format) + return false; + if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ + return true; + if (domain == (addr & TIPC_ZONE_CLUSTER_MASK)) /* domain <Z.C.0> */ + return true; + if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */ + return true; + return false; } -/** - * tipc_addr_node_valid - validates a proposed network address for this node - * - * Accepts <Z.C.N>, where Z, C, and N are non-zero. 
- * - * Returns 1 if address can be used, otherwise 0 - */ -int tipc_addr_node_valid(u32 addr) +void tipc_set_node_id(struct net *net, u8 *id) { - return tipc_addr_domain_valid(addr) && tipc_node(addr); -} + struct tipc_net *tn = tipc_net(net); -int tipc_in_scope(u32 domain, u32 addr) -{ - if (!domain || (domain == addr)) - return 1; - if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ - return 1; - if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */ - return 1; - return 0; + memcpy(tn->node_id, id, NODE_ID_LEN); + tipc_nodeid2string(tn->node_id_string, id); + tn->trial_addr = hash128to32(id); + pr_info("Node identity %s, cluster identity %u\n", + tipc_own_id_string(net), tn->net_id); } -/** - * tipc_addr_scope - convert message lookup domain to a 2-bit scope value - */ -int tipc_addr_scope(u32 domain) +void tipc_set_node_addr(struct net *net, u32 addr) { - if (likely(!domain)) - return TIPC_ZONE_SCOPE; - if (tipc_node(domain)) - return TIPC_NODE_SCOPE; - if (tipc_cluster(domain)) - return TIPC_CLUSTER_SCOPE; - return TIPC_ZONE_SCOPE; + struct tipc_net *tn = tipc_net(net); + u8 node_id[NODE_ID_LEN] = {0,}; + + tn->node_addr = addr; + if (!tipc_own_id(net)) { + sprintf(node_id, "%x", addr); + tipc_set_node_id(net, node_id); + } + tn->trial_addr = addr; + tn->addr_trial_end = jiffies; + pr_info("Node number set to %u\n", addr); } -char *tipc_addr_string_fill(char *string, u32 addr) +int tipc_nodeid2string(char *str, u8 *id) { - snprintf(string, 16, "<%u.%u.%u>", - tipc_zone(addr), tipc_cluster(addr), tipc_node(addr)); - return string; + int i; + u8 c; + + /* Already a string ? 
*/ + for (i = 0; i < NODE_ID_LEN; i++) { + c = id[i]; + if (c >= '0' && c <= '9') + continue; + if (c >= 'A' && c <= 'Z') + continue; + if (c >= 'a' && c <= 'z') + continue; + if (c == '.') + continue; + if (c == ':') + continue; + if (c == '_') + continue; + if (c == '-') + continue; + if (c == '@') + continue; + if (c != 0) + break; + } + if (i == NODE_ID_LEN) { + memcpy(str, id, NODE_ID_LEN); + str[NODE_ID_LEN] = 0; + return i; + } + + /* Translate to hex string */ + for (i = 0; i < NODE_ID_LEN; i++) + sprintf(&str[2 * i], "%02x", id[i]); + + /* Strip off trailing zeroes */ + for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--) + str[i] = 0; + + return i + 1; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 60b00ab93d74..a113cf7e1f89 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -1,8 +1,9 @@ /* * net/tipc/addr.h: Include file for TIPC address utility routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2018, Ericsson AB * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,58 +38,98 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H -#define TIPC_ZONE_MASK 0xff000000u -#define TIPC_CLUSTER_MASK 0xfffff000u +#include <linux/types.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include "core.h" -static inline u32 tipc_zone_mask(u32 addr) +/* Struct tipc_uaddr: internal version of struct sockaddr_tipc. + * Must be kept aligned both regarding field positions and size. 
+ */ +struct tipc_uaddr { + unsigned short family; + unsigned char addrtype; + signed char scope; + union { + struct { + struct tipc_service_addr sa; + u32 lookup_node; + }; + struct tipc_service_range sr; + struct tipc_socket_addr sk; + }; +}; + +static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope, + u32 type, u32 lower, u32 upper) { - return addr & TIPC_ZONE_MASK; + ua->family = AF_TIPC; + ua->addrtype = atype; + ua->scope = scope; + ua->sr.type = type; + ua->sr.lower = lower; + ua->sr.upper = upper; } -static inline u32 tipc_cluster_mask(u32 addr) +static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len) { - return addr & TIPC_CLUSTER_MASK; + u32 atype; + + if (len < sizeof(struct sockaddr_tipc)) + return false; + atype = ua->addrtype; + if (ua->family != AF_TIPC) + return false; + if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR) + return true; + if (atype == TIPC_SERVICE_RANGE) + return ua->sr.upper >= ua->sr.lower; + return false; } -static inline int in_own_cluster_exact(u32 addr) +static inline u32 tipc_own_addr(struct net *net) { - return !((addr ^ tipc_own_addr) >> 12); + return tipc_net(net)->node_addr; } -/** - * in_own_node - test for node inclusion; <0.0.0> always matches - */ -static inline int in_own_node(u32 addr) +static inline u8 *tipc_own_id(struct net *net) { - return (addr == tipc_own_addr) || !addr; + struct tipc_net *tn = tipc_net(net); + + if (!strlen(tn->node_id_string)) + return NULL; + return tn->node_id; } -/** - * in_own_cluster - test for cluster inclusion; <0.0.0> always matches - */ -static inline int in_own_cluster(u32 addr) +static inline char *tipc_own_id_string(struct net *net) { - return in_own_cluster_exact(addr) || !addr; + return tipc_net(net)->node_id_string; } -/** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. 
- */ -static inline u32 addr_domain(u32 sc) +static inline u32 tipc_cluster_mask(u32 addr) { - if (likely(sc == TIPC_NODE_SCOPE)) - return tipc_own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tipc_own_addr); - return tipc_zone_mask(tipc_own_addr); + return addr & TIPC_ZONE_CLUSTER_MASK; } -int tipc_addr_domain_valid(u32); -int tipc_addr_node_valid(u32 addr); -int tipc_in_scope(u32 domain, u32 addr); -int tipc_addr_scope(u32 domain); -char *tipc_addr_string_fill(char *string, u32 addr); +static inline int tipc_node2scope(u32 node) +{ + return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE; +} + +static inline int tipc_scope2node(struct net *net, int sc) +{ + return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net); +} + +static inline int in_own_node(struct net *net, u32 addr) +{ + return addr == tipc_own_addr(net) || !addr; +} + +bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr); +void tipc_set_node_id(struct net *net, u8 *id); +void tipc_set_node_addr(struct net *net, u32 addr); +int tipc_nodeid2string(char *str, u8 *id); + #endif diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 716de1ac6cb5..114fef65f92e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, Ericsson AB + * Copyright (c) 2004-2006, 2014-2017, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -35,874 +35,830 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "core.h" -#include "link.h" -#include "port.h" +#include <linux/tipc_config.h> +#include "socket.h" +#include "msg.h" #include "bcast.h" -#include "name_distr.h" - -#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ - -#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ +#include "link.h" +#include "name_table.h" -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. - */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bc_lock". 
- */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; +const char tipc_bclink_name[] = "broadcast-link"; +unsigned long sysctl_tipc_bc_retruni __read_mostly; /** - * struct tipc_bclink - link used for broadcast messages - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. + * struct tipc_bc_base - base structure for keeping broadcast send state + * @link: broadcast send link structure + * @inputq: data input queue; will only carry SOCK_WAKEUP messages + * @dests: array keeping number of reachable destinations per bearer + * @primary_bearer: a bearer having links to all broadcast destinations, if any + * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic + * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic + * @rc_ratio: dest count as percentage of cluster size where send method changes + * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ -struct tipc_bclink { - struct tipc_link link; - struct tipc_node node; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; +struct tipc_bc_base { + struct tipc_link *link; + struct sk_buff_head inputq; + int dests[MAX_BEARERS]; + int primary_bearer; + bool bcast_support; + bool force_bcast; + bool rcast_support; + bool force_rcast; + int rc_ratio; + int bc_threshold; }; -static struct tipc_bcbearer bcast_bearer; -static struct tipc_bclink bcast_link; - -static struct 
tipc_bcbearer *bcbearer = &bcast_bearer; -static struct tipc_bclink *bclink = &bcast_link; -static struct tipc_link *bcl = &bcast_link.link; - -static DEFINE_SPINLOCK(bc_lock); - -const char tipc_bclink_name[] = "broadcast-link"; - -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff); - -static u32 bcbuf_acks(struct sk_buff *buf) +static struct tipc_bc_base *tipc_bc_base(struct net *net) { - return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; + return tipc_net(net)->bcbase; } -static void bcbuf_set_acks(struct sk_buff *buf, u32 acks) +/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link + * Note: the MTU is decremented to give room for a tunnel header, in + * case the message needs to be sent as replicast + */ +int tipc_bcast_get_mtu(struct net *net) { - TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks; + return tipc_link_mss(tipc_bc_sndlink(net)); } -static void bcbuf_decr_acks(struct sk_buff *buf) +void tipc_bcast_toggle_rcast(struct net *net, bool supp) { - bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); + tipc_bc_base(net)->rcast_support = supp; } -void tipc_bclink_add_node(u32 addr) +static void tipc_bcbase_calc_bc_threshold(struct net *net) { - spin_lock_bh(&bc_lock); - tipc_nmap_add(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); -} + struct tipc_bc_base *bb = tipc_bc_base(net); + int cluster_size = tipc_link_bc_peers(tipc_bc_sndlink(net)); -void tipc_bclink_remove_node(u32 addr) -{ - spin_lock_bh(&bc_lock); - tipc_nmap_remove(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + bb->bc_threshold = 1 + (cluster_size * bb->rc_ratio / 100); } -static void bclink_set_last_sent(void) +/* tipc_bcbase_select_primary(): find a bearer with links to all destinations, + * if any, and make it primary bearer + */ +static void tipc_bcbase_select_primary(struct net *net) { - if (bcl->next_out) - bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); - else - bcl->fsm_msg_cnt 
= mod(bcl->next_out_no - 1); -} + struct tipc_bc_base *bb = tipc_bc_base(net); + int all_dests = tipc_link_bc_peers(bb->link); + int max_win = tipc_link_max_win(bb->link); + int min_win = tipc_link_min_win(bb->link); + int i, mtu, prim; -u32 tipc_bclink_get_last_sent(void) -{ - return bcl->fsm_msg_cnt; -} + bb->primary_bearer = INVALID_BEARER_ID; + bb->bcast_support = true; -static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) -{ - node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? - seqno : node->bclink.last_sent; -} + if (!all_dests) + return; + for (i = 0; i < MAX_BEARERS; i++) { + if (!bb->dests[i]) + continue; -/** - * tipc_bclink_retransmit_to - get most recent node to request retransmission - * - * Called with bc_lock locked - */ -struct tipc_node *tipc_bclink_retransmit_to(void) + mtu = tipc_bearer_mtu(net, i); + if (mtu < tipc_link_mtu(bb->link)) { + tipc_link_set_mtu(bb->link, mtu); + tipc_link_set_queue_limits(bb->link, + min_win, + max_win); + } + bb->bcast_support &= tipc_bearer_bcast_support(net, i); + if (bb->dests[i] < all_dests) + continue; + + bb->primary_bearer = i; + + /* Reduce risk that all nodes select same primary */ + if ((i ^ tipc_own_addr(net)) & 1) + break; + } + prim = bb->primary_bearer; + if (prim != INVALID_BEARER_ID) + bb->bcast_support = tipc_bearer_bcast_support(net, prim); +} + +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) { - return bclink->retransmit_to; + struct tipc_bc_base *bb = tipc_bc_base(net); + + tipc_bcast_lock(net); + bb->dests[bearer_id]++; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * bclink_retransmit_pkt - retransmit broadcast packets - * @after: sequence number of last packet to *not* retransmit - * @to: sequence number of last packet to retransmit - * - * Called with bc_lock locked - */ -static void bclink_retransmit_pkt(u32 after, u32 to) +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) { - struct sk_buff *buf; + 
struct tipc_bc_base *bb = tipc_bc_base(net); - buf = bcl->first_out; - while (buf && less_eq(buf_seqno(buf), after)) - buf = buf->next; - tipc_link_retransmit(bcl, buf, mod(to - after)); + tipc_bcast_lock(net); + bb->dests[bearer_id]--; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets - * @n_ptr: node that sent acknowledgement info - * @acked: broadcast sequence # that has been acknowledged +/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers * - * Node is locked, bc_lock unlocked. + * Note that number of reachable destinations, as indicated in the dests[] + * array, may transitionally differ from the number of destinations indicated + * in each sent buffer. We can sustain this. Excess destination nodes will + * drop and never acknowledge the unexpected packets, and missing destinations + * will either require retransmission (if they are just about to be added to + * the bearer), or be removed from the buffer's 'ackers' counter (if they + * just went down) */ -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) +static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) { - struct sk_buff *crs; - struct sk_buff *next; - unsigned int released = 0; - - spin_lock_bh(&bc_lock); + int bearer_id; + struct tipc_bc_base *bb = tipc_bc_base(net); + struct sk_buff *skb, *_skb; + struct sk_buff_head _xmitq; - /* Bail out if tx queue is empty (no clean up is required) */ - crs = bcl->first_out; - if (!crs) - goto exit; + if (skb_queue_empty(xmitq)) + return; - /* Determine which messages need to be acknowledged */ - if (acked == INVALID_LINK_SEQ) { - /* - * Contact with specified node has been lost, so need to - * acknowledge sent messages only (if other nodes still exist) - * or both sent and unsent messages (otherwise) - */ - if (bclink->bcast_nodes.count) - acked = bcl->fsm_msg_cnt; - else - acked = bcl->next_out_no; - } else { - /* - 
* Bail out if specified sequence number does not correspond - * to a message that has been sent and not yet acknowledged - */ - if (less(acked, buf_seqno(crs)) || - less(bcl->fsm_msg_cnt, acked) || - less_eq(acked, n_ptr->bclink.acked)) - goto exit; + /* The typical case: at least one bearer has links to all nodes */ + bearer_id = bb->primary_bearer; + if (bearer_id >= 0) { + tipc_bearer_bc_xmit(net, bearer_id, xmitq); + return; } - /* Skip over packets that node has previously acknowledged */ - while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked)) - crs = crs->next; - - /* Update packets that node is now acknowledging */ - - while (crs && less_eq(buf_seqno(crs), acked)) { - next = crs->next; - - if (crs != bcl->next_out) - bcbuf_decr_acks(crs); - else { - bcbuf_set_acks(crs, 0); - bcl->next_out = next; - bclink_set_last_sent(); - } + /* We have to transmit across all bearers */ + __skb_queue_head_init(&_xmitq); + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + if (!bb->dests[bearer_id]) + continue; - if (bcbuf_acks(crs) == 0) { - bcl->first_out = next; - bcl->out_queue_size--; - kfree_skb(crs); - released = 1; + skb_queue_walk(xmitq, skb) { + _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_queue_tail(&_xmitq, _skb); } - crs = next; + tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); } - n_ptr->bclink.acked = acked; - - /* Try resolving broadcast link congestion, if necessary */ - - if (unlikely(bcl->next_out)) { - tipc_link_push_queue(bcl); - bclink_set_last_sent(); - } - if (unlikely(released && !list_empty(&bcl->waiting_ports))) - tipc_link_wakeup_ports(bcl, 0); -exit: - spin_unlock_bh(&bc_lock); + __skb_queue_purge(xmitq); + __skb_queue_purge(&_xmitq); } -/** - * tipc_bclink_update_link_state - update broadcast link state - * - * tipc_net_lock and node lock set - */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) +static void tipc_bcast_select_xmit_method(struct net *net, int dests, + struct 
tipc_mc_method *method) { - struct sk_buff *buf; - - /* Ignore "stale" link state info */ + struct tipc_bc_base *bb = tipc_bc_base(net); + unsigned long exp = method->expires; - if (less_eq(last_sent, n_ptr->bclink.last_in)) + /* Broadcast supported by used bearer/bearers? */ + if (!bb->bcast_support) { + method->rcast = true; + return; + } + /* Any destinations which don't support replicast ? */ + if (!bb->rcast_support) { + method->rcast = false; + return; + } + /* Can current method be changed ? */ + method->expires = jiffies + TIPC_METHOD_EXPIRE; + if (method->mandatory) return; - /* Update link synchronization state; quit if in sync */ - - bclink_update_last_sent(n_ptr, last_sent); - - if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) return; - /* Update out-of-sync state; quit if loss is still unconfirmed */ + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ + /* Determine method to use now */ + method->rcast = dests <= bb->bc_threshold; +} - if ((++n_ptr->bclink.oos_state) == 1) { - if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2)) - return; - n_ptr->bclink.oos_state++; +/* tipc_bcast_xmit - broadcast the buffer chain to all external nodes + * @net: the applicable net namespace + * @pkts: chain of buffers containing message + * @cong_link_cnt: set to 1 if broadcast link is congested, otherwise 0 + * Consumes the buffer chain. 
+ * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE + */ +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt) +{ + struct tipc_link *l = tipc_bc_sndlink(net); + struct sk_buff_head xmitq; + int rc = 0; + + __skb_queue_head_init(&xmitq); + tipc_bcast_lock(net); + if (tipc_link_bc_peers(l)) + rc = tipc_link_xmit(l, pkts, &xmitq); + tipc_bcast_unlock(net); + tipc_bcbase_xmit(net, &xmitq); + __skb_queue_purge(pkts); + if (rc == -ELINKCONG) { + *cong_link_cnt = 1; + rc = 0; } + return rc; +} - /* Don't NACK if one has been recently sent (or seen) */ +/* tipc_rcast_xmit - replicate and send a message to given destination nodes + * @net: the applicable net namespace + * @pkts: chain of buffers containing message + * @dests: list of destination nodes + * @cong_link_cnt: returns number of congested links + * @cong_links: returns identities of congested links + * Returns 0 if success, otherwise errno + */ +static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, + struct tipc_nlist *dests, u16 *cong_link_cnt) +{ + struct tipc_dest *dst, *tmp; + struct sk_buff_head _pkts; + u32 dnode, selector; - if (n_ptr->bclink.oos_state & 0x1) - return; + selector = msg_link_selector(buf_msg(skb_peek(pkts))); + __skb_queue_head_init(&_pkts); + + list_for_each_entry_safe(dst, tmp, &dests->list, list) { + dnode = dst->node; + if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts)) + return -ENOMEM; - /* Send NACK */ - - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, - INT_H_SIZE, n_ptr->addr); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); - msg_set_bcast_ack(msg, n_ptr->bclink.last_in); - msg_set_bcgap_after(msg, n_ptr->bclink.last_in); - msg_set_bcgap_to(msg, n_ptr->bclink.deferred_head - ? 
buf_seqno(n_ptr->bclink.deferred_head) - 1 - : n_ptr->bclink.last_sent); - - spin_lock_bh(&bc_lock); - tipc_bearer_send(&bcbearer->bearer, buf, NULL); - bcl->stats.sent_nacks++; - spin_unlock_bh(&bc_lock); - kfree_skb(buf); - - n_ptr->bclink.oos_state++; + /* Any other return value than -ELINKCONG is ignored */ + if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) + (*cong_link_cnt)++; } + return 0; } -/** - * bclink_peek_nack - monitor retransmission requests sent by other nodes - * - * Delay any upcoming NACK by this node if another node has already - * requested the first message this node is going to ask for. - * - * Only tipc_net_lock set. +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * Returns 0 if success, otherwise errno */ -static void bclink_peek_nack(struct tipc_msg *msg) +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests) { - struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + u16 cong_link_cnt = 0; + struct sk_buff *_skb; + int rc = 0; + + /* Is a cluster supporting with new capabilities ? 
*/ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; - if (unlikely(!n_ptr)) - return; + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_inner_hdr(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; - tipc_node_lock(n_ptr); + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!_skb) + return -ENOMEM; - if (n_ptr->bclink.recv_permitted && - (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && - (n_ptr->bclink.last_in == msg_bcgap_after(msg))) - n_ptr->bclink.oos_state = 2; + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + msg_set_errcode(_hdr, TIPC_ERR_NO_PORT); + + __skb_queue_head_init(&tmpq); + __skb_queue_tail(&tmpq, _skb); + if (method->rcast) + rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt); + else + rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt); - tipc_node_unlock(n_ptr); + /* This queue should normally be empty by now */ + __skb_queue_purge(&tmpq); + + return rc; } -/* - * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster +/* tipc_mcast_xmit - deliver message to indicated destination nodes + * and to identified node local sockets + * @net: the applicable net namespace + * @pkts: chain of buffers containing message + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Consumes buffer chain. 
+ * Returns 0 if success, otherwise errno */ -int tipc_bclink_send_msg(struct sk_buff *buf) +int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, + struct tipc_mc_method *method, struct tipc_nlist *dests, + u16 *cong_link_cnt) { - int res; - - spin_lock_bh(&bc_lock); - - if (!bclink->bcast_nodes.count) { - res = msg_data_sz(buf_msg(buf)); - kfree_skb(buf); + struct sk_buff_head inputq, localq; + bool rcast = method->rcast; + struct tipc_msg *hdr; + struct sk_buff *skb; + int rc = 0; + + skb_queue_head_init(&inputq); + __skb_queue_head_init(&localq); + + /* Clone packets before they are consumed by next call */ + if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { + rc = -ENOMEM; goto exit; } + /* Send according to determined transmit method */ + if (dests->remote) { + tipc_bcast_select_xmit_method(net, dests->remote, method); + + skb = skb_peek(pkts); + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_inner_hdr(hdr); + msg_set_is_rcast(hdr, method->rcast); + + /* Switch method ? */ + if (rcast != method->rcast) { + rc = tipc_mcast_send_sync(net, skb, method, dests); + if (unlikely(rc)) { + pr_err("Unable to send SYN: method %d, rc %d\n", + rcast, rc); + goto exit; + } + } - res = tipc_link_send_buf(bcl, buf); - if (likely(res >= 0)) { - bclink_set_last_sent(); - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += bcl->out_queue_size; + if (method->rcast) + rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); + else + rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } -exit: - spin_unlock_bh(&bc_lock); - return res; -} -/** - * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet - * - * Called with both sending node's lock and bc_lock taken. 
- */ -static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) -{ - bclink_update_last_sent(node, seqno); - node->bclink.last_in = seqno; - node->bclink.oos_state = 0; - bcl->stats.recv_info++; - - /* - * Unicast an ACK periodically, ensuring that - * all nodes in the cluster don't ACK at the same time - */ - - if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { - tipc_link_send_proto_msg( - node->active_links[node->addr & 1], - STATE_MSG, 0, 0, 0, 0, 0); - bcl->stats.sent_acks++; + if (dests->local) { + tipc_loopback_trace(net, &localq); + tipc_sk_mcast_rcv(net, &localq, &inputq); } +exit: + /* This queue should normally be empty by now */ + __skb_queue_purge(pkts); + return rc; } -/** - * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards +/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link * - * tipc_net_lock is read_locked, no other locks set + * RCU is locked, no other locks set */ -void tipc_bclink_recv_pkt(struct sk_buff *buf) +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - struct tipc_node *node; - u32 next_in; - u32 seqno; - int deferred; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + int rc; - /* Screen out unwanted broadcast messages */ + __skb_queue_head_init(&xmitq); - if (msg_mc_netid(msg) != tipc_net_id) - goto exit; - - node = tipc_node_find(msg_prevnode(msg)); - if (unlikely(!node)) - goto exit; - - tipc_node_lock(node); - if (unlikely(!node->bclink.recv_permitted)) - goto unlock; - - /* Handle broadcast protocol message */ - - if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { - if (msg_type(msg) != STATE_MSG) - goto unlock; - if (msg_destnode(msg) == tipc_own_addr) { - tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); - spin_lock_bh(&bc_lock); - bcl->stats.recv_nacks++; - bclink->retransmit_to = node; - 
bclink_retransmit_pkt(msg_bcgap_after(msg), - msg_bcgap_to(msg)); - spin_unlock_bh(&bc_lock); - } else { - tipc_node_unlock(node); - bclink_peek_nack(msg); - } - goto exit; + if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { + kfree_skb(skb); + return 0; } - /* Handle in-sequence broadcast message */ - - seqno = msg_seqno(msg); - next_in = mod(node->bclink.last_in + 1); - - if (likely(seqno == next_in)) { -receive: - /* Deliver message to destination */ - - if (likely(msg_isdata(msg))) { - spin_lock_bh(&bc_lock); - bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); - tipc_node_unlock(node); - if (likely(msg_mcast(msg))) - tipc_port_recv_mcast(buf, NULL); - else - kfree_skb(buf); - } else if (msg_user(msg) == MSG_BUNDLER) { - spin_lock_bh(&bc_lock); - bclink_accept_pkt(node, seqno); - bcl->stats.recv_bundles++; - bcl->stats.recv_bundled += msg_msgcnt(msg); - spin_unlock_bh(&bc_lock); - tipc_node_unlock(node); - tipc_link_recv_bundle(buf); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - int ret = tipc_link_recv_fragment(&node->bclink.defragm, - &buf, &msg); - if (ret < 0) - goto unlock; - spin_lock_bh(&bc_lock); - bclink_accept_pkt(node, seqno); - bcl->stats.recv_fragments++; - if (ret > 0) - bcl->stats.recv_fragmented++; - spin_unlock_bh(&bc_lock); - tipc_node_unlock(node); - tipc_net_route_msg(buf); - } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - spin_lock_bh(&bc_lock); - bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); - tipc_node_unlock(node); - tipc_named_recv(buf); - } else { - spin_lock_bh(&bc_lock); - bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); - tipc_node_unlock(node); - kfree_skb(buf); - } - buf = NULL; - - /* Determine new synchronization state */ + tipc_bcast_lock(net); + if (msg_user(hdr) == BCAST_PROTOCOL) + rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); + else + rc = tipc_link_rcv(l, skb, NULL); + tipc_bcast_unlock(net); - tipc_node_lock(node); - if (unlikely(!tipc_node_is_up(node))) - goto 
unlock; + tipc_bcbase_xmit(net, &xmitq); - if (node->bclink.last_in == node->bclink.last_sent) - goto unlock; + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); - if (!node->bclink.deferred_head) { - node->bclink.oos_state = 1; - goto unlock; - } + return rc; +} - msg = buf_msg(node->bclink.deferred_head); - seqno = msg_seqno(msg); - next_in = mod(next_in + 1); - if (seqno != next_in) - goto unlock; +/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge + * + * RCU is locked, no other locks set + */ +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) +{ + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + u16 acked = msg_bcast_ack(hdr); + struct sk_buff_head xmitq; - /* Take in-sequence message from deferred queue & deliver it */ + /* Ignore bc acks sent by peer before bcast synch point was received */ + if (msg_bc_ack_invalid(hdr)) + return; - buf = node->bclink.deferred_head; - node->bclink.deferred_head = buf->next; - node->bclink.deferred_size--; - goto receive; - } + __skb_queue_head_init(&xmitq); - /* Handle out-of-sequence broadcast message */ + tipc_bcast_lock(net); + tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL); + tipc_bcast_unlock(net); - if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferred_head, - &node->bclink.deferred_tail, - buf); - node->bclink.deferred_size += deferred; - bclink_update_last_sent(node, seqno); - buf = NULL; - } else - deferred = 0; + tipc_bcbase_xmit(net, &xmitq); - spin_lock_bh(&bc_lock); + /* Any socket wakeup messages ? 
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} - if (deferred) - bcl->stats.deferred_recv++; - else - bcl->stats.duplicates++; +/* tipc_bcast_synch_rcv - check and update rcv link with peer's send state + * + * RCU is locked, no other locks set + */ +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr, + struct sk_buff_head *retrq) +{ + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct tipc_gap_ack_blks *ga; + struct sk_buff_head xmitq; + int rc = 0; + + __skb_queue_head_init(&xmitq); + + tipc_bcast_lock(net); + if (msg_type(hdr) != STATE_MSG) { + tipc_link_bc_init_rcv(l, hdr); + } else if (!msg_bc_ack_invalid(hdr)) { + tipc_get_gap_ack_blks(&ga, l, hdr, false); + if (!sysctl_tipc_bc_retruni) + retrq = &xmitq; + rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), + msg_bc_gap(hdr), ga, &xmitq, + retrq); + rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq); + } + tipc_bcast_unlock(net); - spin_unlock_bh(&bc_lock); + tipc_bcbase_xmit(net, &xmitq); -unlock: - tipc_node_unlock(node); -exit: - kfree_skb(buf); + /* Any socket wakeup messages ? 
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); + return rc; } -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) +/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, + struct sk_buff_head *xmitq) { - return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); -} + struct tipc_link *snd_l = tipc_bc_sndlink(net); + tipc_bcast_lock(net); + tipc_link_add_bc_peer(snd_l, uc_l, xmitq); + tipc_bcbase_select_primary(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); +} -/** - * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer +/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer * - * Send packet over as many bearers as necessary to reach all nodes - * that have joined the broadcast link. - * - * Returns 0 (packet sent successfully) under all circumstances, - * since the broadcast link's pseudo-bearer never blocks + * RCU is locked, node lock is set */ -static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, - struct tipc_media_addr *unused2) -{ - int bp_index; - - /* Prepare broadcast link message for reliable transmission, - * if first time trying to send it; - * preparation is skipped for broadcast link protocol messages - * since they are sent in an unreliable manner and don't need it - */ - if (likely(!msg_non_seq(buf_msg(buf)))) { - struct tipc_msg *msg; - - bcbuf_set_acks(buf, bclink->bcast_nodes.count); - msg = buf_msg(buf); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); - bcl->stats.sent_info++; - - if (WARN_ON(!bclink->bcast_nodes.count)) { - dump_stack(); - return 0; - } - } +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct 
sk_buff_head xmitq; - /* Send buffer over bearers until all targets reached */ - bcbearer->remains = bclink->bcast_nodes; + __skb_queue_head_init(&xmitq); - for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { - struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; - struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; - struct tipc_bearer *b = p; - struct sk_buff *tbuf; + tipc_bcast_lock(net); + tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); + tipc_bcbase_select_primary(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); - if (!p) - break; /* No more bearers to try */ + tipc_bcbase_xmit(net, &xmitq); - if (tipc_bearer_blocked(p)) { - if (!s || tipc_bearer_blocked(s)) - continue; /* Can't use either bearer */ - b = s; - } + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} - tipc_nmap_diff(&bcbearer->remains, &b->nodes, - &bcbearer->remains_new); - if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* Nothing added by bearer pair */ - - if (bp_index == 0) { - /* Use original buffer for first bearer */ - tipc_bearer_send(b, buf, &b->bcast_addr); - } else { - /* Avoid concurrent buffer access */ - tbuf = pskb_copy(buf, GFP_ATOMIC); - if (!tbuf) - break; - tipc_bearer_send(b, tbuf, &b->bcast_addr); - kfree_skb(tbuf); /* Bearer keeps a clone */ - } +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l) +{ + if (!l) + return -ENOPROTOOPT; - /* Swap bearers for next packet */ - if (s) { - bcbearer->bpairs[bp_index].primary = s; - bcbearer->bpairs[bp_index].secondary = p; - } + tipc_bcast_lock(net); + tipc_link_reset_stats(l); + tipc_bcast_unlock(net); + return 0; +} - if (bcbearer->remains_new.count == 0) - break; /* All targets reached */ +static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win) +{ + struct tipc_link *l = tipc_bc_sndlink(net); - bcbearer->remains = bcbearer->remains_new; + if (!l) + return -ENOPROTOOPT; + if (max_win < 
BCLINK_WIN_MIN) + max_win = BCLINK_WIN_MIN; + if (max_win > TIPC_MAX_LINK_WIN) + return -EINVAL; + tipc_bcast_lock(net); + tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win); + tipc_bcast_unlock(net); + return 0; +} + +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; } return 0; } -/** - * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer - */ -void tipc_bcbearer_sort(void) +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) { - struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; - struct tipc_bcbearer_pair *bp_curr; - int b_index; - int pri; + struct tipc_bc_base *bb = tipc_bc_base(net); - spin_lock_bh(&bc_lock); + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; - /* Group bearers by priority (can assume max of two per priority) */ - memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; - for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct tipc_bearer *b = &tipc_bearers[b_index]; + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); - if (!b->active || !b->nodes.count) - continue; + return 0; +} - if (!bp_temp[b->priority].primary) - bp_temp[b->priority].primary = b; - else - bp_temp[b->priority].secondary = b; - } +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) +{ + int err; + u32 
win; + u32 bc_mode; + u32 bc_ratio; + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - /* Create array of bearer pairs for broadcasting */ - bp_curr = bcbearer->bpairs; - memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs)); + if (!attrs[TIPC_NLA_LINK_PROP]) + return -EINVAL; - for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) { + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); + if (err) + return err; - if (!bp_temp[pri].primary) - continue; + if (!props[TIPC_NLA_PROP_WIN] && + !props[TIPC_NLA_PROP_BROADCAST] && + !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { + return -EOPNOTSUPP; + } - bp_curr->primary = bp_temp[pri].primary; + if (props[TIPC_NLA_PROP_BROADCAST]) { + bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); + err = tipc_bc_link_set_broadcast_mode(net, bc_mode); + } - if (bp_temp[pri].secondary) { - if (tipc_nmap_equal(&bp_temp[pri].primary->nodes, - &bp_temp[pri].secondary->nodes)) { - bp_curr->secondary = bp_temp[pri].secondary; - } else { - bp_curr++; - bp_curr->primary = bp_temp[pri].secondary; - } - } + if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) { + bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); + err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); + } - bp_curr++; + if (!err && props[TIPC_NLA_PROP_WIN]) { + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + err = tipc_bc_link_set_queue_limits(net, win); } - spin_unlock_bh(&bc_lock); + return err; } - -int tipc_bclink_stats(char *buf, const u32 buf_size) +int tipc_bcast_init(struct net *net) { - int ret; - struct tipc_stats *s; - - if (!bcl) - return 0; - - spin_lock_bh(&bc_lock); - - s = &bcl->stats; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " Window:%u packets\n", - bcl->name, bcl->queue_limit[0]); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX 
packets:%u fragments:%u/%u bundles:%u/%u\n", - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX naks:%u defs:%u dups:%u\n", - s->recv_nacks, s->deferred_recv, s->duplicates); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX naks:%u acks:%u dups:%u\n", - s->sent_nacks, s->sent_acks, s->retransmitted); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue max:%u avg:%u\n", - s->link_congs, s->max_queue_sz, - s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - spin_unlock_bh(&bc_lock); - return ret; -} - -int tipc_bclink_reset_stats(void) -{ - if (!bcl) - return -ENOPROTOOPT; - - spin_lock_bh(&bc_lock); - memset(&bcl->stats, 0, sizeof(bcl->stats)); - spin_unlock_bh(&bc_lock); + struct tipc_net *tn = tipc_net(net); + struct tipc_bc_base *bb = NULL; + struct tipc_link *l = NULL; + + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) + goto enomem; + tn->bcbase = bb; + spin_lock_init(&tipc_net(net)->bclock); + + if (!tipc_link_bc_create(net, 0, 0, NULL, + one_page_mtu, + BCLINK_WIN_DEFAULT, + BCLINK_WIN_DEFAULT, + 0, + &bb->inputq, + NULL, + NULL, + &l)) + goto enomem; + bb->link = l; + tn->bcl = l; + bb->rc_ratio = 10; + bb->rcast_support = true; return 0; +enomem: + kfree(bb); + kfree(l); + return -ENOMEM; } -int tipc_bclink_set_queue_limits(u32 limit) +void tipc_bcast_stop(struct net *net) { - if (!bcl) - return -ENOPROTOOPT; - if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) - return -EINVAL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&bc_lock); - tipc_link_set_queue_limits(bcl, limit); - spin_unlock_bh(&bc_lock); - return 0; + synchronize_net(); + kfree(tn->bcbase); + kfree(tn->bcl); } -void tipc_bclink_init(void) +void tipc_nlist_init(struct tipc_nlist *nl, u32 self) { - bcbearer->bearer.media = &bcbearer->media; - bcbearer->media.send_msg = tipc_bcbearer_send; - 
sprintf(bcbearer->media.name, "tipc-broadcast"); + memset(nl, 0, sizeof(*nl)); + INIT_LIST_HEAD(&nl->list); + nl->self = self; +} - INIT_LIST_HEAD(&bcl->waiting_ports); - bcl->next_out_no = 1; - spin_lock_init(&bclink->node.lock); - bcl->owner = &bclink->node; - bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; - tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - spin_lock_init(&bcbearer->bearer.lock); - bcl->b_ptr = &bcbearer->bearer; - bcl->state = WORKING_WORKING; - strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); +void tipc_nlist_add(struct tipc_nlist *nl, u32 node) +{ + if (node == nl->self) + nl->local = true; + else if (tipc_dest_push(&nl->list, node, 0)) + nl->remote++; } -void tipc_bclink_stop(void) +void tipc_nlist_del(struct tipc_nlist *nl, u32 node) { - spin_lock_bh(&bc_lock); - tipc_link_stop(bcl); - spin_unlock_bh(&bc_lock); + if (node == nl->self) + nl->local = false; + else if (tipc_dest_del(&nl->list, node, 0)) + nl->remote--; +} - memset(bclink, 0, sizeof(*bclink)); - memset(bcbearer, 0, sizeof(*bcbearer)); +void tipc_nlist_purge(struct tipc_nlist *nl) +{ + tipc_dest_list_purge(&nl->list); + nl->remote = 0; + nl->local = false; } +u32 tipc_bcast_get_mode(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); -/** - * tipc_nmap_add - add a node to a node map - */ -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) + if (bb->force_bcast) + return BCLINK_MODE_BCAST; + + if (bb->force_rcast) + return BCLINK_MODE_RCAST; + + if (bb->bcast_support && bb->rcast_support) + return BCLINK_MODE_SEL; + + return 0; +} + +u32 tipc_bcast_get_broadcast_ratio(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); + struct tipc_bc_base *bb = tipc_bc_base(net); - if ((nm_ptr->map[w] & mask) == 0) { - nm_ptr->count++; - nm_ptr->map[w] |= mask; - } + return bb->rc_ratio; } -/** - * tipc_nmap_remove - remove a node from a node map - */ -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +void 
tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr, *_hdr; + bool match = false; + u32 node, port; + + skb = skb_peek(inputq); + if (!skb) + return; + + hdr = buf_msg(skb); + + if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) + return; + + node = msg_orignode(hdr); + if (node == tipc_own_addr(net)) + return; + + port = msg_origport(hdr); - if ((nm_ptr->map[w] & mask) != 0) { - nm_ptr->map[w] &= ~mask; - nm_ptr->count--; + /* Has the twin SYN message already arrived ? */ + skb_queue_walk(defq, _skb) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + match = true; + break; } -} -/** - * tipc_nmap_diff - find differences between node maps - * @nm_a: input node map A - * @nm_b: input node map B - * @nm_diff: output node map A-B (i.e. 
nodes of A that are not in B) - */ -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff) -{ - int stop = ARRAY_SIZE(nm_a->map); - int w; - int b; - u32 map; - - memset(nm_diff, 0, sizeof(*nm_diff)); - for (w = 0; w < stop; w++) { - map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); - nm_diff->map[w] = map; - if (map != 0) { - for (b = 0 ; b < WSIZE; b++) { - if (map & (1 << b)) - nm_diff->count++; - } - } + if (!match) { + if (!msg_is_syn(hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; } -} -/** - * tipc_port_list_add - add a port to a port list, ensuring no duplicates - */ -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port) -{ - struct tipc_port_list *item = pl_ptr; - int i; - int item_sz = PLSIZE; - int cnt = pl_ptr->count; - - for (; ; cnt -= item_sz, item = item->next) { - if (cnt < PLSIZE) - item_sz = cnt; - for (i = 0; i < item_sz; i++) - if (item->ports[i] == port) - return; - if (i < PLSIZE) { - item->ports[i] = port; - pl_ptr->count++; + /* Deliver non-SYN message from other link, otherwise queue it */ + if (!msg_is_syn(hdr)) { + if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) return; - } - if (!item->next) { - item->next = kmalloc(sizeof(*item), GFP_ATOMIC); - if (!item->next) { - pr_warn("Incomplete multicast delivery, no memory\n"); - return; - } - item->next->next = NULL; - } + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; } -} -/** - * tipc_port_list_free - free dynamically created entries in port_list chain - * - */ -void tipc_port_list_free(struct tipc_port_list *pl_ptr) -{ - struct tipc_port_list *item; - struct tipc_port_list *next; + /* Queue non-SYN/SYN message from same link */ + if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } - for (item = pl_ptr->next; item; item = next) { - next = item->next; - kfree(item); + /* Matching SYN messages => return the 
one with data, if any */ + __skb_unlink(_skb, defq); + if (msg_data_sz(hdr)) { + kfree_skb(_skb); + } else { + __skb_dequeue(inputq); + kfree_skb(skb); + __skb_queue_tail(inputq, _skb); + } + + /* Deliver subsequent non-SYN messages from same peer */ + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + if (msg_is_syn(_hdr)) + break; + __skb_unlink(_skb, defq); + __skb_queue_tail(inputq, _skb); } } diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 6ee587b469fd..2d9352dc7b0e 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -1,7 +1,7 @@ /* * net/tipc/bcast.h: Include file for TIPC broadcast code * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,67 +37,91 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#define MAX_NODES 4096 -#define WSIZE 32 +#include "core.h" -/** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set - */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; +struct tipc_node; +struct tipc_msg; +struct tipc_nl_msg; +struct tipc_nlist; +struct tipc_nitem; +extern const char tipc_bclink_name[]; +extern unsigned long sysctl_tipc_bc_retruni; + +#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) + +#define BCLINK_MODE_BCAST 0x1 +#define BCLINK_MODE_RCAST 0x2 +#define BCLINK_MODE_SEL 0x4 + +struct tipc_nlist { + struct list_head list; + u32 self; + u16 remote; + bool local; }; -#define PLSIZE 32 +void tipc_nlist_init(struct tipc_nlist *nl, u32 self); +void tipc_nlist_purge(struct tipc_nlist *nl); +void tipc_nlist_add(struct tipc_nlist *nl, u32 node); +void tipc_nlist_del(struct tipc_nlist *nl, u32 node); -/** - * struct tipc_port_list - set of node local destination ports - * @count: # of ports in set (only valid for 
first entry in list) - * @next: pointer to next entry in list - * @ports: array of port references +/* Cookie to be used between socket and broadcast layer + * @rcast: replicast (instead of broadcast) was used at previous xmit + * @mandatory: broadcast/replicast indication was set by user + * @deferredq: defer queue to make message in order + * @expires: re-evaluate non-mandatory transmit method if we are past this */ -struct tipc_port_list { - int count; - struct tipc_port_list *next; - u32 ports[PLSIZE]; +struct tipc_mc_method { + bool rcast; + bool mandatory; + struct sk_buff_head deferredq; + unsigned long expires; }; +int tipc_bcast_init(struct net *net); +void tipc_bcast_stop(struct net *net); +void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); +int tipc_bcast_get_mtu(struct net *net); +void tipc_bcast_toggle_rcast(struct net *net, bool supp); +int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, + struct tipc_mc_method *method, struct tipc_nlist *dests, + u16 *cong_link_cnt); +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt); +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr, + struct sk_buff_head *retrq); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l); -struct tipc_node; - -extern const char tipc_bclink_name[]; +u32 tipc_bcast_get_mode(struct net *net); +u32 
tipc_bcast_get_broadcast_ratio(struct net *net); -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq); -/** - * tipc_nmap_equal - test for equality of node maps - */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b) +static inline void tipc_bcast_lock(struct net *net) { - return !memcmp(nm_a, nm_b, sizeof(*nm_a)); + spin_lock_bh(&tipc_net(net)->bclock); } -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); -void tipc_port_list_free(struct tipc_port_list *pl_ptr); +static inline void tipc_bcast_unlock(struct net *net) +{ + spin_unlock_bh(&tipc_net(net)->bclock); +} -void tipc_bclink_init(void); -void tipc_bclink_stop(void); -void tipc_bclink_add_node(u32 addr); -void tipc_bclink_remove_node(u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(void); -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -int tipc_bclink_send_msg(struct sk_buff *buf); -void tipc_bclink_recv_pkt(struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(void); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); -int tipc_bclink_stats(char *stats_buf, const u32 buf_size); -int tipc_bclink_reset_stats(void); -int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(void); +static inline struct tipc_link *tipc_bc_sndlink(struct net *net) +{ + return tipc_net(net)->bcl; +} #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cb29ef7ba2f0..ae1ddbf71853 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1,8 +1,8 @@ /* * net/tipc/bearer.c: TIPC bearer code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, 2010-2011, Wind River Systems + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2004-2006, 
2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,124 +34,97 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" -#include "config.h" #include "bearer.h" +#include "link.h" #include "discover.h" +#include "monitor.h" +#include "bcast.h" +#include "netlink.h" +#include "udp_media.h" +#include "trace.h" +#include "crypto.h" #define MAX_ADDR_STR 60 -static struct tipc_media *media_list[MAX_MEDIA]; -static u32 media_count; +static struct tipc_media * const media_info_array[] = { + &eth_media_info, +#ifdef CONFIG_TIPC_MEDIA_IB + &ib_media_info, +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, +#endif + NULL +}; -struct tipc_bearer tipc_bearers[MAX_BEARERS]; +static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + + return rcu_dereference(tn->bearer_list[bearer_id]); +} -static void bearer_disable(struct tipc_bearer *b_ptr); +static void bearer_disable(struct net *net, struct tipc_bearer *b); +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name + * @name: name to locate */ struct tipc_media *tipc_media_find(const char *name) { u32 i; - for (i = 0; i < media_count; i++) { - if (!strcmp(media_list[i]->name, name)) - return media_list[i]; + for (i = 0; media_info_array[i] != NULL; i++) { + if (!strcmp(media_info_array[i]->name, name)) + break; } - return NULL; + return media_info_array[i]; } /** * media_find_id - locates specified media object by type identifier + * @type: type identifier to locate */ static struct tipc_media *media_find_id(u8 type) { u32 i; - for (i = 0; i < media_count; i++) { - if (media_list[i]->type_id == type) - return media_list[i]; + for (i = 0; media_info_array[i] != NULL; i++) { + if (media_info_array[i]->type_id == type) + break; } - return
NULL; -} - -/** - * tipc_register_media - register a media type - * - * Bearers for this media type must be activated separately at a later stage. - */ -int tipc_register_media(struct tipc_media *m_ptr) -{ - int res = -EINVAL; - - write_lock_bh(&tipc_net_lock); - - if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) - goto exit; - if (m_ptr->priority > TIPC_MAX_LINK_PRI) - goto exit; - if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || - (m_ptr->tolerance > TIPC_MAX_LINK_TOL)) - goto exit; - if (media_count >= MAX_MEDIA) - goto exit; - if (tipc_media_find(m_ptr->name) || media_find_id(m_ptr->type_id)) - goto exit; - - media_list[media_count] = m_ptr; - media_count++; - res = 0; -exit: - write_unlock_bh(&tipc_net_lock); - if (res) - pr_warn("Media <%s> registration error\n", m_ptr->name); - return res; + return media_info_array[i]; } /** * tipc_media_addr_printf - record media address in print buffer + * @buf: output buffer + * @len: output buffer size remaining + * @a: input media address */ -void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) +int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; - struct tipc_media *m_ptr; + struct tipc_media *m; int ret; - m_ptr = media_find_id(a->media_id); + m = media_find_id(a->media_id); - if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = tipc_snprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; - ret = tipc_snprintf(buf, len, "UNKNOWN(%u)", a->media_id); + ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) - ret += tipc_snprintf(buf - ret, len + ret, - "-%02x", a->value[i]); + ret += scnprintf(buf + ret, len - ret, + "-%x", a->value[i]); } -} - -/** - * tipc_media_get_names - record names of registered media in buffer - */ -struct sk_buff *tipc_media_get_names(void) 
-{ - struct sk_buff *buf; - int i; - - buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME)); - if (!buf) - return NULL; - - read_lock_bh(&tipc_net_lock); - for (i = 0; i < media_count; i++) { - tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, - media_list[i]->name, - strlen(media_list[i]->name) + 1); - } - read_unlock_bh(&tipc_net_lock); - return buf; + return ret; } /** @@ -159,7 +132,7 @@ struct sk_buff *tipc_media_get_names(void) * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) * - * Returns 1 if bearer name is valid, otherwise 0. + * Return: 1 if bearer name is valid, otherwise 0. */ static int bearer_name_validate(const char *name, struct tipc_bearer_names *name_parts) @@ -171,10 +144,7 @@ static int bearer_name_validate(const char *name, u32 if_len; /* copy bearer name & ensure length is OK */ - name_copy[TIPC_MAX_BEARER_NAME - 1] = 0; - /* need above in case non-Posix strncpy() doesn't pad with nulls */ - strncpy(name_copy, name, TIPC_MAX_BEARER_NAME); - if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0) + if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0) return 0; /* ensure all component parts of bearer name are present */ @@ -193,314 +163,1215 @@ static int bearer_name_validate(const char *name, /* return bearer name components, if necessary */ if (name_parts) { - strcpy(name_parts->media_name, media_name); - strcpy(name_parts->if_name, if_name); + if (strscpy(name_parts->media_name, media_name, + TIPC_MAX_MEDIA_NAME) < 0) + return 0; + if (strscpy(name_parts->if_name, if_name, + TIPC_MAX_IF_NAME) < 0) + return 0; } return 1; } /** * tipc_bearer_find - locates bearer object with matching bearer name + * @net: the applicable net namespace + * @name: bearer name to locate */ -struct tipc_bearer *tipc_bearer_find(const char *name) +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { - struct tipc_bearer *b_ptr; + struct tipc_net *tn = tipc_net(net); + struct 
tipc_bearer *b; u32 i; - for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (b_ptr->active && (!strcmp(b_ptr->name, name))) - return b_ptr; + for (i = 0; i < MAX_BEARERS; i++) { + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (!strcmp(b->name, name))) + return b; } return NULL; } -/** - * tipc_bearer_find_interface - locates bearer object with matching interface name +/* tipc_bearer_get_name - get the bearer name from its id. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @bearer_id: the id to get the name from. */ -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) { - struct tipc_bearer *b_ptr; - char *b_if_name; - u32 i; + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; - for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (!b_ptr->active) - continue; - b_if_name = strchr(b_ptr->name, ':') + 1; - if (!strcmp(b_if_name, if_name)) - return b_ptr; - } - return NULL; -} + if (bearer_id >= MAX_BEARERS) + return -EINVAL; -/** - * tipc_bearer_get_names - record names of bearers in buffer - */ -struct sk_buff *tipc_bearer_get_names(void) -{ - struct sk_buff *buf; - struct tipc_bearer *b_ptr; - int i, j; - - buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); - if (!buf) - return NULL; - - read_lock_bh(&tipc_net_lock); - for (i = 0; i < media_count; i++) { - for (j = 0; j < MAX_BEARERS; j++) { - b_ptr = &tipc_bearers[j]; - if (b_ptr->active && (b_ptr->media == media_list[i])) { - tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b_ptr->name, - strlen(b_ptr->name) + 1); - } - } - } - read_unlock_bh(&tipc_net_lock); - return buf; -} + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + return -EINVAL; -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) -{ - tipc_nmap_add(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - 
tipc_disc_add_dest(b_ptr->link_req); + strcpy(name, b->name); + return 0; } -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { - tipc_nmap_remove(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_remove_dest(b_ptr->link_req); -} + struct tipc_bearer *b; -/* - * Interrupt enabling new requests after bearer blocking: - * See bearer_send(). - */ -void tipc_continue(struct tipc_bearer *b) -{ - spin_lock_bh(&b->lock); - b->blocked = 0; - spin_unlock_bh(&b->lock); + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (b) + tipc_disc_add_dest(b->disc); + rcu_read_unlock(); } -/* - * tipc_bearer_blocked - determines if bearer is currently blocked - */ -int tipc_bearer_blocked(struct tipc_bearer *b) +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { - int res; - - spin_lock_bh(&b->lock); - res = b->blocked; - spin_unlock_bh(&b->lock); + struct tipc_bearer *b; - return res; + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (b) + tipc_disc_remove_dest(b->disc); + rcu_read_unlock(); } /** * tipc_enable_bearer - enable bearer with the given name + * @net: the applicable net namespace + * @name: bearer name to enable + * @disc_domain: bearer domain + * @prio: bearer priority + * @attr: nlattr array + * @extack: netlink extended ack */ -int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 prio, + struct nlattr *attr[], + struct netlink_ext_ack *extack) { - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; + struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; - char addr_string[16]; - u32 bearer_id; - u32 with_this_prio; - u32 i; + int with_this_prio = 1; + struct tipc_bearer *b; + struct tipc_media *m; + struct sk_buff *skb; + int bearer_id = 0; int res = -EINVAL; + char *errstr = ""; + u32 i; - if (!tipc_own_addr) { - 
pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", - name); - return -ENOPROTOOPT; - } if (!bearer_name_validate(name, &b_names)) { - pr_warn("Bearer <%s> rejected, illegal name\n", name); - return -EINVAL; - } - if (tipc_addr_domain_valid(disc_domain) && - (disc_domain != tipc_own_addr)) { - if (tipc_in_scope(disc_domain, tipc_own_addr)) { - disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK; - res = 0; /* accept any node in own cluster */ - } else if (in_own_cluster_exact(disc_domain)) - res = 0; /* accept specified node in own cluster */ - } - if (res) { - pr_warn("Bearer <%s> rejected, illegal discovery domain\n", - name); - return -EINVAL; - } - if ((priority > TIPC_MAX_LINK_PRI) && - (priority != TIPC_MEDIA_LINK_PRI)) { - pr_warn("Bearer <%s> rejected, illegal priority\n", name); - return -EINVAL; + NL_SET_ERR_MSG(extack, "Illegal name"); + return res; } - write_lock_bh(&tipc_net_lock); + if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { + errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); + goto rejected; + } - m_ptr = tipc_media_find(b_names.media_name); - if (!m_ptr) { - pr_warn("Bearer <%s> rejected, media <%s> not registered\n", - name, b_names.media_name); - goto exit; + m = tipc_media_find(b_names.media_name); + if (!m) { + errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); + goto rejected; } - if (priority == TIPC_MEDIA_LINK_PRI) - priority = m_ptr->priority; + if (prio == TIPC_MEDIA_LINK_PRI) + prio = m->priority; -restart: + /* Check new bearer vs existing ones and find free bearer id if any */ bearer_id = MAX_BEARERS; - with_this_prio = 1; - for (i = MAX_BEARERS; i-- != 0; ) { - if (!tipc_bearers[i].active) { + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { bearer_id = i; continue; } - if (!strcmp(name, tipc_bearers[i].name)) { - pr_warn("Bearer <%s> rejected, already enabled\n", - name); - goto exit; + if (!strcmp(name, 
b->name)) { + errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); + goto rejected; } - if ((tipc_bearers[i].priority == priority) && + + if (b->priority == prio && (++with_this_prio > 2)) { - if (priority-- == 0) { - pr_warn("Bearer <%s> rejected, duplicate priority\n", - name); - goto exit; + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; } - pr_warn("Bearer <%s> priority adjustment required %u->%u\n", - name, priority + 1, priority); - goto restart; + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } } + if (bearer_id >= MAX_BEARERS) { - pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", - name, MAX_BEARERS); - goto exit; + errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); + goto rejected; } - b_ptr = &tipc_bearers[bearer_id]; - strcpy(b_ptr->name, name); - res = m_ptr->enable_bearer(b_ptr); + b = kzalloc(sizeof(*b), GFP_ATOMIC); + if (!b) + return -ENOMEM; + + strscpy(b->name, name); + b->media = m; + res = m->enable_media(net, b, attr); if (res) { - pr_warn("Bearer <%s> rejected, enable failure (%d)\n", - name, -res); - goto exit; - } - - b_ptr->identity = bearer_id; - b_ptr->media = m_ptr; - b_ptr->tolerance = m_ptr->tolerance; - b_ptr->window = m_ptr->window; - b_ptr->net_plane = bearer_id + 'A'; - b_ptr->active = 1; - b_ptr->priority = priority; - INIT_LIST_HEAD(&b_ptr->links); - spin_lock_init(&b_ptr->lock); - - res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain); + kfree(b); + errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); + goto rejected; + } + + b->identity = bearer_id; + b->tolerance = m->tolerance; + b->min_win = m->min_win; + b->max_win = m->max_win; + b->domain = 
disc_domain; + b->net_plane = bearer_id + 'A'; + b->priority = prio; + refcount_set(&b->refcnt, 1); + + res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { - bearer_disable(b_ptr); - pr_warn("Bearer <%s> rejected, discovery object creation failed\n", - name); - goto exit; - } - pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", - name, - tipc_addr_string_fill(addr_string, disc_domain), priority); -exit: - write_unlock_bh(&tipc_net_lock); + bearer_disable(net, b); + errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); + goto rejected; + } + + /* Create monitoring data before accepting activate messages */ + if (tipc_mon_create(net, bearer_id)) { + bearer_disable(net, b); + kfree_skb(skb); + return -ENOMEM; + } + + test_and_set_bit_lock(0, &b->up); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); + + pr_info("Enabled bearer <%s>, priority %u\n", name, prio); + + return res; +rejected: + pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr); return res; } /** - * tipc_block_bearer - Block the bearer with the given name, and reset all its links + * tipc_reset_bearer - Reset all links established over this bearer + * @net: the applicable net namespace + * @b: the target bearer */ -int tipc_block_bearer(const char *name) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) +{ + pr_info("Resetting bearer <%s>\n", b->name); + tipc_node_delete_links(net, b->identity); + tipc_disc_reset(net, b); + return 0; +} + +bool tipc_bearer_hold(struct tipc_bearer *b) +{ + return (b && refcount_inc_not_zero(&b->refcnt)); +} + +void tipc_bearer_put(struct tipc_bearer *b) +{ + if (b && refcount_dec_and_test(&b->refcnt)) + kfree_rcu(b, rcu); +} + +/** + * bearer_disable - disable this bearer + * @net: the applicable net namespace + * @b: the bearer to disable + * + * Note: This routine assumes caller holds RTNL 
lock. + */ +static void bearer_disable(struct net *net, struct tipc_bearer *b) +{ + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; + + pr_info("Disabling bearer <%s>\n", b->name); + clear_bit_unlock(0, &b->up); + tipc_node_delete_links(net, bearer_id); + b->media->disable_media(b); + RCU_INIT_POINTER(b->media_ptr, NULL); + if (b->disc) + tipc_disc_delete(b->disc); + RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); + tipc_bearer_put(b); + tipc_mon_delete(net, bearer_id); +} + +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) { - struct tipc_bearer *b_ptr = NULL; - struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; + char *dev_name = strchr((const char *)b->name, ':') + 1; + int hwaddr_len = b->media->hwaddr_len; + u8 node_id[NODE_ID_LEN] = {0,}; + struct net_device *dev; - read_lock_bh(&tipc_net_lock); - b_ptr = tipc_bearer_find(name); - if (!b_ptr) { - pr_warn("Attempt to block unknown bearer <%s>\n", name); - read_unlock_bh(&tipc_net_lock); + /* Find device with specified name */ + dev = dev_get_by_name(net, dev_name); + if (!dev) + return -ENODEV; + if (tipc_mtu_bad(dev)) { + dev_put(dev); + return -EINVAL; + } + if (dev == net->loopback_dev) { + dev_put(dev); + pr_info("Enabling <%s> not permitted\n", b->name); + return -EINVAL; + } + + /* Autoconfigure own node identity if needed */ + if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { + memcpy(node_id, dev->dev_addr, hwaddr_len); + tipc_net_init(net, node_id, 0); + } + if (!tipc_own_id(net)) { + dev_put(dev); + pr_warn("Failed to obtain node identity\n"); return -EINVAL; } - pr_info("Blocking bearer <%s>\n", name); - spin_lock_bh(&b_ptr->lock); - b_ptr->blocked = 1; - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - struct tipc_node *n_ptr = l_ptr->owner; + /* Associate TIPC bearer with L2 bearer */ + rcu_assign_pointer(b->media_ptr, dev); + b->pt.dev = dev; + b->pt.type = htons(ETH_P_TIPC); + b->pt.func = 
tipc_l2_rcv_msg; + dev_add_pack(&b->pt); + memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); + memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len); + b->bcast_addr.media_id = b->media->type_id; + b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; + b->mtu = dev->mtu; + b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); + rcu_assign_pointer(dev->tipc_ptr, b); + return 0; +} + +/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface + * @b: the target bearer + * + * Mark L2 bearer as inactive so that incoming buffers are thrown away + */ +void tipc_disable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev; - spin_lock_bh(&n_ptr->lock); - tipc_link_reset(l_ptr); - spin_unlock_bh(&n_ptr->lock); + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + dev_remove_pack(&b->pt); + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); + dev_put(dev); +} + +/** + * tipc_l2_send_msg - send a TIPC packet out over an L2 interface + * @net: the associated network namespace + * @skb: the packet to be sent + * @b: the bearer through which the packet is to be sent + * @dest: peer destination address + */ +int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, struct tipc_media_addr *dest) +{ + struct net_device *dev; + int delta; + + dev = (struct net_device *)rcu_dereference(b->media_ptr); + if (!dev) + return 0; + + delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { + kfree_skb(skb); + return 0; } - spin_unlock_bh(&b_ptr->lock); - read_unlock_bh(&tipc_net_lock); + skb_reset_network_header(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_TIPC); + dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, skb->len); + dev_queue_xmit(skb); return 0; } +bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id) +{ + bool supp = false; + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, 
bearer_id); + if (b) + supp = (b->bcast_addr.broadcast == TIPC_BROADCAST_SUPPORT); + rcu_read_unlock(); + return supp; +} + +int tipc_bearer_mtu(struct net *net, u32 bearer_id) +{ + int mtu = 0; + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (b) + mtu = b->mtu; + rcu_read_unlock(); + return mtu; +} + +int tipc_bearer_min_mtu(struct net *net, u32 bearer_id) +{ + int mtu = TIPC_MIN_BEARER_MTU; + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (b) + mtu += b->encap_hlen; + rcu_read_unlock(); + return mtu; +} + +/* tipc_bearer_xmit_skb - sends buffer to destination over bearer + */ +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dest, NULL); + if (skb) +#endif + b->media->send_msg(net, skb, b, dest); + } else { + kfree_skb(skb); + } + rcu_read_unlock(); +} + +/* tipc_bearer_xmit() -send buffer to destination over bearer + */ +void tipc_bearer_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr *dst, + struct tipc_node *__dnode) +{ + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + + if (skb_queue_empty(xmitq)) + return; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + __skb_dequeue(xmitq); + if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dst, __dnode); + if (skb) +#endif + b->media->send_msg(net, skb, b, dst); + } else { + kfree_skb(skb); + } + } + rcu_read_unlock(); +} + +/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations + */ +void tipc_bearer_bc_xmit(struct 
net *net, u32 bearer_id, + struct sk_buff_head *xmitq) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_media_addr *dst; + int net_id = tn->net_id; + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (unlikely(!b || !test_bit(0, &b->up))) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + dst = &b->bcast_addr; +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dst, NULL); + if (skb) +#endif + b->media->send_msg(net, skb, b, dst); + } + rcu_read_unlock(); +} + /** - * bearer_disable + * tipc_l2_rcv_msg - handle incoming TIPC message from an interface + * @skb: the received message + * @dev: the net device that the packet was received on + * @pt: the packet_type structure which was used to register this handler + * @orig_dev: the original receive net device in case the device is a bond * - * Note: This routine assumes caller holds tipc_net_lock. + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using interface multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). 
*/ -static void bearer_disable(struct tipc_bearer *b_ptr) +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) { - struct tipc_link *l_ptr; - struct tipc_link *temp_l_ptr; + struct tipc_bearer *b; - pr_info("Disabling bearer <%s>\n", b_ptr->name); - spin_lock_bh(&b_ptr->lock); - b_ptr->blocked = 1; - b_ptr->media->disable_bearer(b_ptr); - list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { - tipc_link_delete(l_ptr); + rcu_read_lock(); + b = rcu_dereference(dev->tipc_ptr) ?: + rcu_dereference(orig_dev->tipc_ptr); + if (likely(b && test_bit(0, &b->up) && + (skb->pkt_type <= PACKET_MULTICAST))) { + skb_mark_not_on_list(skb); + TIPC_SKB_CB(skb)->flags = 0; + tipc_rcv(dev_net(b->pt.dev), skb, b); + rcu_read_unlock(); + return NET_RX_SUCCESS; } - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); - spin_unlock_bh(&b_ptr->lock); - memset(b_ptr, 0, sizeof(struct tipc_bearer)); + rcu_read_unlock(); + kfree_skb(skb); + return NET_RX_DROP; } -int tipc_disable_bearer(const char *name) +/** + * tipc_l2_device_event - handle device events from network device + * @nb: the context of the notification + * @evt: the type of event + * @ptr: the net device that the event was on + * + * This function is called by the Ethernet driver in case of link + * change event. 
+ */ +static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, + void *ptr) { - struct tipc_bearer *b_ptr; - int res; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct tipc_bearer *b; - write_lock_bh(&tipc_net_lock); - b_ptr = tipc_bearer_find(name); - if (b_ptr == NULL) { - pr_warn("Attempt to disable unknown bearer <%s>\n", name); - res = -EINVAL; - } else { - bearer_disable(b_ptr); - res = 0; + b = rtnl_dereference(dev->tipc_ptr); + if (!b) + return NOTIFY_DONE; + + trace_tipc_l2_device_event(dev, b, evt); + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev) && netif_oper_up(dev)) { + test_and_set_bit_lock(0, &b->up); + break; + } + fallthrough; + case NETDEV_GOING_DOWN: + clear_bit_unlock(0, &b->up); + tipc_reset_bearer(net, b); + break; + case NETDEV_UP: + test_and_set_bit_lock(0, &b->up); + break; + case NETDEV_CHANGEMTU: + if (tipc_mtu_bad(dev)) { + bearer_disable(net, b); + break; + } + b->mtu = dev->mtu; + tipc_reset_bearer(net, b); + break; + case NETDEV_CHANGEADDR: + b->media->raw2addr(b, &b->addr, + (const char *)dev->dev_addr); + tipc_reset_bearer(net, b); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + bearer_disable(net, b); + break; } - write_unlock_bh(&tipc_net_lock); - return res; + return NOTIFY_OK; } +static struct notifier_block notifier = { + .notifier_call = tipc_l2_device_event, + .priority = 0, +}; +int tipc_bearer_setup(void) +{ + return register_netdevice_notifier(&notifier); +} -void tipc_bearer_stop(void) +void tipc_bearer_cleanup(void) { + unregister_netdevice_notifier(&notifier); +} + +void tipc_bearer_stop(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - if (tipc_bearers[i].active) - bearer_disable(&tipc_bearers[i]); + b = rtnl_dereference(tn->bearer_list[i]); + if (b) { + bearer_disable(net, b); + tn->bearer_list[i] = NULL; + } + } +} + +void
tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts) +{ + struct net_device *dev = net->loopback_dev; + struct sk_buff *skb, *_skb; + int exp; + + skb_queue_walk(pkts, _skb) { + skb = pskb_copy(_skb, GFP_ATOMIC); + if (!skb) + continue; + + exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { + kfree_skb(skb); + continue; + } + + skb_reset_network_header(skb); + dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, + dev->dev_addr, skb->len); + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); } - media_count = 0; +} + +static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *od) +{ + consume_skb(skb); + return NET_RX_SUCCESS; +} + +int tipc_attach_loopback(struct net *net) +{ + struct net_device *dev = net->loopback_dev; + struct tipc_net *tn = tipc_net(net); + + if (!dev) + return -ENODEV; + + netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); + tn->loopback_pt.dev = dev; + tn->loopback_pt.type = htons(ETH_P_TIPC); + tn->loopback_pt.func = tipc_loopback_rcv_pkt; + dev_add_pack(&tn->loopback_pt); + return 0; +} + +void tipc_detach_loopback(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + dev_remove_pack(&tn->loopback_pt); + netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); +} + +/* Caller should hold rtnl_lock to protect the bearer */ +static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, + struct tipc_bearer *bearer, int nlflags) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_BEARER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, 
bearer->name)) + goto attr_msg_full; + + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win)) + goto prop_msg_full; + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { + if (tipc_udp_nl_add_bearer_data(msg, bearer)) + goto attr_msg_full; + } +#endif + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + + if (i == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (i = 0; i < MAX_BEARERS; i++) { + bearer = rtnl_dereference(tn->bearer_list[i]); + if (!bearer) + continue; + + err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct sk_buff *rep; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = 
nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + bearer = tipc_bearer_find(net, name); + if (!bearer) { + err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + goto err_out; + } + + err = __tipc_nl_add_bearer(&msg, bearer, 0); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + bearer = tipc_bearer_find(net, name); + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + return -EINVAL; + } + + bearer_disable(net, bearer); + + return 0; +} + +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_disable(skb, info); + rtnl_unlock(); + + return err; +} + +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + u32 domain = 0; + u32 prio; + + prio = TIPC_MEDIA_LINK_PRI; + + if (!info->attrs[TIPC_NLA_BEARER]) + return 
-EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + if (attrs[TIPC_NLA_BEARER_DOMAIN]) + domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + } + + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); +} + +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_enable(skb, info); + rtnl_unlock(); + + return err; +} + +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, name); + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + err = -EINVAL; + goto out; + } + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + err = -EINVAL; + goto out; + } + + err = tipc_udp_nl_bearer_add(b, + attrs[TIPC_NLA_BEARER_UDP_OPTS]); + } +#endif +out: + rtnl_unlock(); + + return err; +} + +int __tipc_nl_bearer_set(struct sk_buff *skb, struct 
genl_info *info) +{ + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + char *name; + int err; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + b = tipc_bearer_find(net, name); + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + return -EINVAL; + } + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_TOL]) { + b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); + } + if (props[TIPC_NLA_PROP_PRIO]) + b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); + return -EINVAL; + } +#ifdef CONFIG_TIPC_MEDIA_UDP + if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) < + b->encap_hlen + TIPC_MIN_BEARER_MTU) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); + return -EINVAL; + } + b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); +#endif + } + } + + return 0; +} + +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_set(skb, info); + rtnl_unlock(); + + return err; +} + +static int __tipc_nl_add_media(struct tipc_nl_msg *msg, + struct tipc_media *media, int nlflags) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, 
msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_MEDIA_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) + goto attr_msg_full; + + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win)) + goto prop_msg_full; + if (media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_nl_msg msg; + + if (i == MAX_MEDIA) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (; media_info_array[i] != NULL; i++) { + err = __tipc_nl_add_media(&msg, media_info_array[i], + NLM_F_MULTI); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_nl_msg msg; + struct tipc_media *media; + struct sk_buff *rep; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); + if (err) + return err; + + if 
(!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + media = tipc_media_find(name); + if (!media) { + NL_SET_ERR_MSG(info->extack, "Media not found"); + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_media(&msg, media, 0); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_media *m; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + m = tipc_media_find(name); + if (!m) { + NL_SET_ERR_MSG(info->extack, "Media not found"); + return -EINVAL; + } + if (attrs[TIPC_NLA_MEDIA_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_TOL]) + m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if (props[TIPC_NLA_PROP_PRIO]) + m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (m->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); + return -EINVAL; + } +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); + return -EINVAL; + } + m->mtu = 
nla_get_u32(props[TIPC_NLA_PROP_MTU]); +#endif + } + } + + return 0; +} + +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_media_set(skb, info); + rtnl_unlock(); + + return err; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 09c869adcfcf..41eac1ee0c09 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -1,7 +1,7 @@ /* * net/tipc/bearer.h: Include file for TIPC bearer code * - * Copyright (c) 1996-2006, Ericsson AB + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,26 +37,36 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "bcast.h" +#include "netlink.h" +#include "core.h" +#include "msg.h" +#include <net/genetlink.h> -#define MAX_BEARERS 2 -#define MAX_MEDIA 2 +#define MAX_MEDIA 3 -/* - * Identifiers associated with TIPC message header media address info - * - * - address info field is 20 bytes long - * - media type identifier located at offset 3 - * - remaining bytes vary according to media type +/* Identifiers associated with TIPC message header media address info + * - address info field is 32 bytes long + * - the field's actual content and length is defined per media + * - remaining unused bytes in the field are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 20 +#define TIPC_MEDIA_INFO_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 +#define TIPC_MEDIA_ADDR_OFFSET 4 /* * Identifiers of supported TIPC media types */ #define TIPC_MEDIA_TYPE_ETH 1 #define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 + +/* Minimum bearer MTU */ +#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE) + +/* Identifiers for distinguishing between broadcast/multicast and replicast + */ +#define TIPC_BROADCAST_SUPPORT 1 +#define TIPC_REPLICAST_SUPPORT 2 /** * struct tipc_media_addr - destination address used by TIPC bearers @@ -65,7 +75,7 @@ * @broadcast: non-zero if address is a broadcast address */ struct 
tipc_media_addr { - u8 value[TIPC_MEDIA_ADDR_SIZE]; + u8 value[TIPC_MEDIA_INFO_SIZE]; u8 media_id; u8 broadcast; }; @@ -73,78 +83,97 @@ struct tipc_media_addr { struct tipc_bearer; /** - * struct tipc_media - TIPC media information available to internal users + * struct tipc_media - Media specific info exposed to generic bearer layer * @send_msg: routine which handles buffer transmission - * @enable_bearer: routine which enables a bearer - * @disable_bearer: routine which disables a bearer - * @addr2str: routine which converts media address to string - * @addr2msg: routine which converts media address to protocol message area - * @msg2addr: routine which converts media address from protocol message area - * @bcast_addr: media address used in broadcasting + * @enable_media: routine which enables a media + * @disable_media: routine which disables a media + * @addr2str: convert media address format to string + * @addr2msg: convert from media addr format to discovery msg addr format + * @msg2addr: convert from discovery msg addr format to media addr format + * @raw2addr: convert from raw addr format to media addr format * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure - * @window: default window (in packets) before declaring link congestion + * @min_win: minimum window (in packets) before declaring link congestion + * @max_win: maximum window (in packets) before declaring link congestion + * @mtu: max packet size bearer can support for media type not dependent on + * underlying device MTU * @type_id: TIPC media identifier + * @hwaddr_len: TIPC media address len * @name: media name */ struct tipc_media { - int (*send_msg)(struct sk_buff *buf, - struct tipc_bearer *b_ptr, + int (*send_msg)(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); - int (*enable_bearer)(struct tipc_bearer *b_ptr); - void (*disable_bearer)(struct tipc_bearer *b_ptr); - int (*addr2str)(struct 
tipc_media_addr *a, char *str_buf, int str_size); - int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); - int (*msg2addr)(const struct tipc_bearer *b_ptr, - struct tipc_media_addr *a, char *msg_area); + int (*enable_media)(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]); + void (*disable_media)(struct tipc_bearer *b); + int (*addr2str)(struct tipc_media_addr *addr, + char *strbuf, + int bufsz); + int (*addr2msg)(char *msg, struct tipc_media_addr *addr); + int (*msg2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg); + int (*raw2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + const char *raw); u32 priority; u32 tolerance; - u32 window; + u32 min_win; + u32 max_win; + u32 mtu; u32 type_id; + u32 hwaddr_len; char name[TIPC_MAX_MEDIA_NAME]; }; /** - * struct tipc_bearer - TIPC bearer structure - * @usr_handle: pointer to additional media-specific information about bearer + * struct tipc_bearer - Generic TIPC bearer structure + * @media_ptr: pointer to additional media-specific information about bearer * @mtu: max packet size bearer can support - * @blocked: non-zero if bearer is blocked - * @lock: spinlock for controlling access to bearer * @addr: media-specific address associated with bearer * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer + * @bcast_addr: media address used in broadcasting + * @pt: packet type for bearer + * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer - * @window: default window size for bearer + * @min_win: minimum window (in packets) before declaring link congestion + * @max_win: maximum window (in packets) before declaring link congestion * @tolerance: default link tolerance for bearer + * @domain: network domain to which links can be established * @identity: array index of this bearer within TIPC bearer array - * @link_req: ptr to (optional) structure making periodic link setup requests - * 
@links: list of non-congested links associated with bearer - * @active: non-zero if bearer structure is represents a bearer + * @disc: ptr to link setup request * @net_plane: network plane ('A' through 'H') currently associated with bearer - * @nodes: indicates which nodes in cluster can be reached through bearer + * @encap_hlen: encap headers length + * @up: bearer up flag (bit 0) + * @refcnt: tipc_bearer reference counter * * Note: media-specific code is responsible for initialization of the fields * indicated below when a bearer is enabled; TIPC's generic bearer code takes * care of initializing all other fields. */ struct tipc_bearer { - void *usr_handle; /* initalized by media */ - u32 mtu; /* initalized by media */ - int blocked; /* initalized by media */ - struct tipc_media_addr addr; /* initalized by media */ + void __rcu *media_ptr; /* initialized by media */ + u32 mtu; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ char name[TIPC_MAX_BEARER_NAME]; - spinlock_t lock; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct packet_type pt; + struct rcu_head rcu; u32 priority; - u32 window; + u32 min_win; + u32 max_win; u32 tolerance; + u32 domain; u32 identity; - struct tipc_link_req *link_req; - struct list_head links; - int active; + struct tipc_discoverer *disc; char net_plane; - struct tipc_node_map nodes; + u16 encap_hlen; + unsigned long up; + refcount_t refcnt; }; struct tipc_bearer_names { @@ -152,62 +181,86 @@ struct tipc_bearer_names { char if_name[TIPC_MAX_IF_NAME]; }; -struct tipc_link; - -extern struct tipc_bearer tipc_bearers[]; - /* * TIPC routines available to supported media types */ -int tipc_register_media(struct tipc_media *m_ptr); - -void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); -int tipc_block_bearer(const char *name); -void tipc_continue(struct tipc_bearer *tb_ptr); - -int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); -int 
tipc_disable_bearer(const char *name); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b); /* * Routines made available to TIPC by supported media types */ -int tipc_eth_media_start(void); -void tipc_eth_media_stop(void); +extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB -int tipc_ib_media_start(void); -void tipc_ib_media_stop(void); -#else -static inline int tipc_ib_media_start(void) { return 0; } -static inline void tipc_ib_media_stop(void) { return; } +extern struct tipc_media ib_media_info; +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; #endif -int tipc_media_set_priority(const char *name, u32 new_value); -int tipc_media_set_window(const char *name, u32 new_value); -void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -struct sk_buff *tipc_media_get_names(void); +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); -struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); 
-struct tipc_bearer *tipc_bearer_find(const char *name); -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); +int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); +bool tipc_bearer_hold(struct tipc_bearer *b); +void tipc_bearer_put(struct tipc_bearer *b); +void tipc_disable_l2_media(struct tipc_bearer *b); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); + +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); -int tipc_bearer_blocked(struct tipc_bearer *b_ptr); -void tipc_bearer_stop(void); +int tipc_bearer_setup(void); +void tipc_bearer_cleanup(void); +void tipc_bearer_stop(struct net *net); +int tipc_bearer_mtu(struct net *net, u32 bearer_id); +int tipc_bearer_min_mtu(struct net *net, u32 bearer_id); +bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id); +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest); +void tipc_bearer_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr *dst, + struct tipc_node *__dnode); +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq); +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts); +int tipc_attach_loopback(struct net *net); +void tipc_detach_loopback(struct net *net); -/** - * tipc_bearer_send- sends buffer to destination over bearer - * - * IMPORTANT: - * The media send routine must not alter the buffer being passed in - * as it may be needed for later retransmission! 
- */ -static inline void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, - struct tipc_media_addr *dest) +static inline void tipc_loopback_trace(struct net *net, + struct sk_buff_head *pkts) +{ + if (unlikely(dev_nit_active(net->loopback_dev))) + tipc_clone_to_loopback(net, pkts); +} + +/* check if device MTU is too low for tipc headers */ +static inline bool tipc_mtu_bad(struct net_device *dev) { - b->media->send_msg(buf, b, dest); + if (dev->mtu >= TIPC_MIN_BEARER_MTU) + return false; + netdev_warn(dev, "MTU too low for tipc bearer\n"); + return true; } #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c deleted file mode 100644 index c301a9a592d8..000000000000 --- a/net/tipc/config.c +++ /dev/null @@ -1,448 +0,0 @@ -/* - * net/tipc/config.c: TIPC configuration management code - * - * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "port.h" -#include "name_table.h" -#include "config.h" -#include "server.h" - -#define REPLY_TRUNCATED "<truncated>\n" - -static DEFINE_MUTEX(config_mutex); -static struct tipc_server cfgsrv; - -static const void *req_tlv_area; /* request message TLV area */ -static int req_tlv_space; /* request message TLV area size */ -static int rep_headroom; /* reply message headroom to use */ - - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size) -{ - struct sk_buff *buf; - - buf = alloc_skb(rep_headroom + payload_size, GFP_ATOMIC); - if (buf) - skb_reserve(buf, rep_headroom); - return buf; -} - -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size) -{ - struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf); - int new_tlv_space = TLV_SPACE(tlv_data_size); - - if (skb_tailroom(buf) < new_tlv_space) - return 0; - skb_put(buf, new_tlv_space); - tlv->tlv_type = htons(tlv_type); - tlv->tlv_len = htons(TLV_LENGTH(tlv_data_size)); - if (tlv_data_size && tlv_data) - memcpy(TLV_DATA(tlv), tlv_data, tlv_data_size); - return 1; -} - -static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) 
-{ - struct sk_buff *buf; - __be32 value_net; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value))); - if (buf) { - value_net = htonl(value); - tipc_cfg_append_tlv(buf, tlv_type, &value_net, - sizeof(value_net)); - } - return buf; -} - -static struct sk_buff *tipc_cfg_reply_unsigned(u32 value) -{ - return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value); -} - -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) -{ - struct sk_buff *buf; - int string_len = strlen(string) + 1; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(string_len)); - if (buf) - tipc_cfg_append_tlv(buf, tlv_type, string, string_len); - return buf; -} - -static struct sk_buff *tipc_show_stats(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(u32 *)TLV_DATA(req_tlv_area)); - if (value != 0) - return tipc_cfg_reply_error_string("unsupported argument"); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (buf == NULL) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - str_len = tipc_snprintf(pb, pb_len, "TIPC version " TIPC_MOD_VER "\n"); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -static struct sk_buff *cfg_enable_bearer(void) -{ - struct tipc_bearer_config *args; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(args->name, - ntohl(args->disc_domain), - ntohl(args->priority))) - return tipc_cfg_reply_error_string("unable to enable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff 
*cfg_disable_bearer(void) -{ - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - if (tipc_disable_bearer((char *)TLV_DATA(req_tlv_area))) - return tipc_cfg_reply_error_string("unable to disable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_own_addr(void) -{ - u32 addr; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (addr == tipc_own_addr) - return tipc_cfg_reply_none(); - if (!tipc_addr_node_valid(addr)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (node address)"); - if (tipc_own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change node address once assigned)"); - tipc_core_start_net(addr); - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_remote_mng(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - tipc_remote_management = (value != 0); - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_max_ports(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - if (value < 127 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max ports must be 127-65535)"); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC is active)"); -} - -static struct sk_buff *cfg_set_netid(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return 
tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_net_id) - return tipc_cfg_reply_none(); - if (value < 1 || value > 9999) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network id must be 1-9999)"); - if (tipc_own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change network id once TIPC has joined a network)"); - tipc_net_id = value; - return tipc_cfg_reply_none(); -} - -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, - int request_space, int reply_headroom) -{ - struct sk_buff *rep_tlv_buf; - - mutex_lock(&config_mutex); - - /* Save request and reply details in a well-known location */ - req_tlv_area = request_area; - req_tlv_space = request_space; - rep_headroom = reply_headroom; - - /* Check command authorization */ - if (likely(in_own_node(orig_node))) { - /* command is permitted */ - } else if (cmd >= 0x8000) { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot be done remotely)"); - goto exit; - } else if (!tipc_remote_management) { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE); - goto exit; - } else if (cmd >= 0x4000) { - u32 domain = 0; - - if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) || - (domain != orig_node)) { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_ZONE_MSTR); - goto exit; - } - } - - /* Call appropriate processing routine */ - switch (cmd) { - case TIPC_CMD_NOOP: - rep_tlv_buf = tipc_cfg_reply_none(); - break; - case TIPC_CMD_GET_NODES: - rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_GET_LINKS: - rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_RESET_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, 
req_tlv_space); - break; - case TIPC_CMD_SHOW_NAME_TABLE: - rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(); - break; - case TIPC_CMD_GET_MEDIA_NAMES: - rep_tlv_buf = tipc_media_get_names(); - break; - case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_port_get_ports(); - break; - case TIPC_CMD_SHOW_STATS: - rep_tlv_buf = tipc_show_stats(); - break; - case TIPC_CMD_SET_LINK_TOL: - case TIPC_CMD_SET_LINK_PRI: - case TIPC_CMD_SET_LINK_WINDOW: - rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd); - break; - case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(); - break; - case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(); - break; - case TIPC_CMD_SET_NODE_ADDR: - rep_tlv_buf = cfg_set_own_addr(); - break; - case TIPC_CMD_SET_REMOTE_MNG: - rep_tlv_buf = cfg_set_remote_mng(); - break; - case TIPC_CMD_SET_MAX_PORTS: - rep_tlv_buf = cfg_set_max_ports(); - break; - case TIPC_CMD_SET_NETID: - rep_tlv_buf = cfg_set_netid(); - break; - case TIPC_CMD_GET_REMOTE_MNG: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_remote_management); - break; - case TIPC_CMD_GET_MAX_PORTS: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); - break; - case TIPC_CMD_GET_NETID: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); - break; - case TIPC_CMD_NOT_NET_ADMIN: - rep_tlv_buf = - tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); - break; - case TIPC_CMD_SET_MAX_ZONES: - case TIPC_CMD_GET_MAX_ZONES: - case TIPC_CMD_SET_MAX_SLAVES: - case TIPC_CMD_GET_MAX_SLAVES: - case TIPC_CMD_SET_MAX_CLUSTERS: - case TIPC_CMD_GET_MAX_CLUSTERS: - case TIPC_CMD_SET_MAX_NODES: - case TIPC_CMD_GET_MAX_NODES: - case TIPC_CMD_SET_MAX_SUBSCR: - case TIPC_CMD_GET_MAX_SUBSCR: - case TIPC_CMD_SET_MAX_PUBL: - case TIPC_CMD_GET_MAX_PUBL: - case TIPC_CMD_SET_LOG_SIZE: - case TIPC_CMD_DUMP_LOG: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (obsolete 
command)"); - break; - default: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (unknown command)"); - break; - } - - WARN_ON(rep_tlv_buf->len > TLV_SPACE(ULTRA_STRING_MAX_LEN)); - - /* Append an error message if we cannot return all requested data */ - if (rep_tlv_buf->len == TLV_SPACE(ULTRA_STRING_MAX_LEN)) { - if (*(rep_tlv_buf->data + ULTRA_STRING_MAX_LEN) != '\0') - sprintf(rep_tlv_buf->data + rep_tlv_buf->len - - sizeof(REPLY_TRUNCATED) - 1, REPLY_TRUNCATED); - } - - /* Return reply buffer */ -exit: - mutex_unlock(&config_mutex); - return rep_tlv_buf; -} - -static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) -{ - struct tipc_cfg_msg_hdr *req_hdr; - struct tipc_cfg_msg_hdr *rep_hdr; - struct sk_buff *rep_buf; - int ret; - - /* Validate configuration message header (ignore invalid message) */ - req_hdr = (struct tipc_cfg_msg_hdr *)buf; - if ((len < sizeof(*req_hdr)) || - (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || - (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { - pr_warn("Invalid configuration message discarded\n"); - return; - } - - /* Generate reply for request (if can't, return request) */ - rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type), - buf + sizeof(*req_hdr), - len - sizeof(*req_hdr), - BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); - if (rep_buf) { - skb_push(rep_buf, sizeof(*rep_hdr)); - rep_hdr = (struct tipc_cfg_msg_hdr *)rep_buf->data; - memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); - rep_hdr->tcm_len = htonl(rep_buf->len); - rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - - ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, - rep_buf->len); - if (ret < 0) - pr_err("Sending cfg reply message failed, no memory\n"); - - kfree_skb(rep_buf); - } -} - -static struct sockaddr_tipc cfgsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_CFG_SRV, - .addr.nameseq.lower = 0, - 
.addr.nameseq.upper = 0, - .scope = TIPC_ZONE_SCOPE -}; - -static struct tipc_server cfgsrv __read_mostly = { - .saddr = &cfgsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_RDM, - .max_rcvbuf_size = 64 * 1024, - .name = "cfg_server", - .tipc_conn_recvmsg = cfg_conn_msg_event, - .tipc_conn_new = NULL, - .tipc_conn_shutdown = NULL -}; - -int tipc_cfg_init(void) -{ - return tipc_server_start(&cfgsrv); -} - -void tipc_cfg_reinit(void) -{ - tipc_server_stop(&cfgsrv); - - cfgsrv_addr.addr.nameseq.lower = tipc_own_addr; - cfgsrv_addr.addr.nameseq.upper = tipc_own_addr; - tipc_server_start(&cfgsrv); -} - -void tipc_cfg_stop(void) -{ - tipc_server_stop(&cfgsrv); -} diff --git a/net/tipc/config.h b/net/tipc/config.h deleted file mode 100644 index 1f252f3fa058..000000000000 --- a/net/tipc/config.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * net/tipc/config.h: Include file for TIPC configuration service code - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_CONFIG_H -#define _TIPC_CONFIG_H - -/* ---------------------------------------------------------------------- */ - -#include "link.h" - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size); -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size); -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string); - -static inline struct sk_buff *tipc_cfg_reply_none(void) -{ - return tipc_cfg_reply_alloc(0); -} - -static inline struct sk_buff *tipc_cfg_reply_error_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string); -} - -static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string); -} - -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, - const void *req_tlv_area, int req_tlv_space, - int headroom); - -int tipc_cfg_init(void); -void tipc_cfg_reinit(void); -void tipc_cfg_stop(void); - -#endif diff --git a/net/tipc/core.c b/net/tipc/core.c index fd4eeeaa972a..434e70eabe08 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -1,7 +1,7 @@ /* * net/tipc/core.c: TIPC module code * - * 
Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2013, Ericsson AB * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * @@ -35,151 +35,189 @@ */ #include "core.h" -#include "ref.h" #include "name_table.h" #include "subscr.h" -#include "config.h" -#include "port.h" +#include "bearer.h" +#include "net.h" +#include "socket.h" +#include "bcast.h" +#include "node.h" +#include "crypto.h" #include <linux/module.h> -/* global variables used by multiple sub-systems within TIPC */ -int tipc_random __read_mostly; - /* configurable TIPC parameters */ -u32 tipc_own_addr __read_mostly; -int tipc_max_ports __read_mostly; -int tipc_net_id __read_mostly; -int tipc_remote_management __read_mostly; +unsigned int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ -/** - * tipc_buf_acquire - creates a TIPC message buffer - * @size: message size (including TIPC header) - * - * Returns a new buffer with data pointers set to the specified size. - * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. 
- */ -struct sk_buff *tipc_buf_acquire(u32 size) +static int __net_init tipc_init_net(struct net *net) { - struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); - if (skb) { - skb_reserve(skb, BUF_HEADROOM); - skb_put(skb, size); - skb->next = NULL; - } - return skb; + struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; + + tn->net_id = 4711; + tn->node_addr = 0; + tn->trial_addr = 0; + tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; + INIT_WORK(&tn->work, tipc_net_finalize_work); + memset(tn->node_id, 0, sizeof(tn->node_id)); + memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); + tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; + get_random_bytes(&tn->random, sizeof(int)); + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); + +#ifdef CONFIG_TIPC_CRYPTO + err = tipc_crypto_start(&tn->crypto_tx, net, NULL); + if (err) + goto out_crypto; +#endif + err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + + err = tipc_bcast_init(net); + if (err) + goto out_bclink; + + err = tipc_attach_loopback(net); + if (err) + goto out_bclink; + + return 0; + +out_bclink: + tipc_nametbl_stop(net); +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: + +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&tn->crypto_tx); +out_crypto: +#endif + return err; } -/** - * tipc_core_stop_net - shut down TIPC networking sub-systems - */ -static void tipc_core_stop_net(void) +static void __net_exit tipc_exit_net(struct net *net) { - tipc_net_stop(); - tipc_eth_media_stop(); - tipc_ib_media_stop(); + struct tipc_net *tn = tipc_net(net); + + tipc_detach_loopback(net); + tipc_net_stop(net); + /* Make sure the tipc_net_finalize_work() finished */ + cancel_work_sync(&tn->work); + tipc_bcast_stop(net); + tipc_nametbl_stop(net); + tipc_sk_rht_destroy(net); +#ifdef CONFIG_TIPC_CRYPTO + 
/**
 * tipc_init - module entry point
 *
 * Seeds the receive-buffer sysctl triple and then registers the TIPC
 * sub-systems one after another. Each step has a matching label in the
 * unwind chain at the bottom, so a failure undoes exactly the steps that
 * completed before it, in reverse order.
 *
 * Return: 0 on success, otherwise the error code of the failing step
 */
static int __init tipc_init(void)
{
	int err;

	pr_info("Activated (version " TIPC_MOD_VER ")\n");

	/* sysctl_tipc_rmem[] holds min/default/max, in that order */
	sysctl_tipc_rmem[0] = RCVBUF_MIN;
	sysctl_tipc_rmem[1] = RCVBUF_DEF;
	sysctl_tipc_rmem[2] = RCVBUF_MAX;

	err = tipc_register_sysctl();
	if (err)
		goto out_sysctl;

	/* Per-namespace core state (tipc_init_net / tipc_exit_net) */
	err = register_pernet_device(&tipc_net_ops);
	if (err)
		goto out_pernet;

	err = tipc_socket_init();
	if (err)
		goto out_socket;

	/* Per-namespace topology server */
	err = register_pernet_device(&tipc_topsrv_net_ops);
	if (err)
		goto out_pernet_topsrv;

	/* Only provides a .pre_exit hook (tipc_pernet_pre_exit) */
	err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
	if (err)
		goto out_register_pernet_subsys;

	err = tipc_bearer_setup();
	if (err)
		goto out_bearer;

	err = tipc_netlink_start();
	if (err)
		goto out_netlink;

	err = tipc_netlink_compat_start();
	if (err)
		goto out_netlink_compat;

	pr_info("Started in single node mode\n");
	return 0;

	/* Unwind chain: each label undoes the step registered just before the
	 * step whose failure jumps to it, then falls through to the rest.
	 */
out_netlink_compat:
	tipc_netlink_stop();
out_netlink:
	tipc_bearer_cleanup();
out_bearer:
	unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
out_register_pernet_subsys:
	unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
	tipc_socket_stop();
out_socket:
	unregister_pernet_device(&tipc_net_ops);
out_pernet:
	tipc_unregister_sysctl();
out_sysctl:
	pr_err("Unable to start in single node mode\n");
	return err;
}
pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index be72f8cebc53..7f3fe3401c45 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -1,8 +1,9 @@ /* * net/tipc/core.h: Include file for TIPC global declarations * - * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2006, 2013-2018 Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * Copyright (c) 2020, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,170 +38,190 @@ #ifndef _TIPC_CORE_H #define _TIPC_CORE_H -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/tipc.h> #include <linux/tipc_config.h> +#include <linux/tipc_netlink.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/timer.h> #include <linux/string.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/interrupt.h> #include <linux/atomic.h> -#include <asm/hardirq.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/rtnetlink.h> +#include <linux/etherdevice.h> +#include <net/netns/generic.h> +#include <linux/rhashtable.h> +#include <net/genetlink.h> +#include <net/netns/hash.h> + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define TIPC_MOD_VER "2.0.0" - -#define ULTRA_STRING_MAX_LEN 32768 -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 +struct tipc_node; +struct tipc_bearer; +struct tipc_bc_base; +struct tipc_link; +struct tipc_topsrv; +struct tipc_monitor; +#ifdef CONFIG_TIPC_CRYPTO +struct tipc_crypto; +#endif -struct tipc_msg; /* msg.h */ +#define TIPC_MOD_VER "2.0.0" -int tipc_snprintf(char *buf, int len, const char *fmt, ...); +#define NODE_HTABLE_SIZE 512 +#define MAX_BEARERS 3 +#define TIPC_DEF_MON_THRESHOLD 32 +#define NODE_ID_LEN 16 +#define NODE_ID_STR_LEN (NODE_ID_LEN * 2 
/*
 * struct tipc_net - per network namespace TIPC state
 *
 * One instance exists per namespace; it is looked up with
 * net_generic(net, tipc_net_id) (see tipc_net()).
 */
struct tipc_net {
	/* Node identity and addressing; all zeroed by tipc_init_net() */
	u8 node_id[NODE_ID_LEN];
	u32 node_addr;
	u32 trial_addr;
	unsigned long addr_trial_end;
	char node_id_string[NODE_ID_STR_LEN];
	int net_id;	/* set to 4711 by tipc_init_net() */
	int random;	/* filled with get_random_bytes() by tipc_init_net() */
	bool legacy_addr_format;

	/* Node table and node list */
	spinlock_t node_list_lock;
	struct hlist_head node_htable[NODE_HTABLE_SIZE];
	struct list_head node_list;
	u32 num_nodes;
	u32 num_links;

	/* Neighbor monitoring list */
	struct tipc_monitor *monitors[MAX_BEARERS];
	int mon_threshold;	/* starts at TIPC_DEF_MON_THRESHOLD */

	/* Bearer list */
	struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];

	/* Broadcast link */
	spinlock_t bclock;
	struct tipc_bc_base *bcbase;
	struct tipc_link *bcl;

	/* Socket hash table */
	struct rhashtable sk_rht;

	/* Name table */
	spinlock_t nametbl_lock;
	struct name_table *nametbl;

	/* Topology subscription server */
	struct tipc_topsrv *topsrv;
	atomic_t subscription_count;

	/* Cluster capabilities; starts as TIPC_NODE_CAPABILITIES */
	u16 capabilities;

	/* Tracing of node internal messages */
	struct packet_type loopback_pt;

#ifdef CONFIG_TIPC_CRYPTO
	/* TX crypto handler */
	struct tipc_crypto *crypto_tx;
#endif
	/* Work item for net finalize (runs tipc_net_finalize_work) */
	struct work_struct work;
	/* Number of work items currently scheduled; tipc_exit_net() waits
	 * until this reads zero before returning
	 */
	atomic_t wq_count;
};
tipc_handler_stop(void); -extern int tipc_netlink_start(void); -extern void tipc_netlink_stop(void); -extern int tipc_socket_init(void); -extern void tipc_socket_stop(void); -extern int tipc_sock_create_local(int type, struct socket **res); -extern void tipc_sock_release_local(struct socket *sock); -extern int tipc_sock_accept_local(struct socket *sock, - struct socket **newsock, int flags); +static inline struct tipc_net *tipc_net(struct net *net) +{ + return net_generic(net, tipc_net_id); +} -#ifdef CONFIG_SYSCTL -extern int tipc_register_sysctl(void); -extern void tipc_unregister_sysctl(void); -#else -#define tipc_register_sysctl() 0 -#define tipc_unregister_sysctl() -#endif +static inline int tipc_netid(struct net *net) +{ + return tipc_net(net)->net_id; +} -/* - * TIPC timer and signal code - */ -typedef void (*Handler) (unsigned long); +static inline struct list_head *tipc_nodes(struct net *net) +{ + return &tipc_net(net)->node_list; +} -u32 tipc_k_signal(Handler routine, unsigned long argument); +static inline struct name_table *tipc_name_table(struct net *net) +{ + return tipc_net(net)->nametbl; +} -/** - * k_init_timer - initialize a timer - * @timer: pointer to timer structure - * @routine: pointer to routine to invoke when timer expires - * @argument: value to pass to routine when timer expires - * - * Timer must be initialized before use (and terminated when no longer needed). - */ -static inline void k_init_timer(struct timer_list *timer, Handler routine, - unsigned long argument) +static inline struct tipc_topsrv *tipc_topsrv(struct net *net) { - setup_timer(timer, routine, argument); + return tipc_net(net)->topsrv; } -/** - * k_start_timer - start a timer - * @timer: pointer to timer structure - * @msec: time to delay (in ms) - * - * Schedules a previously initialized timer for later execution. - * If timer is already running, the new timeout overrides the previous request. 
- * - * To ensure the timer doesn't expire before the specified delay elapses, - * the amount of delay is rounded up when converting to the jiffies - * then an additional jiffy is added to account for the fact that - * the starting time may be in the middle of the current jiffy. - */ -static inline void k_start_timer(struct timer_list *timer, unsigned long msec) +static inline unsigned int tipc_hashfn(u32 addr) { - mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1); + return addr & (NODE_HTABLE_SIZE - 1); } -/** - * k_cancel_timer - cancel a timer - * @timer: pointer to timer structure - * - * Cancels a previously initialized timer. - * Can be called safely even if the timer is already inactive. - * - * WARNING: Must not be called when holding locks required by the timer's - * timeout routine, otherwise deadlock can occur on SMP systems! - */ -static inline void k_cancel_timer(struct timer_list *timer) +static inline u16 mod(u16 x) { - del_timer_sync(timer); + return x & 0xffffu; } -/** - * k_term_timer - terminate a timer - * @timer: pointer to timer structure - * - * Prevents further use of a previously initialized timer. - * - * WARNING: Caller must ensure timer isn't currently running. - * - * (Do not "enhance" this routine to automatically cancel an active timer, - * otherwise deadlock can arise when a timeout routine calls k_term_timer.) - */ -static inline void k_term_timer(struct timer_list *timer) +static inline int less_eq(u16 left, u16 right) { + return mod(right - left) < 32768u; } -/* - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). 
- * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM LL_MAX_HEADER +static inline int more(u16 left, u16 right) +{ + return !less_eq(left, right); +} -struct tipc_skb_cb { - void *handle; -}; +static inline int less(u16 left, u16 right) +{ + return less_eq(left, right) && (mod(right) != mod(left)); +} -#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) +static inline int tipc_in_range(u16 val, u16 min, u16 max) +{ + return !less(val, min) && !more(val, max); +} -static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand) { - return (struct tipc_msg *)skb->data; + return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand; } -extern struct sk_buff *tipc_buf_acquire(u32 size); +static inline u32 hash128to32(char *bytes) +{ + __be32 *tmp = (__be32 *)bytes; + u32 res; + + res = ntohl(tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]); + if (likely(res)) + return res; + return ntohl(tmp[0] | tmp[1] | tmp[2] | tmp[3]); +} +#ifdef CONFIG_SYSCTL +int tipc_register_sysctl(void); +void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif #endif diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c new file mode 100644 index 000000000000..751904f10aab --- /dev/null +++ b/net/tipc/crypto.c @@ -0,0 +1,2484 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption + * + * Copyright (c) 2019, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/*
 * struct tipc_key - TIPC keys' status indicator
 *
 *         7     6     5     4     3     2     1     0
 *      +-----+-----+-----+-----+-----+-----+-----+-----+
 * key: | (reserved)|passive idx| active idx|pending idx|
 *      +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * Three 2-bit key indexes packed into one byte. The bitfield declaration
 * order is swapped between little- and big-endian bitfield layouts so that
 * the byte matches the diagram above in both cases.
 * NOTE(review): the indexes presumably take the key id values of the
 * KEY_MASTER..KEY_3 enum (0..3) - confirm against the state-setting code.
 */
struct tipc_key {
#define KEY_BITS (2)				/* width of one key index */
#define KEY_MASK ((1 << KEY_BITS) - 1)		/* mask for one key index */
	union {
		struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
			u8 pending:2,
			   active:2,
			   passive:2, /* rx only */
			   reserved:2;
#elif defined(__BIG_ENDIAN_BITFIELD)
			u8 reserved:2,
			   passive:2, /* rx only */
			   active:2,
			   pending:2;
#else
#error "Please fix <asm/byteorder.h>"
#endif
		} __packed;
		/* The same byte viewed as a whole: all indexes at once */
		u8 keys;
	};
};
/**
 * struct tipc_aead - TIPC AEAD key structure
 * @tfm_entry: per-cpu pointer to one entry in TFM list; a clone points into
 *	       the TFM list of its source key rather than owning a list
 * @crypto: TIPC crypto owns this key
 * @cloned: reference to the source key in case cloning; NULL for a key made
 *	    by tipc_aead_init(). When set, tipc_aead_free() drops this
 *	    reference instead of freeing the TFMs
 * @users: the number of the key users (TX/RX)
 * @salt: the key's SALT value (the bytes following the AES key in the user
 *	  key data)
 * @authsize: authentication tag size (max = 16)
 * @mode: crypto mode is applied to the key
 * @hint: a hint for user key: hex string of the last TIPC_AEAD_HINT_LEN
 *	  bytes of the AES key part, NUL-terminated
 * @rcu: struct rcu_head, used to free the key via call_rcu()
 * @key: the aead key; a private copy of the user key made by
 *	 tipc_aead_init(), left NULL by tipc_aead_clone()
 * @gen: the key's generation
 * @seqno: the key seqno (cluster scope)
 * @refcnt: the key reference counter; starts at 1 and the key is released
 *	    when it drops to zero (see tipc_aead_put())
 */
struct tipc_aead {
#define TIPC_AEAD_HINT_LEN (5)
	struct tipc_tfm * __percpu *tfm_entry;
	struct tipc_crypto *crypto;
	struct tipc_aead *cloned;
	atomic_t users;
	u32 salt;
	u8 authsize;
	u8 mode;
	/* two hex digits per hinted byte, plus the terminating NUL */
	char hint[2 * TIPC_AEAD_HINT_LEN + 1];
	struct rcu_head rcu;
	struct tipc_aead_key *key;
	u16 gen;

	/* each of the two counters below is cacheline aligned */
	atomic64_t seqno ____cacheline_aligned;
	refcount_t refcnt ____cacheline_aligned;

} ____cacheline_aligned;
+ * @working: the crypto is working or not + * @key_master: flag indicates if master key exists + * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.) + * @nokey: no key indication + * @flags: combined flags field + * @lock: tipc_key lock + */ +struct tipc_crypto { + struct net *net; + struct tipc_node *node; + struct tipc_aead __rcu *aead[KEY_MAX + 1]; + atomic_t peer_rx_active; + u16 key_gen; + struct tipc_key key; + u8 skey_mode; + struct tipc_aead_key *skey; + struct workqueue_struct *wq; + struct delayed_work work; +#define KEY_DISTR_SCHED 1 +#define KEY_DISTR_COMPL 2 + atomic_t key_distr; + u32 rekeying_intv; + + struct tipc_crypto_stats __percpu *stats; + char name[48]; + + atomic64_t sndnxt ____cacheline_aligned; + unsigned long timer1; + unsigned long timer2; + union { + struct { + u8 working:1; + u8 key_master:1; + u8 legacy_user:1; + u8 nokey: 1; + }; + u8 flags; + }; + spinlock_t lock; /* crypto lock */ + +} ____cacheline_aligned; + +/* struct tipc_crypto_tx_ctx - TX context for callbacks */ +struct tipc_crypto_tx_ctx { + struct tipc_aead *aead; + struct tipc_bearer *bearer; + struct tipc_media_addr dst; +}; + +/* struct tipc_crypto_rx_ctx - RX context for callbacks */ +struct tipc_crypto_rx_ctx { + struct tipc_aead *aead; + struct tipc_bearer *bearer; +}; + +static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead); +static inline void tipc_aead_put(struct tipc_aead *aead); +static void tipc_aead_free(struct rcu_head *rp); +static int tipc_aead_users(struct tipc_aead __rcu *aead); +static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim); +static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim); +static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val); +static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead); +static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, + u8 mode); +static int tipc_aead_clone(struct tipc_aead **dst, struct 
tipc_aead *src); +static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, + unsigned int crypto_ctx_size, + u8 **iv, struct aead_request **req, + struct scatterlist **sg, int nsg); +static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode); +static void tipc_aead_encrypt_done(void *data, int err); +static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, + struct sk_buff *skb, struct tipc_bearer *b); +static void tipc_aead_decrypt_done(void *data, int err); +static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr); +static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, + u8 tx_key, struct sk_buff *skb, + struct tipc_crypto *__rx); +static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, + u8 new_passive, + u8 new_active, + u8 new_pending); +static int tipc_crypto_key_attach(struct tipc_crypto *c, + struct tipc_aead *aead, u8 pos, + bool master_key); +static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending); +static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, + struct tipc_crypto *rx, + struct sk_buff *skb, + u8 tx_key); +static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb); +static int tipc_crypto_key_revoke(struct net *net, u8 tx_key); +static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode, u8 type); +static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, + struct tipc_bearer *b, + struct sk_buff **skb, int err); +static void tipc_crypto_do_cmd(struct net *net, int cmd); +static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf); +static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, + char *buf); +static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey, + u16 gen, u8 mode, 
u32 dnode); +static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr); +static void tipc_crypto_work_tx(struct work_struct *work); +static void tipc_crypto_work_rx(struct work_struct *work); +static int tipc_aead_key_generate(struct tipc_aead_key *skey); + +#define is_tx(crypto) (!(crypto)->node) +#define is_rx(crypto) (!is_tx(crypto)) + +#define key_next(cur) ((cur) % KEY_MAX + 1) + +#define tipc_aead_rcu_ptr(rcu_ptr, lock) \ + rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock)) + +#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \ +do { \ + struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \ + lockdep_is_held(lock)); \ + rcu_assign_pointer((rcu_ptr), (ptr)); \ + tipc_aead_put(__tmp); \ +} while (0) + +#define tipc_crypto_key_detach(rcu_ptr, lock) \ + tipc_aead_rcu_replace((rcu_ptr), NULL, lock) + +/** + * tipc_aead_key_validate - Validate a AEAD user key + * @ukey: pointer to user key data + * @info: netlink info pointer + */ +int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info) +{ + int keylen; + + /* Check if algorithm exists */ + if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) { + GENL_SET_ERR_MSG(info, "unable to load the algorithm (module existed?)"); + return -ENODEV; + } + + /* Currently, we only support the "gcm(aes)" cipher algorithm */ + if (strcmp(ukey->alg_name, "gcm(aes)")) { + GENL_SET_ERR_MSG(info, "not supported yet the algorithm"); + return -ENOTSUPP; + } + + /* Check if key size is correct */ + keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; + if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 && + keylen != TIPC_AES_GCM_KEY_SIZE_192 && + keylen != TIPC_AES_GCM_KEY_SIZE_256)) { + GENL_SET_ERR_MSG(info, "incorrect key length (20, 28 or 36 octets?)"); + return -EKEYREJECTED; + } + + return 0; +} + +/** + * tipc_aead_key_generate - Generate new session key + * @skey: input/output key with new content + * + * Return: 0 in case of success, otherwise < 0 + */ +static int 
tipc_aead_key_generate(struct tipc_aead_key *skey) +{ + int rc = 0; + + /* Fill the key's content with a random value via RNG cipher */ + rc = crypto_get_default_rng(); + if (likely(!rc)) { + rc = crypto_rng_get_bytes(crypto_default_rng, skey->key, + skey->keylen); + crypto_put_default_rng(); + } + + return rc; +} + +static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (unlikely(!tmp || !refcount_inc_not_zero(&tmp->refcnt))) + tmp = NULL; + rcu_read_unlock(); + + return tmp; +} + +static inline void tipc_aead_put(struct tipc_aead *aead) +{ + if (aead && refcount_dec_and_test(&aead->refcnt)) + call_rcu(&aead->rcu, tipc_aead_free); +} + +/** + * tipc_aead_free - Release AEAD key incl. all the TFMs in the list + * @rp: rcu head pointer + */ +static void tipc_aead_free(struct rcu_head *rp) +{ + struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu); + struct tipc_tfm *tfm_entry, *head, *tmp; + + if (aead->cloned) { + tipc_aead_put(aead->cloned); + } else { + head = *get_cpu_ptr(aead->tfm_entry); + put_cpu_ptr(aead->tfm_entry); + list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) { + crypto_free_aead(tfm_entry->tfm); + list_del(&tfm_entry->list); + kfree(tfm_entry); + } + /* Free the head */ + crypto_free_aead(head->tfm); + list_del(&head->list); + kfree(head); + } + free_percpu(aead->tfm_entry); + kfree_sensitive(aead->key); + kfree_sensitive(aead); +} + +static int tipc_aead_users(struct tipc_aead __rcu *aead) +{ + struct tipc_aead *tmp; + int users = 0; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + users = atomic_read(&tmp->users); + rcu_read_unlock(); + + return users; +} + +static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + atomic_add_unless(&tmp->users, 1, lim); + rcu_read_unlock(); +} + +static void 
tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + atomic_add_unless(&rcu_dereference(aead)->users, -1, lim); + rcu_read_unlock(); +} + +static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val) +{ + struct tipc_aead *tmp; + int cur; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) { + do { + cur = atomic_read(&tmp->users); + if (cur == val) + break; + } while (atomic_cmpxchg(&tmp->users, cur, val) != cur); + } + rcu_read_unlock(); +} + +/** + * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it + * @aead: the AEAD key pointer + */ +static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead) +{ + struct tipc_tfm **tfm_entry; + struct crypto_aead *tfm; + + tfm_entry = get_cpu_ptr(aead->tfm_entry); + *tfm_entry = list_next_entry(*tfm_entry, list); + tfm = (*tfm_entry)->tfm; + put_cpu_ptr(tfm_entry); + + return tfm; +} + +/** + * tipc_aead_init - Initiate TIPC AEAD + * @aead: returned new TIPC AEAD key handle pointer + * @ukey: pointer to user key data + * @mode: the key mode + * + * Allocate a (list of) new cipher transformation (TFM) with the specific user + * key data if valid. The number of the allocated TFMs can be set via the sysfs + * "net/tipc/max_tfms" first. + * Also, all the other AEAD data are also initialized. 
+ * + * Return: 0 if the initiation is successful, otherwise: < 0 + */ +static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, + u8 mode) +{ + struct tipc_tfm *tfm_entry, *head; + struct crypto_aead *tfm; + struct tipc_aead *tmp; + int keylen, err, cpu; + int tfm_cnt = 0; + + if (unlikely(*aead)) + return -EEXIST; + + /* Allocate a new AEAD */ + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); + if (unlikely(!tmp)) + return -ENOMEM; + + /* The key consists of two parts: [AES-KEY][SALT] */ + keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; + + /* Allocate per-cpu TFM entry pointer */ + tmp->tfm_entry = alloc_percpu(struct tipc_tfm *); + if (!tmp->tfm_entry) { + kfree_sensitive(tmp); + return -ENOMEM; + } + + /* Make a list of TFMs with the user key data */ + do { + tfm = crypto_alloc_aead(ukey->alg_name, 0, 0); + if (IS_ERR(tfm)) { + err = PTR_ERR(tfm); + break; + } + + if (unlikely(!tfm_cnt && + crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) { + crypto_free_aead(tfm); + err = -ENOTSUPP; + break; + } + + err = crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE); + err |= crypto_aead_setkey(tfm, ukey->key, keylen); + if (unlikely(err)) { + crypto_free_aead(tfm); + break; + } + + tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL); + if (unlikely(!tfm_entry)) { + crypto_free_aead(tfm); + err = -ENOMEM; + break; + } + INIT_LIST_HEAD(&tfm_entry->list); + tfm_entry->tfm = tfm; + + /* First entry? */ + if (!tfm_cnt) { + head = tfm_entry; + for_each_possible_cpu(cpu) { + *per_cpu_ptr(tmp->tfm_entry, cpu) = head; + } + } else { + list_add_tail(&tfm_entry->list, &head->list); + } + + } while (++tfm_cnt < sysctl_tipc_max_tfms); + + /* Not any TFM is allocated? 
*/ + if (!tfm_cnt) { + free_percpu(tmp->tfm_entry); + kfree_sensitive(tmp); + return err; + } + + /* Form a hex string of some last bytes as the key's hint */ + bin2hex(tmp->hint, ukey->key + keylen - TIPC_AEAD_HINT_LEN, + TIPC_AEAD_HINT_LEN); + + /* Initialize the other data */ + tmp->mode = mode; + tmp->cloned = NULL; + tmp->authsize = TIPC_AES_GCM_TAG_SIZE; + tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL); + if (!tmp->key) { + tipc_aead_free(&tmp->rcu); + return -ENOMEM; + } + memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE); + atomic_set(&tmp->users, 0); + atomic64_set(&tmp->seqno, 0); + refcount_set(&tmp->refcnt, 1); + + *aead = tmp; + return 0; +} + +/** + * tipc_aead_clone - Clone a TIPC AEAD key + * @dst: dest key for the cloning + * @src: source key to clone from + * + * Make a "copy" of the source AEAD key data to the dest, the TFMs list is + * common for the keys. + * A reference to the source is hold in the "cloned" pointer for the later + * freeing purposes. + * + * Note: this must be done in cluster-key mode only! 
+ * Return: 0 in case of success, otherwise < 0 + */ +static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src) +{ + struct tipc_aead *aead; + int cpu; + + if (!src) + return -ENOKEY; + + if (src->mode != CLUSTER_KEY) + return -EINVAL; + + if (unlikely(*dst)) + return -EEXIST; + + aead = kzalloc(sizeof(*aead), GFP_ATOMIC); + if (unlikely(!aead)) + return -ENOMEM; + + aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC); + if (unlikely(!aead->tfm_entry)) { + kfree_sensitive(aead); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) { + *per_cpu_ptr(aead->tfm_entry, cpu) = + *per_cpu_ptr(src->tfm_entry, cpu); + } + + memcpy(aead->hint, src->hint, sizeof(src->hint)); + aead->mode = src->mode; + aead->salt = src->salt; + aead->authsize = src->authsize; + atomic_set(&aead->users, 0); + atomic64_set(&aead->seqno, 0); + refcount_set(&aead->refcnt, 1); + + WARN_ON(!refcount_inc_not_zero(&src->refcnt)); + aead->cloned = src; + + *dst = aead; + return 0; +} + +/** + * tipc_aead_mem_alloc - Allocate memory for AEAD request operations + * @tfm: cipher handle to be registered with the request + * @crypto_ctx_size: size of crypto context for callback + * @iv: returned pointer to IV data + * @req: returned pointer to AEAD request data + * @sg: returned pointer to SG lists + * @nsg: number of SG lists to be allocated + * + * Allocate memory to store the crypto context data, AEAD request, IV and SG + * lists, the memory layout is as follows: + * crypto_ctx || iv || aead_req || sg[] + * + * Return: the pointer to the memory areas in case of success, otherwise NULL + */ +static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, + unsigned int crypto_ctx_size, + u8 **iv, struct aead_request **req, + struct scatterlist **sg, int nsg) +{ + unsigned int iv_size, req_size; + unsigned int len; + u8 *mem; + + iv_size = crypto_aead_ivsize(tfm); + req_size = sizeof(**req) + crypto_aead_reqsize(tfm); + + len = crypto_ctx_size; + len += iv_size; + len += 
crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + len += req_size; + len = ALIGN(len, __alignof__(struct scatterlist)); + len += nsg * sizeof(**sg); + + mem = kmalloc(len, GFP_ATOMIC); + if (!mem) + return NULL; + + *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size, + crypto_aead_alignmask(tfm) + 1); + *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, + crypto_tfm_ctx_alignment()); + *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, + __alignof__(struct scatterlist)); + + return (void *)mem; +} + +/** + * tipc_aead_encrypt - Encrypt a message + * @aead: TIPC AEAD key for the message encryption + * @skb: the input/output skb + * @b: TIPC bearer where the message will be delivered after the encryption + * @dst: the destination media address + * @__dnode: TIPC dest node if "known" + * + * Return: + * * 0 : if the encryption has completed + * * -EINPROGRESS/-EBUSY : if a callback will be performed + * * < 0 : the encryption has failed + */ +static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode) +{ + struct crypto_aead *tfm = tipc_aead_tfm_next(aead); + struct tipc_crypto_tx_ctx *tx_ctx; + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + struct tipc_ehdr *ehdr; + int ehsz, len, tailen, nsg, rc; + void *ctx; + u32 salt; + u8 *iv; + + /* Make sure message len at least 4-byte aligned */ + len = ALIGN(skb->len, 4); + tailen = len - skb->len + aead->authsize; + + /* Expand skb tail for authentication tag: + * As for simplicity, we'd have made sure skb having enough tailroom + * for authentication tag @skb allocation. Even when skb is nonlinear + * but there is no frag_list, it should be still fine! + * Otherwise, we must cow it to be a writable buffer with the tailroom. 
+ */ + SKB_LINEAR_ASSERT(skb); + if (tailen > skb_tailroom(skb)) { + pr_debug("TX(): skb tailroom is not enough: %d, requires: %d\n", + skb_tailroom(skb), tailen); + } + + nsg = skb_cow_data(skb, tailen, &trailer); + if (unlikely(nsg < 0)) { + pr_err("TX: skb_cow_data() returned %d\n", nsg); + return nsg; + } + + pskb_put(skb, trailer, tailen); + + /* Allocate memory for the AEAD operation */ + ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg); + if (unlikely(!ctx)) + return -ENOMEM; + TIPC_SKB_CB(skb)->crypto_ctx = ctx; + + /* Map skb to the sg lists */ + sg_init_table(sg, nsg); + rc = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(rc < 0)) { + pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg); + goto exit; + } + + /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)] + * In case we're in cluster-key mode, SALT is varied by xor-ing with + * the source address (or w0 of id), otherwise with the dest address + * if dest is known. + */ + ehdr = (struct tipc_ehdr *)skb->data; + salt = aead->salt; + if (aead->mode == CLUSTER_KEY) + salt ^= __be32_to_cpu(ehdr->addr); + else if (__dnode) + salt ^= tipc_node_get_addr(__dnode); + memcpy(iv, &salt, 4); + memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); + + /* Prepare request */ + ehsz = tipc_ehdr_size(ehdr); + aead_request_set_tfm(req, tfm); + aead_request_set_ad(req, ehsz); + aead_request_set_crypt(req, sg, sg, len - ehsz, iv); + + /* Set callback function & data */ + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tipc_aead_encrypt_done, skb); + tx_ctx = (struct tipc_crypto_tx_ctx *)ctx; + tx_ctx->aead = aead; + tx_ctx->bearer = b; + memcpy(&tx_ctx->dst, dst, sizeof(*dst)); + + /* Hold bearer */ + if (unlikely(!tipc_bearer_hold(b))) { + rc = -ENODEV; + goto exit; + } + + /* Get net to avoid freed tipc_crypto when delete namespace */ + if (!maybe_get_net(aead->crypto->net)) { + tipc_bearer_put(b); + rc = -ENODEV; + goto exit; + } + + /* Now, do encrypt */ + rc = crypto_aead_encrypt(req); + 
if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); + put_net(aead->crypto->net); + +exit: + kfree(ctx); + TIPC_SKB_CB(skb)->crypto_ctx = NULL; + return rc; +} + +static void tipc_aead_encrypt_done(void *data, int err) +{ + struct sk_buff *skb = data; + struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; + struct tipc_bearer *b = tx_ctx->bearer; + struct tipc_aead *aead = tx_ctx->aead; + struct tipc_crypto *tx = aead->crypto; + struct net *net = tx->net; + + switch (err) { + case 0: + this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]); + rcu_read_lock(); + if (likely(test_bit(0, &b->up))) + b->media->send_msg(net, skb, b, &tx_ctx->dst); + else + kfree_skb(skb); + rcu_read_unlock(); + break; + case -EINPROGRESS: + return; + default: + this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]); + kfree_skb(skb); + break; + } + + kfree(tx_ctx); + tipc_bearer_put(b); + tipc_aead_put(aead); + put_net(net); +} + +/** + * tipc_aead_decrypt - Decrypt an encrypted message + * @net: struct net + * @aead: TIPC AEAD for the message decryption + * @skb: the input/output skb + * @b: TIPC bearer where the message has been received + * + * Return: + * * 0 : if the decryption has completed + * * -EINPROGRESS/-EBUSY : if a callback will be performed + * * < 0 : the decryption has failed + */ +static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, + struct sk_buff *skb, struct tipc_bearer *b) +{ + struct tipc_crypto_rx_ctx *rx_ctx; + struct aead_request *req; + struct crypto_aead *tfm; + struct sk_buff *unused; + struct scatterlist *sg; + struct tipc_ehdr *ehdr; + int ehsz, nsg, rc; + void *ctx; + u32 salt; + u8 *iv; + + if (unlikely(!aead)) + return -ENOKEY; + + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; + } + + /* Allocate memory for the AEAD operation */ + tfm = tipc_aead_tfm_next(aead); + ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg); + 
if (unlikely(!ctx)) + return -ENOMEM; + TIPC_SKB_CB(skb)->crypto_ctx = ctx; + + /* Map skb to the sg lists */ + sg_init_table(sg, nsg); + rc = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(rc < 0)) { + pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg); + goto exit; + } + + /* Reconstruct IV: */ + ehdr = (struct tipc_ehdr *)skb->data; + salt = aead->salt; + if (aead->mode == CLUSTER_KEY) + salt ^= __be32_to_cpu(ehdr->addr); + else if (ehdr->destined) + salt ^= tipc_own_addr(net); + memcpy(iv, &salt, 4); + memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); + + /* Prepare request */ + ehsz = tipc_ehdr_size(ehdr); + aead_request_set_tfm(req, tfm); + aead_request_set_ad(req, ehsz); + aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv); + + /* Set callback function & data */ + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tipc_aead_decrypt_done, skb); + rx_ctx = (struct tipc_crypto_rx_ctx *)ctx; + rx_ctx->aead = aead; + rx_ctx->bearer = b; + + /* Hold bearer */ + if (unlikely(!tipc_bearer_hold(b))) { + rc = -ENODEV; + goto exit; + } + + /* Now, do decrypt */ + rc = crypto_aead_decrypt(req); + if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); + +exit: + kfree(ctx); + TIPC_SKB_CB(skb)->crypto_ctx = NULL; + return rc; +} + +static void tipc_aead_decrypt_done(void *data, int err) +{ + struct sk_buff *skb = data; + struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; + struct tipc_bearer *b = rx_ctx->bearer; + struct tipc_aead *aead = rx_ctx->aead; + struct tipc_crypto_stats __percpu *stats = aead->crypto->stats; + struct net *net = aead->crypto->net; + + switch (err) { + case 0: + this_cpu_inc(stats->stat[STAT_ASYNC_OK]); + break; + case -EINPROGRESS: + return; + default: + this_cpu_inc(stats->stat[STAT_ASYNC_NOK]); + break; + } + + kfree(rx_ctx); + tipc_crypto_rcv_complete(net, aead, b, &skb, err); + if (likely(skb)) { + if (likely(test_bit(0, &b->up))) + tipc_rcv(net, skb, b); + else + kfree_skb(skb); + } + + 
tipc_bearer_put(b); +} + +static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr) +{ + return (ehdr->user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE; +} + +/** + * tipc_ehdr_validate - Validate an encryption message + * @skb: the message buffer + * + * Return: "true" if this is a valid encryption message, otherwise "false" + */ +bool tipc_ehdr_validate(struct sk_buff *skb) +{ + struct tipc_ehdr *ehdr; + int ehsz; + + if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE))) + return false; + + ehdr = (struct tipc_ehdr *)skb->data; + if (unlikely(ehdr->version != TIPC_EVERSION)) + return false; + ehsz = tipc_ehdr_size(ehdr); + if (unlikely(!pskb_may_pull(skb, ehsz))) + return false; + if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE)) + return false; + + return true; +} + +/** + * tipc_ehdr_build - Build TIPC encryption message header + * @net: struct net + * @aead: TX AEAD key to be used for the message encryption + * @tx_key: key id used for the message encryption + * @skb: input/output message skb + * @__rx: RX crypto handle if dest is "known" + * + * Return: the header size if the building is successful, otherwise < 0 + */ +static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, + u8 tx_key, struct sk_buff *skb, + struct tipc_crypto *__rx) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_ehdr *ehdr; + u32 user = msg_user(hdr); + u64 seqno; + int ehsz; + + /* Make room for encryption header */ + ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE; + WARN_ON(skb_headroom(skb) < ehsz); + ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz); + + /* Obtain a seqno first: + * Use the key seqno (= cluster wise) if dest is unknown or we're in + * cluster key mode, otherwise it's better for a per-peer seqno! 
+ */ + if (!__rx || aead->mode == CLUSTER_KEY) + seqno = atomic64_inc_return(&aead->seqno); + else + seqno = atomic64_inc_return(&__rx->sndnxt); + + /* Revoke the key if seqno is wrapped around */ + if (unlikely(!seqno)) + return tipc_crypto_key_revoke(net, tx_key); + + /* Word 1-2 */ + ehdr->seqno = cpu_to_be64(seqno); + + /* Words 0, 3- */ + ehdr->version = TIPC_EVERSION; + ehdr->user = 0; + ehdr->keepalive = 0; + ehdr->tx_key = tx_key; + ehdr->destined = (__rx) ? 1 : 0; + ehdr->rx_key_active = (__rx) ? __rx->key.active : 0; + ehdr->rx_nokey = (__rx) ? __rx->nokey : 0; + ehdr->master_key = aead->crypto->key_master; + ehdr->reserved_1 = 0; + ehdr->reserved_2 = 0; + + switch (user) { + case LINK_CONFIG: + ehdr->user = LINK_CONFIG; + memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN); + break; + default: + if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) { + ehdr->user = LINK_PROTOCOL; + ehdr->keepalive = msg_is_keepalive(hdr); + } + ehdr->addr = hdr->hdr[3]; + break; + } + + return ehsz; +} + +static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, + u8 new_passive, + u8 new_active, + u8 new_pending) +{ + struct tipc_key old = c->key; + char buf[32]; + + c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) | + ((new_active & KEY_MASK) << (KEY_BITS)) | + ((new_pending & KEY_MASK)); + + pr_debug("%s: key changing %s ::%pS\n", c->name, + tipc_key_change_dump(old, c->key, buf), + __builtin_return_address(0)); +} + +/** + * tipc_crypto_key_init - Initiate a new user / AEAD key + * @c: TIPC crypto to which new key is attached + * @ukey: the user key + * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY) + * @master_key: specify this is a cluster master key + * + * A new TIPC AEAD key will be allocated and initiated with the specified user + * key, then attached to the TIPC crypto. 
+ * + * Return: new key id in case of success, otherwise: < 0 + */ +int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey, + u8 mode, bool master_key) +{ + struct tipc_aead *aead = NULL; + int rc = 0; + + /* Initiate with the new user key */ + rc = tipc_aead_init(&aead, ukey, mode); + + /* Attach it to the crypto */ + if (likely(!rc)) { + rc = tipc_crypto_key_attach(c, aead, 0, master_key); + if (rc < 0) + tipc_aead_free(&aead->rcu); + } + + return rc; +} + +/** + * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto + * @c: TIPC crypto to which the new AEAD key is attached + * @aead: the new AEAD key pointer + * @pos: desired slot in the crypto key array, = 0 if any! + * @master_key: specify this is a cluster master key + * + * Return: new key id in case of success, otherwise: -EBUSY + */ +static int tipc_crypto_key_attach(struct tipc_crypto *c, + struct tipc_aead *aead, u8 pos, + bool master_key) +{ + struct tipc_key key; + int rc = -EBUSY; + u8 new_key; + + spin_lock_bh(&c->lock); + key = c->key; + if (master_key) { + new_key = KEY_MASTER; + goto attach; + } + if (key.active && key.passive) + goto exit; + if (key.pending) { + if (tipc_aead_users(c->aead[key.pending]) > 0) + goto exit; + /* if (pos): ok with replacing, will be aligned when needed */ + /* Replace it */ + new_key = key.pending; + } else { + if (pos) { + if (key.active && pos != key_next(key.active)) { + key.passive = pos; + new_key = pos; + goto attach; + } else if (!key.active && !key.passive) { + key.pending = pos; + new_key = pos; + goto attach; + } + } + key.pending = key_next(key.active ?: key.passive); + new_key = key.pending; + } + +attach: + aead->crypto = c; + aead->gen = (is_tx(c)) ? 
++c->key_gen : c->key_gen; + tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock); + if (likely(c->key.keys != key.keys)) + tipc_crypto_key_set_state(c, key.passive, key.active, + key.pending); + c->working = 1; + c->nokey = 0; + c->key_master |= master_key; + rc = new_key; + +exit: + spin_unlock_bh(&c->lock); + return rc; +} + +void tipc_crypto_key_flush(struct tipc_crypto *c) +{ + struct tipc_crypto *tx, *rx; + int k; + + spin_lock_bh(&c->lock); + if (is_rx(c)) { + /* Try to cancel pending work */ + rx = c; + tx = tipc_net(rx->net)->crypto_tx; + if (cancel_delayed_work(&rx->work)) { + kfree(rx->skey); + rx->skey = NULL; + atomic_xchg(&rx->key_distr, 0); + tipc_node_put(rx->node); + } + /* RX stopping => decrease TX key users if any */ + k = atomic_xchg(&rx->peer_rx_active, 0); + if (k) { + tipc_aead_users_dec(tx->aead[k], 0); + /* Mark the point TX key users changed */ + tx->timer1 = jiffies; + } + } + + c->flags = 0; + tipc_crypto_key_set_state(c, 0, 0, 0); + for (k = KEY_MIN; k <= KEY_MAX; k++) + tipc_crypto_key_detach(c->aead[k], &c->lock); + atomic64_set(&c->sndnxt, 0); + spin_unlock_bh(&c->lock); +} + +/** + * tipc_crypto_key_try_align - Align RX keys if possible + * @rx: RX crypto handle + * @new_pending: new pending slot if aligned (= TX key from peer) + * + * Peer has used an unknown key slot, this only happens when peer has left and + * rejoined, or we are a newcomer. + * That means, there must be no active key but a pending key at unaligned slot. + * If so, we try to move the pending key to the new slot. + * Note: A potential passive key can exist, it will be shifted correspondingly! 
+ * + * Return: "true" if key is successfully aligned, otherwise "false" + */ +static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending) +{ + struct tipc_aead *tmp1, *tmp2 = NULL; + struct tipc_key key; + bool aligned = false; + u8 new_passive = 0; + int x; + + spin_lock(&rx->lock); + key = rx->key; + if (key.pending == new_pending) { + aligned = true; + goto exit; + } + if (key.active) + goto exit; + if (!key.pending) + goto exit; + if (tipc_aead_users(rx->aead[key.pending]) > 0) + goto exit; + + /* Try to "isolate" this pending key first */ + tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock); + if (!refcount_dec_if_one(&tmp1->refcnt)) + goto exit; + rcu_assign_pointer(rx->aead[key.pending], NULL); + + /* Move passive key if any */ + if (key.passive) { + tmp2 = rcu_replace_pointer(rx->aead[key.passive], tmp2, lockdep_is_held(&rx->lock)); + x = (key.passive - key.pending + new_pending) % KEY_MAX; + new_passive = (x <= 0) ? x + KEY_MAX : x; + } + + /* Re-allocate the key(s) */ + tipc_crypto_key_set_state(rx, new_passive, 0, new_pending); + rcu_assign_pointer(rx->aead[new_pending], tmp1); + if (new_passive) + rcu_assign_pointer(rx->aead[new_passive], tmp2); + refcount_set(&tmp1->refcnt, 1); + aligned = true; + pr_info_ratelimited("%s: key[%d] -> key[%d]\n", rx->name, key.pending, + new_pending); + +exit: + spin_unlock(&rx->lock); + return aligned; +} + +/** + * tipc_crypto_key_pick_tx - Pick one TX key for message decryption + * @tx: TX crypto handle + * @rx: RX crypto handle (can be NULL) + * @skb: the message skb which will be decrypted later + * @tx_key: peer TX key id + * + * This function looks up the existing TX keys and pick one which is suitable + * for the message decryption, that must be a cluster key and not used before + * on the same message (i.e. recursive). 
+ * + * Return: the TX AEAD key handle in case of success, otherwise NULL + */ +static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, + struct tipc_crypto *rx, + struct sk_buff *skb, + u8 tx_key) +{ + struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb); + struct tipc_aead *aead = NULL; + struct tipc_key key = tx->key; + u8 k, i = 0; + + /* Initialize data if not yet */ + if (!skb_cb->tx_clone_deferred) { + skb_cb->tx_clone_deferred = 1; + memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx)); + } + + skb_cb->tx_clone_ctx.rx = rx; + if (++skb_cb->tx_clone_ctx.recurs > 2) + return NULL; + + /* Pick one TX key */ + spin_lock(&tx->lock); + if (tx_key == KEY_MASTER) { + aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock); + goto done; + } + do { + k = (i == 0) ? key.pending : + ((i == 1) ? key.active : key.passive); + if (!k) + continue; + aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock); + if (!aead) + continue; + if (aead->mode != CLUSTER_KEY || + aead == skb_cb->tx_clone_ctx.last) { + aead = NULL; + continue; + } + /* Ok, found one cluster key */ + skb_cb->tx_clone_ctx.last = aead; + WARN_ON(skb->next); + skb->next = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb->next)) + pr_warn("Failed to clone skb for next round if any\n"); + break; + } while (++i < 3); + +done: + if (likely(aead)) + WARN_ON(!refcount_inc_not_zero(&aead->refcnt)); + spin_unlock(&tx->lock); + + return aead; +} + +/** + * tipc_crypto_key_synch: Synch own key data according to peer key status + * @rx: RX crypto handle + * @skb: TIPCv2 message buffer (incl. the ehdr from peer) + * + * This function updates the peer node related data as the peer RX active key + * has changed, so the number of TX keys' users on this node are increased and + * decreased correspondingly. + * + * It also considers if peer has no key, then we need to make own master key + * (if any) taking over i.e. starting grace period and also trigger key + * distributing process. 
+ * + * The "per-peer" sndnxt is also reset when the peer key has switched. + */ +static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb) +{ + struct tipc_ehdr *ehdr = (struct tipc_ehdr *)skb_network_header(skb); + struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; + struct tipc_msg *hdr = buf_msg(skb); + u32 self = tipc_own_addr(rx->net); + u8 cur, new; + unsigned long delay = 0; + + /* Update RX 'key_master' flag according to peer, also mark "legacy" if + * a peer has no master key. + */ + rx->key_master = ehdr->master_key; + if (!rx->key_master) + tx->legacy_user = 1; + + /* For later cases, apply only if message is destined to this node */ + if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self) + return; + + /* Case 1: Peer has no keys, let's make master key take over */ + if (ehdr->rx_nokey) { + /* Set or extend grace period */ + tx->timer2 = jiffies; + /* Schedule key distributing for the peer if not yet */ + if (tx->key.keys && + !atomic_cmpxchg(&rx->key_distr, 0, KEY_DISTR_SCHED)) { + /* Only 2 bytes of 'delay' are randomized here; it is + * zero-initialized above so the remaining bytes are + * defined before the modulo reads the whole word. + * The result is a random delay of 500ms * [1..5]. + */ + get_random_bytes(&delay, 2); + delay %= 5; + delay = msecs_to_jiffies(500 * ++delay); + if (queue_delayed_work(tx->wq, &rx->work, delay)) + tipc_node_get(rx->node); + } + } else { + /* Cancel a pending key distributing if any */ + atomic_xchg(&rx->key_distr, 0); + } + + /* Case 2: Peer RX active key has changed, let's update own TX users */ + cur = atomic_read(&rx->peer_rx_active); + new = ehdr->rx_key_active; + if (tx->key.keys && + cur != new && + atomic_cmpxchg(&rx->peer_rx_active, cur, new) == cur) { + if (new) + tipc_aead_users_inc(tx->aead[new], INT_MAX); + if (cur) + tipc_aead_users_dec(tx->aead[cur], 0); + + atomic64_set(&rx->sndnxt, 0); + /* Mark the point TX key users changed */ + tx->timer1 = jiffies; + + pr_debug("%s: key users changed %d-- %d++, peer %s\n", + tx->name, cur, new, rx->name); + } +} + +static int tipc_crypto_key_revoke(struct net *net, u8 tx_key) +{ + struct tipc_crypto *tx = tipc_net(net)->crypto_tx; + struct 
tipc_key key; + + spin_lock_bh(&tx->lock); + key = tx->key; + WARN_ON(!key.active || tx_key != key.active); + + /* Free the active key */ + tipc_crypto_key_set_state(tx, key.passive, 0, key.pending); + tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); + spin_unlock_bh(&tx->lock); + + pr_warn("%s: key is revoked\n", tx->name); + return -EKEYREVOKED; +} + +int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, + struct tipc_node *node) +{ + struct tipc_crypto *c; + + if (*crypto) + return -EEXIST; + + /* Allocate crypto */ + c = kzalloc(sizeof(*c), GFP_ATOMIC); + if (!c) + return -ENOMEM; + + /* Allocate workqueue on TX */ + if (!node) { + c->wq = alloc_ordered_workqueue("tipc_crypto", 0); + if (!c->wq) { + kfree(c); + return -ENOMEM; + } + } + + /* Allocate statistic structure */ + c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); + if (!c->stats) { + if (c->wq) + destroy_workqueue(c->wq); + kfree_sensitive(c); + return -ENOMEM; + } + + c->flags = 0; + c->net = net; + c->node = node; + get_random_bytes(&c->key_gen, 2); + tipc_crypto_key_set_state(c, 0, 0, 0); + atomic_set(&c->key_distr, 0); + atomic_set(&c->peer_rx_active, 0); + atomic64_set(&c->sndnxt, 0); + c->timer1 = jiffies; + c->timer2 = jiffies; + c->rekeying_intv = TIPC_REKEYING_INTV_DEF; + spin_lock_init(&c->lock); + scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX", + (is_rx(c)) ? 
tipc_node_get_id_str(c->node) : + tipc_own_id_string(c->net)); + + if (is_rx(c)) + INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx); + else + INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx); + + *crypto = c; + return 0; +} + +void tipc_crypto_stop(struct tipc_crypto **crypto) +{ + struct tipc_crypto *c = *crypto; + u8 k; + + if (!c) + return; + + /* Flush any queued works & destroy wq */ + if (is_tx(c)) { + c->rekeying_intv = 0; + cancel_delayed_work_sync(&c->work); + destroy_workqueue(c->wq); + } + + /* Release AEAD keys */ + rcu_read_lock(); + for (k = KEY_MIN; k <= KEY_MAX; k++) + tipc_aead_put(rcu_dereference(c->aead[k])); + rcu_read_unlock(); + pr_debug("%s: has been stopped\n", c->name); + + /* Free this crypto statistics */ + free_percpu(c->stats); + + *crypto = NULL; + kfree_sensitive(c); +} + +void tipc_crypto_timeout(struct tipc_crypto *rx) +{ + struct tipc_net *tn = tipc_net(rx->net); + struct tipc_crypto *tx = tn->crypto_tx; + struct tipc_key key; + int cmd; + + /* TX pending: taking all users & stable -> active */ + spin_lock(&tx->lock); + key = tx->key; + if (key.active && tipc_aead_users(tx->aead[key.active]) > 0) + goto s1; + if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0) + goto s1; + if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_TIME)) + goto s1; + + tipc_crypto_key_set_state(tx, key.passive, key.pending, 0); + if (key.active) + tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); + this_cpu_inc(tx->stats->stat[STAT_SWITCHES]); + pr_info("%s: key[%d] is activated\n", tx->name, key.pending); + +s1: + spin_unlock(&tx->lock); + + /* RX pending: having user -> active */ + spin_lock(&rx->lock); + key = rx->key; + if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0) + goto s2; + + if (key.active) + key.passive = key.active; + key.active = key.pending; + rx->timer2 = jiffies; + tipc_crypto_key_set_state(rx, key.passive, key.active, 0); + this_cpu_inc(rx->stats->stat[STAT_SWITCHES]); + pr_info("%s: key[%d] is 
activated\n", rx->name, key.pending); + goto s5; + +s2: + /* RX pending: not working -> remove */ + if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -10) + goto s3; + + tipc_crypto_key_set_state(rx, key.passive, key.active, 0); + tipc_crypto_key_detach(rx->aead[key.pending], &rx->lock); + pr_debug("%s: key[%d] is removed\n", rx->name, key.pending); + goto s5; + +s3: + /* RX active: timed out or no user -> pending */ + if (!key.active) + goto s4; + if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM) && + tipc_aead_users(rx->aead[key.active]) > 0) + goto s4; + + if (key.pending) + key.passive = key.active; + else + key.pending = key.active; + rx->timer2 = jiffies; + tipc_crypto_key_set_state(rx, key.passive, 0, key.pending); + tipc_aead_users_set(rx->aead[key.pending], 0); + pr_debug("%s: key[%d] is deactivated\n", rx->name, key.active); + goto s5; + +s4: + /* RX passive: outdated or not working -> free */ + if (!key.passive) + goto s5; + if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM) && + tipc_aead_users(rx->aead[key.passive]) > -10) + goto s5; + + tipc_crypto_key_set_state(rx, 0, key.active, key.pending); + tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock); + pr_debug("%s: key[%d] is freed\n", rx->name, key.passive); + +s5: + spin_unlock(&rx->lock); + + /* Relax it here, the flag will be set again if it really is, but only + * when we are not in grace period for safety! 
+ */ + if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) + tx->legacy_user = 0; + + /* Limit max_tfms & do debug commands if needed */ + if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM)) + return; + + cmd = sysctl_tipc_max_tfms; + sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF; + tipc_crypto_do_cmd(rx->net, cmd); +} + +static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode, u8 type) +{ + struct sk_buff *skb; + + skb = skb_clone(_skb, GFP_ATOMIC); + if (skb) { + TIPC_SKB_CB(skb)->xmit_type = type; + tipc_crypto_xmit(net, &skb, b, dst, __dnode); + if (skb) + b->media->send_msg(net, skb, b, dst); + } +} + +/** + * tipc_crypto_xmit - Build & encrypt TIPC message for xmit + * @net: struct net + * @skb: input/output message skb pointer + * @b: bearer used for xmit later + * @dst: destination media address + * @__dnode: destination node for reference if any + * + * First, build an encryption message header on the top of the message, then + * encrypt the original TIPC message by using the pending, master or active + * key with this preference order. + * If the encryption is successful, the encrypted skb is returned directly or + * via the callback. + * Otherwise, the skb is freed! 
+ * + * Return: + * * 0 : the encryption has succeeded (or no encryption) + * * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made + * * -ENOKEY : the encryption has failed due to no key + * * -EKEYREVOKED : the encryption has failed due to key revoked + * * -ENOMEM : the encryption has failed due to no memory + * * < 0 : the encryption has failed due to other reasons + */ +int tipc_crypto_xmit(struct net *net, struct sk_buff **skb, + struct tipc_bearer *b, struct tipc_media_addr *dst, + struct tipc_node *__dnode) +{ + struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode); + struct tipc_crypto *tx = tipc_net(net)->crypto_tx; + struct tipc_crypto_stats __percpu *stats = tx->stats; + struct tipc_msg *hdr = buf_msg(*skb); + struct tipc_key key = tx->key; + struct tipc_aead *aead = NULL; + u32 user = msg_user(hdr); + u32 type = msg_type(hdr); + int rc = -ENOKEY; + u8 tx_key = 0; + + /* No encryption? */ + if (!tx->working) + return 0; + + /* Pending key if peer has active on it or probing time */ + if (unlikely(key.pending)) { + tx_key = key.pending; + if (!tx->key_master && !key.active) + goto encrypt; + if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key) + goto encrypt; + if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) { + pr_debug("%s: probing for key[%d]\n", tx->name, + key.pending); + goto encrypt; + } + if (user == LINK_CONFIG || user == LINK_PROTOCOL) + tipc_crypto_clone_msg(net, *skb, b, dst, __dnode, + SKB_PROBING); + } + + /* Master key if this is a *vital* message or in grace period */ + if (tx->key_master) { + tx_key = KEY_MASTER; + if (!key.active) + goto encrypt; + if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) { + pr_debug("%s: gracing for msg (%d %d)\n", tx->name, + user, type); + goto encrypt; + } + if (user == LINK_CONFIG || + (user == LINK_PROTOCOL && type == RESET_MSG) || + (user == MSG_CRYPTO && type == KEY_DISTR_MSG) || + time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) { + if (__rx && __rx->key_master && + 
!atomic_read(&__rx->peer_rx_active)) + goto encrypt; + if (!__rx) { + if (likely(!tx->legacy_user)) + goto encrypt; + tipc_crypto_clone_msg(net, *skb, b, dst, + __dnode, SKB_GRACING); + } + } + } + + /* Else, use the active key if any */ + if (likely(key.active)) { + tx_key = key.active; + goto encrypt; + } + + goto exit; + +encrypt: + aead = tipc_aead_get(tx->aead[tx_key]); + if (unlikely(!aead)) + goto exit; + rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx); + if (likely(rc > 0)) + rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode); + +exit: + switch (rc) { + case 0: + this_cpu_inc(stats->stat[STAT_OK]); + break; + case -EINPROGRESS: + case -EBUSY: + this_cpu_inc(stats->stat[STAT_ASYNC]); + *skb = NULL; + return rc; + default: + this_cpu_inc(stats->stat[STAT_NOK]); + if (rc == -ENOKEY) + this_cpu_inc(stats->stat[STAT_NOKEYS]); + else if (rc == -EKEYREVOKED) + this_cpu_inc(stats->stat[STAT_BADKEYS]); + kfree_skb(*skb); + *skb = NULL; + break; + } + + tipc_aead_put(aead); + return rc; +} + +/** + * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer + * @net: struct net + * @rx: RX crypto handle + * @skb: input/output message skb pointer + * @b: bearer where the message has been received + * + * If the decryption is successful, the decrypted skb is returned directly or + * as the callback, the encryption header and auth tag will be trimmed out + * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete(). + * Otherwise, the skb will be freed! + * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX + * cluster key(s) can be taken for decryption (- recursive). 
+ * + * Return: + * * 0 : the decryption has successfully completed + * * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made + * * -ENOKEY : the decryption has failed due to no key + * * -EBADMSG : the decryption has failed due to bad message + * * -ENOMEM : the decryption has failed due to no memory + * * < 0 : the decryption has failed due to other reasons + */ +int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx, + struct sk_buff **skb, struct tipc_bearer *b) +{ + struct tipc_crypto *tx = tipc_net(net)->crypto_tx; + struct tipc_crypto_stats __percpu *stats; + struct tipc_aead *aead = NULL; + struct tipc_key key; + int rc = -ENOKEY; + u8 tx_key, n; + + tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key; + + /* New peer? + * Let's try with TX key (i.e. cluster mode) & verify the skb first! + */ + if (unlikely(!rx || tx_key == KEY_MASTER)) + goto pick_tx; + + /* Pick RX key according to TX key if any */ + key = rx->key; + if (tx_key == key.active || tx_key == key.pending || + tx_key == key.passive) + goto decrypt; + + /* Unknown key, let's try to align RX key(s) */ + if (tipc_crypto_key_try_align(rx, tx_key)) + goto decrypt; + +pick_tx: + /* No key suitable? Try to pick one from TX... */ + aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key); + if (aead) + goto decrypt; + goto exit; + +decrypt: + rcu_read_lock(); + if (!aead) + aead = tipc_aead_get(rx->aead[tx_key]); + rc = tipc_aead_decrypt(net, aead, *skb, b); + rcu_read_unlock(); + +exit: + stats = ((rx) ?: tx)->stats; + switch (rc) { + case 0: + this_cpu_inc(stats->stat[STAT_OK]); + break; + case -EINPROGRESS: + case -EBUSY: + this_cpu_inc(stats->stat[STAT_ASYNC]); + *skb = NULL; + return rc; + default: + this_cpu_inc(stats->stat[STAT_NOK]); + if (rc == -ENOKEY) { + kfree_skb(*skb); + *skb = NULL; + if (rx) { + /* Mark rx->nokey only if we dont have a + * pending received session key, nor a newer + * one i.e. in the next slot. 
+ */ + n = key_next(tx_key); + rx->nokey = !(rx->skey || + rcu_access_pointer(rx->aead[n])); + pr_debug_ratelimited("%s: nokey %d, key %d/%x\n", + rx->name, rx->nokey, + tx_key, rx->key.keys); + tipc_node_put(rx->node); + } + this_cpu_inc(stats->stat[STAT_NOKEYS]); + return rc; + } else if (rc == -EBADMSG) { + this_cpu_inc(stats->stat[STAT_BADMSGS]); + } + break; + } + + tipc_crypto_rcv_complete(net, aead, b, skb, rc); + return rc; +} + +static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, + struct tipc_bearer *b, + struct sk_buff **skb, int err) +{ + struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb); + struct tipc_crypto *rx = aead->crypto; + struct tipc_aead *tmp = NULL; + struct tipc_ehdr *ehdr; + struct tipc_node *n; + + /* Is this completed by TX? */ + if (unlikely(is_tx(aead->crypto))) { + rx = skb_cb->tx_clone_ctx.rx; + pr_debug("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n", + (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead, + (*skb)->next, skb_cb->flags); + pr_debug("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n", + skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last, + aead->crypto->aead[1], aead->crypto->aead[2], + aead->crypto->aead[3]); + if (unlikely(err)) { + if (err == -EBADMSG && (*skb)->next) + tipc_rcv(net, (*skb)->next, b); + goto free_skb; + } + + if (likely((*skb)->next)) { + kfree_skb((*skb)->next); + (*skb)->next = NULL; + } + ehdr = (struct tipc_ehdr *)(*skb)->data; + if (!rx) { + WARN_ON(ehdr->user != LINK_CONFIG); + n = tipc_node_create(net, 0, ehdr->id, 0xffffu, 0, + true); + rx = tipc_node_crypto_rx(n); + if (unlikely(!rx)) + goto free_skb; + } + + /* Ignore cloning if it was TX master key */ + if (ehdr->tx_key == KEY_MASTER) + goto rcv; + if (tipc_aead_clone(&tmp, aead) < 0) + goto rcv; + WARN_ON(!refcount_inc_not_zero(&tmp->refcnt)); + if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) { + tipc_aead_free(&tmp->rcu); + goto rcv; + } + tipc_aead_put(aead); + aead = tmp; + } + + if 
(unlikely(err)) { + tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN); + goto free_skb; + } + + /* Set the RX key's user */ + tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1); + + /* Mark this point, RX works */ + rx->timer1 = jiffies; + +rcv: + /* Remove ehdr & auth. tag prior to tipc_rcv() */ + ehdr = (struct tipc_ehdr *)(*skb)->data; + + /* Mark this point, RX passive still works */ + if (rx->key.passive && ehdr->tx_key == rx->key.passive) + rx->timer2 = jiffies; + + skb_reset_network_header(*skb); + skb_pull(*skb, tipc_ehdr_size(ehdr)); + if (pskb_trim(*skb, (*skb)->len - aead->authsize)) + goto free_skb; + + /* Validate TIPCv2 message */ + if (unlikely(!tipc_msg_validate(skb))) { + pr_err_ratelimited("Packet dropped after decryption!\n"); + goto free_skb; + } + + /* Ok, everything's fine, try to synch own keys according to peers' */ + tipc_crypto_key_synch(rx, *skb); + + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + + /* Mark skb decrypted */ + skb_cb->decrypted = 1; + + /* Clear clone cxt if any */ + if (likely(!skb_cb->tx_clone_deferred)) + goto exit; + skb_cb->tx_clone_deferred = 0; + memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx)); + goto exit; + +free_skb: + kfree_skb(*skb); + *skb = NULL; + +exit: + tipc_aead_put(aead); + if (rx) + tipc_node_put(rx->node); +} + +static void tipc_crypto_do_cmd(struct net *net, int cmd) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_crypto *tx = tn->crypto_tx, *rx; + struct list_head *p; + unsigned int stat; + int i, j, cpu; + char buf[200]; + + /* Currently only one command is supported */ + switch (cmd) { + case 0xfff1: + goto print_stats; + default: + return; + } + +print_stats: + /* Print a header */ + pr_info("\n=============== TIPC Crypto Statistics ===============\n\n"); + + /* Print key status */ + pr_info("Key status:\n"); + pr_info("TX(%7.7s)\n%s", tipc_own_id_string(net), + tipc_crypto_key_dump(tx, 
buf)); + + rcu_read_lock(); + for (p = tn->node_list.next; p != &tn->node_list; p = p->next) { + rx = tipc_node_crypto_rx_by_list(p); + pr_info("RX(%7.7s)\n%s", tipc_node_get_id_str(rx->node), + tipc_crypto_key_dump(rx, buf)); + } + rcu_read_unlock(); + + /* Print crypto statistics */ + for (i = 0, j = 0; i < MAX_STATS; i++) + j += scnprintf(buf + j, 200 - j, "|%11s ", hstats[i]); + pr_info("Counter %s", buf); + + memset(buf, '-', 115); + buf[115] = '\0'; + pr_info("%s\n", buf); + + j = scnprintf(buf, 200, "TX(%7.7s) ", tipc_own_id_string(net)); + for_each_possible_cpu(cpu) { + for (i = 0; i < MAX_STATS; i++) { + stat = per_cpu_ptr(tx->stats, cpu)->stat[i]; + j += scnprintf(buf + j, 200 - j, "|%11d ", stat); + } + pr_info("%s", buf); + j = scnprintf(buf, 200, "%12s", " "); + } + + rcu_read_lock(); + for (p = tn->node_list.next; p != &tn->node_list; p = p->next) { + rx = tipc_node_crypto_rx_by_list(p); + j = scnprintf(buf, 200, "RX(%7.7s) ", + tipc_node_get_id_str(rx->node)); + for_each_possible_cpu(cpu) { + for (i = 0; i < MAX_STATS; i++) { + stat = per_cpu_ptr(rx->stats, cpu)->stat[i]; + j += scnprintf(buf + j, 200 - j, "|%11d ", + stat); + } + pr_info("%s", buf); + j = scnprintf(buf, 200, "%12s", " "); + } + } + rcu_read_unlock(); + + pr_info("\n======================== Done ========================\n"); +} + +static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf) +{ + struct tipc_key key = c->key; + struct tipc_aead *aead; + int k, i = 0; + char *s; + + for (k = KEY_MIN; k <= KEY_MAX; k++) { + if (k == KEY_MASTER) { + if (is_rx(c)) + continue; + if (time_before(jiffies, + c->timer2 + TIPC_TX_GRACE_PERIOD)) + s = "ACT"; + else + s = "PAS"; + } else { + if (k == key.passive) + s = "PAS"; + else if (k == key.active) + s = "ACT"; + else if (k == key.pending) + s = "PEN"; + else + s = "-"; + } + i += scnprintf(buf + i, 200 - i, "\tKey%d: %s", k, s); + + rcu_read_lock(); + aead = rcu_dereference(c->aead[k]); + if (aead) + i += scnprintf(buf + i, 200 - i, 
+ "{\"0x...%s\", \"%s\"}/%d:%d", + aead->hint, + (aead->mode == CLUSTER_KEY) ? "c" : "p", + atomic_read(&aead->users), + refcount_read(&aead->refcnt)); + rcu_read_unlock(); + i += scnprintf(buf + i, 200 - i, "\n"); + } + + if (is_rx(c)) + i += scnprintf(buf + i, 200 - i, "\tPeer RX active: %d\n", + atomic_read(&c->peer_rx_active)); + + return buf; +} + +static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, + char *buf) +{ + struct tipc_key *key = &old; + int k, i = 0; + char *s; + + /* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */ +again: + i += scnprintf(buf + i, 32 - i, "["); + for (k = KEY_1; k <= KEY_3; k++) { + if (k == key->passive) + s = "pas"; + else if (k == key->active) + s = "act"; + else if (k == key->pending) + s = "pen"; + else + s = "-"; + i += scnprintf(buf + i, 32 - i, + (k != KEY_3) ? "%s " : "%s", s); + } + if (key != &new) { + i += scnprintf(buf + i, 32 - i, "] -> "); + key = &new; + goto again; + } + i += scnprintf(buf + i, 32 - i, "]"); + return buf; +} + +/** + * tipc_crypto_msg_rcv - Common 'MSG_CRYPTO' processing point + * @net: the struct net + * @skb: the receiving message buffer + */ +void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb) +{ + struct tipc_crypto *rx; + struct tipc_msg *hdr; + + if (unlikely(skb_linearize(skb))) + goto exit; + + hdr = buf_msg(skb); + rx = tipc_node_crypto_rx_by_addr(net, msg_prevnode(hdr)); + if (unlikely(!rx)) + goto exit; + + switch (msg_type(hdr)) { + case KEY_DISTR_MSG: + if (tipc_crypto_key_rcv(rx, hdr)) + goto exit; + break; + default: + break; + } + + tipc_node_put(rx->node); + +exit: + kfree_skb(skb); +} + +/** + * tipc_crypto_key_distr - Distribute a TX key + * @tx: the TX crypto + * @key: the key's index + * @dest: the destination tipc node, = NULL if distributing to all nodes + * + * Return: 0 in case of success, otherwise < 0 + */ +int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key, + struct tipc_node *dest) +{ + struct tipc_aead *aead; + u32 
dnode = tipc_node_get_addr(dest); + int rc = -ENOKEY; + + if (!sysctl_tipc_key_exchange_enabled) + return 0; + + if (key) { + rcu_read_lock(); + aead = tipc_aead_get(tx->aead[key]); + if (likely(aead)) { + rc = tipc_crypto_key_xmit(tx->net, aead->key, + aead->gen, aead->mode, + dnode); + tipc_aead_put(aead); + } + rcu_read_unlock(); + } + + return rc; +} + +/** + * tipc_crypto_key_xmit - Send a session key + * @net: the struct net + * @skey: the session key to be sent + * @gen: the key's generation + * @mode: the key's mode + * @dnode: the destination node address, = 0 if broadcasting to all nodes + * + * The session key 'skey' is packed in a TIPC v2 'MSG_CRYPTO/KEY_DISTR_MSG' + * as its data section, then xmit-ed through the uc/bc link. + * + * Return: 0 in case of success, otherwise < 0 + */ +static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey, + u16 gen, u8 mode, u32 dnode) +{ + struct sk_buff_head pkts; + struct tipc_msg *hdr; + struct sk_buff *skb; + u16 size, cong_link_cnt; + u8 *data; + int rc; + + size = tipc_aead_key_size(skey); + skb = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = buf_msg(skb); + tipc_msg_init(tipc_own_addr(net), hdr, MSG_CRYPTO, KEY_DISTR_MSG, + INT_H_SIZE, dnode); + msg_set_size(hdr, INT_H_SIZE + size); + msg_set_key_gen(hdr, gen); + msg_set_key_mode(hdr, mode); + + data = msg_data(hdr); + *((__be32 *)(data + TIPC_AEAD_ALG_NAME)) = htonl(skey->keylen); + memcpy(data, skey->alg_name, TIPC_AEAD_ALG_NAME); + memcpy(data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->key, + skey->keylen); + + __skb_queue_head_init(&pkts); + __skb_queue_tail(&pkts, skb); + if (dnode) + rc = tipc_node_xmit(net, &pkts, dnode, 0); + else + rc = tipc_bcast_xmit(net, &pkts, &cong_link_cnt); + + return rc; +} + +/** + * tipc_crypto_key_rcv - Receive a session key + * @rx: the RX crypto + * @hdr: the TIPC v2 message incl. 
the receiving session key in its data
+ *
+ * This function retrieves the session key in the message from peer, then
+ * schedules a RX work to attach the key to the corresponding RX crypto.
+ *
+ * Return: "true" if the key has been scheduled for attaching, otherwise
+ * "false".
+ */
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
+{
+	struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+	struct tipc_aead_key *skey = NULL;
+	u16 key_gen = msg_key_gen(hdr);
+	u32 size = msg_data_sz(hdr);
+	u8 *data = msg_data(hdr);
+	unsigned int keylen;
+
+	/* Verify whether the size can exist in the packet: reject anything
+	 * shorter than the key header plus the minimum key length before the
+	 * length word is read from the data below.
+	 */
+	if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+		pr_debug("%s: message data size is too small\n", rx->name);
+		goto exit;
+	}
+
+	keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+	/* Verify the supplied size values: the advertised key length must not
+	 * exceed the maximum, and header + key must account for exactly the
+	 * whole data section.
+	 */
+	if (unlikely(keylen > TIPC_AEAD_KEY_SIZE_MAX ||
+		     size != keylen + sizeof(struct tipc_aead_key))) {
+		pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+		goto exit;
+	}
+
+	spin_lock(&rx->lock);
+	/* Refuse the key if a previously received one is still stored in
+	 * rx->skey, or if it carries the generation already recorded in
+	 * rx->key_gen while rx->key.keys is non-zero.
+	 */
+	if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
+		pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
+		       rx->skey, key_gen, rx->key_gen);
+		goto exit_unlock;
+	}
+
+	/* Allocate memory for the key (atomic: rx->lock is held) */
+	skey = kmalloc(size, GFP_ATOMIC);
+	if (unlikely(!skey)) {
+		pr_err("%s: unable to allocate memory for skey\n", rx->name);
+		goto exit_unlock;
+	}
+
+	/* Copy key from msg data: the algorithm name comes first, the key
+	 * bytes follow the 32-bit length word.
+	 */
+	skey->keylen = keylen;
+	memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
+	memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
+	       skey->keylen);
+
+	rx->key_gen = key_gen;
+	rx->skey_mode = msg_key_mode(hdr);
+	rx->skey = skey;
+	rx->nokey = 0;
+	mb(); /* for nokey flag */
+
+exit_unlock:
+	spin_unlock(&rx->lock);
+
+exit:
+	/* Schedule the key attaching on this crypto; skey is still NULL here
+	 * if any check above failed, so nothing is queued in that case.
+	 */
+	if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
+		return true;
+
+	return false;
+}
+
+/**
+ * tipc_crypto_work_rx - Scheduled RX works handler
+ * @work: the struct RX work
+ *
+ * The function processes the previous scheduled works i.e. distributing TX key
+ * or attaching a received session key on RX crypto.
+ *
+ * The work is queued again (5 s later) after a key distribution attempt, or
+ * when attaching the received key failed with -EBUSY/-ENOMEM. When it is not
+ * queued again, tipc_node_put() is called on rx->node.
+ */
+static void tipc_crypto_work_rx(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tipc_crypto *rx = container_of(dwork, struct tipc_crypto, work);
+	struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+	unsigned long delay = msecs_to_jiffies(5000);
+	bool resched = false;
+	u8 key;
+	int rc;
+
+	/* Case 1: Distribute TX key to peer if scheduled */
+	if (atomic_cmpxchg(&rx->key_distr,
+			   KEY_DISTR_SCHED,
+			   KEY_DISTR_COMPL) == KEY_DISTR_SCHED) {
+		/* Always pick the newest one for distributing */
+		key = tx->key.pending ?: tx->key.active;
+		rc = tipc_crypto_key_distr(tx, key, rx->node);
+		if (unlikely(rc))
+			pr_warn("%s: unable to distr key[%d] to %s, err %d\n",
+				tx->name, key, tipc_node_get_id_str(rx->node),
+				rc);
+
+		/* Sched for key_distr releasing */
+		resched = true;
+	} else {
+		atomic_cmpxchg(&rx->key_distr, KEY_DISTR_COMPL, 0);
+	}
+
+	/* Case 2: Attach a pending received session key from peer if any */
+	if (rx->skey) {
+		rc = tipc_crypto_key_init(rx, rx->skey, rx->skey_mode, false);
+		if (unlikely(rc < 0))
+			pr_warn("%s: unable to attach received skey, err %d\n",
+				rx->name, rc);
+		switch (rc) {
+		case -EBUSY:
+		case -ENOMEM:
+			/* Resched the key attaching */
+			resched = true;
+			break;
+		default:
+			synchronize_rcu();
+			/* Raw key material: wipe it before freeing, as done
+			 * for the TX session key in tipc_crypto_work_tx().
+			 */
+			kfree_sensitive(rx->skey);
+			rx->skey = NULL;
+			break;
+		}
+	}
+
+	if (resched && queue_delayed_work(tx->wq, &rx->work, delay))
+		return;
+
+	tipc_node_put(rx->node);
+}
+
+/**
+ * tipc_crypto_rekeying_sched - (Re)schedule rekeying with/without new interval
+ * @tx: TX crypto
+ * @changed: if the rekeying needs to be rescheduled with new interval
+ * @new_intv: new rekeying interval in minutes (when "changed" = true);
+ *	      TIPC_REKEYING_NOW requests an immediate rekeying and leaves the
+ *	      stored interval untouched
+ *
+ * With "changed" set, a pending rekeying work is cancelled (synchronously)
+ * before the new schedule is applied. Nothing is queued when the stored
+ * interval is 0 and no immediate rekeying was requested.
+ */
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+				u32 new_intv)
+{
+	unsigned long delay;
+	bool now = false;
+
+	if (changed) {
+		if (new_intv == TIPC_REKEYING_NOW)
+			now = true;
+		else
+			tx->rekeying_intv = new_intv;
+		cancel_delayed_work_sync(&tx->work);
+	}
+
+	if (tx->rekeying_intv || now) {
+		/* Interval is kept in minutes; convert to milliseconds */
+		delay = (now) ? 0 : tx->rekeying_intv * 60 * 1000;
+		queue_delayed_work(tx->wq, &tx->work, msecs_to_jiffies(delay));
+	}
+}
+
+/**
+ * tipc_crypto_work_tx - Scheduled TX works handler
+ * @work: the struct TX work
+ *
+ * The function processes the previous scheduled work, i.e. key rekeying, by
+ * generating a new session key based on current one, then attaching it to the
+ * TX crypto and finally distributing it to peers. It also re-schedules the
+ * rekeying if needed.
+ *
+ * Key generation is skipped while a pending TX key exists. If neither an
+ * active key nor a master key is attached, the function returns without
+ * re-scheduling.
+ */
+static void tipc_crypto_work_tx(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work);
+	struct tipc_aead_key *skey = NULL;
+	struct tipc_key key = tx->key;
+	struct tipc_aead *aead;
+	int rc = -ENOMEM;
+
+	if (unlikely(key.pending))
+		goto resched;
+
+	/* Take current key as a template */
+	rcu_read_lock();
+	aead = rcu_dereference(tx->aead[key.active ?: KEY_MASTER]);
+	if (unlikely(!aead)) {
+		rcu_read_unlock();
+		/* At least one key should exist for securing */
+		return;
+	}
+
+	/* Let's duplicate it first */
+	skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
+	rcu_read_unlock();
+
+	/* Now, generate new key, initiate & distribute it */
+	if (likely(skey)) {
+		rc = tipc_aead_key_generate(skey) ?:
+		     tipc_crypto_key_init(tx, skey, PER_NODE_KEY, false);
+		/* A positive rc is the index of the newly attached key */
+		if (likely(rc > 0))
+			rc = tipc_crypto_key_distr(tx, rc, NULL);
+		kfree_sensitive(skey);
+	}
+
+	if (unlikely(rc))
+		pr_warn_ratelimited("%s: rekeying returns %d\n", tx->name, rc);
+
+resched:
+	/* Re-schedule rekeying if any */
+	tipc_crypto_rekeying_sched(tx, false, 0);
+}
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
new file mode 100644
index
000000000000..ce7d4cc8a9e0 --- /dev/null +++ b/net/tipc/crypto.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * net/tipc/crypto.h: Include file for TIPC crypto + * + * Copyright (c) 2019, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifdef CONFIG_TIPC_CRYPTO +#ifndef _TIPC_CRYPTO_H +#define _TIPC_CRYPTO_H + +#include "core.h" +#include "node.h" +#include "msg.h" +#include "bearer.h" + +#define TIPC_EVERSION 7 + +/* AEAD aes(gcm) */ +#define TIPC_AES_GCM_KEY_SIZE_128 16 +#define TIPC_AES_GCM_KEY_SIZE_192 24 +#define TIPC_AES_GCM_KEY_SIZE_256 32 + +#define TIPC_AES_GCM_SALT_SIZE 4 +#define TIPC_AES_GCM_IV_SIZE 12 +#define TIPC_AES_GCM_TAG_SIZE 16 + +/* + * TIPC crypto modes: + * - CLUSTER_KEY: + * One single key is used for both TX & RX in all nodes in the cluster. + * - PER_NODE_KEY: + * Each nodes in the cluster has one TX key, for RX a node needs to know + * its peers' TX key for the decryption of messages from those nodes. + */ +enum { + CLUSTER_KEY = 1, + PER_NODE_KEY = (1 << 1), +}; + +extern int sysctl_tipc_max_tfms __read_mostly; +extern int sysctl_tipc_key_exchange_enabled __read_mostly; + +/* + * TIPC encryption message format: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 + * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w0:|Ver=7| User |D|TX |RX |K|M|N| Rsvd | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w1:| Seqno | + * w2:| (8 octets) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w3:\ Prevnode \ + * / (4 or 16 octets) / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Encrypted complete TIPC V2 header and user data / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | AuthTag | + * | (16 octets) | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Word0: + * Ver : = 7 i.e. TIPC encryption message version + * User : = 7 (for LINK_PROTOCOL); = 13 (for LINK_CONFIG) or = 0 + * D : The destined bit i.e. 
the message's destination node is + * "known" or not at the message encryption + * TX : TX key used for the message encryption + * RX : Currently RX active key corresponding to the destination + * node's TX key (when the "D" bit is set) + * K : Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only) + * M : Bit indicates if sender has master key + * N : Bit indicates if sender has no RX keys corresponding to the + * receiver's TX (when the "D" bit is set) + * Rsvd : Reserved bit, field + * Word1-2: + * Seqno : The 64-bit sequence number of the encrypted message, also + * part of the nonce used for the message encryption/decryption + * Word3-: + * Prevnode: The source node address, or ID in case LINK_CONFIG only + * AuthTag : The authentication tag for the message integrity checking + * generated by the message encryption + */ +struct tipc_ehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 destined:1, + user:4, + version:3; + __u8 reserved_1:1, + rx_nokey:1, + master_key:1, + keepalive:1, + rx_key_active:2, + tx_key:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version:3, + user:4, + destined:1; + __u8 tx_key:2, + rx_key_active:2, + keepalive:1, + master_key:1, + rx_nokey:1, + reserved_1:1; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __be16 reserved_2; + } __packed; + __be32 w0; + }; + __be64 seqno; + union { + __be32 addr; + __u8 id[NODE_ID_LEN]; /* For a LINK_CONFIG message only! 
*/ + }; +#define EHDR_SIZE (offsetof(struct tipc_ehdr, addr) + sizeof(__be32)) +#define EHDR_CFG_SIZE (sizeof(struct tipc_ehdr)) +#define EHDR_MIN_SIZE (EHDR_SIZE) +#define EHDR_MAX_SIZE (EHDR_CFG_SIZE) +#define EMSG_OVERHEAD (EHDR_SIZE + TIPC_AES_GCM_TAG_SIZE) +} __packed; + +int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, + struct tipc_node *node); +void tipc_crypto_stop(struct tipc_crypto **crypto); +void tipc_crypto_timeout(struct tipc_crypto *rx); +int tipc_crypto_xmit(struct net *net, struct sk_buff **skb, + struct tipc_bearer *b, struct tipc_media_addr *dst, + struct tipc_node *__dnode); +int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx, + struct sk_buff **skb, struct tipc_bearer *b); +int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey, + u8 mode, bool master_key); +void tipc_crypto_key_flush(struct tipc_crypto *c); +int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key, + struct tipc_node *dest); +void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb); +void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed, + u32 new_intv); +int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info); +bool tipc_ehdr_validate(struct sk_buff *skb); + +static inline u32 msg_key_gen(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + +static inline void msg_set_key_gen(struct tipc_msg *m, u32 gen) +{ + msg_set_bits(m, 4, 16, 0xffff, gen); +} + +static inline u32 msg_key_mode(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xf); +} + +static inline void msg_set_key_mode(struct tipc_msg *m, u32 mode) +{ + msg_set_bits(m, 4, 0, 0xf, mode); +} + +#endif /* _TIPC_CRYPTO_H */ +#endif diff --git a/net/tipc/diag.c b/net/tipc/diag.c new file mode 100644 index 000000000000..54dde8c4e4d4 --- /dev/null +++ b/net/tipc/diag.c @@ -0,0 +1,118 @@ +/* + * net/tipc/diag.c: TIPC socket diag + * + * Copyright (c) 2018, Ericsson AB + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+/* Return the cookie of @sk as a single 64-bit value: the two 32-bit words
+ * written by sock_diag_save_cookie(), reinterpreted in memory order.
+ */
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+	u32 res[2];
+
+	sock_diag_save_cookie(sk, res);
+	return *((u64 *)res);
+}
+
+/* Append one SOCK_DIAG_BY_FAMILY answer describing @tsk to the dump @skb.
+ *
+ * Returns 0 on success, -EMSGSIZE if the message header does not fit, or the
+ * error reported by tipc_sk_fill_sock_diag().
+ */
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+				struct netlink_callback *cb,
+				struct tipc_sock *tsk)
+{
+	struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+	struct nlmsghdr *nlh;
+	int err;
+
+	nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+			       NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states,
+				     __tipc_diag_gen_cookie);
+	/* NOTE(review): the header put above is not cancelled on this error
+	 * path - confirm whether nlmsg_cancel() is wanted here.
+	 */
+	if (err)
+		return err;
+
+	nlmsg_end(skb, nlh);
+	return 0;
+}
+
+/* Dump callback: walk the TIPC sockets, adding one answer per socket */
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+/* sock_diag ->dump handler for AF_TIPC.
+ *
+ * Only dump requests (NLM_F_DUMP) are supported. Returns -EINVAL if the
+ * request is shorter than a struct tipc_sock_diag_req, -EOPNOTSUPP for
+ * non-dump requests, otherwise the result of netlink_dump_start().
+ */
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+				       struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct tipc_sock_diag_req);
+	struct net *net = sock_net(skb->sk);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start = tipc_dump_start,
+			.dump = tipc_diag_dump,
+			.done = tipc_dump_done,
+		};
+		/* Hand the result back to the caller instead of reporting
+		 * success unconditionally, so that a failure to start the
+		 * dump is not silently dropped.
+		 */
+		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+	}
+	return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+	.owner = THIS_MODULE,
+	.family = AF_TIPC,
+	.dump = tipc_sock_diag_handler_dump,
+};
+
+/* Module init: register the AF_TIPC handler with sock_diag */
+static int __init tipc_diag_init(void)
+{
+	return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+/* Module exit: unregister the AF_TIPC handler again */
+static void __exit tipc_diag_exit(void)
+{
+	sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index
ecc758c6eacf..775fd4f3f072 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014-2018, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * @@ -35,351 +35,386 @@ */ #include "core.h" -#include "link.h" +#include "node.h" #include "discover.h" -#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ -#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */ -#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */ -#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */ - +/* min delay during bearer start up */ +#define TIPC_DISC_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_DISC_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_DISC_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_DISC_INACTIVE 0xffffffff /** - * struct tipc_link_req - information about an ongoing link setup request - * @bearer: bearer issuing requests + * struct tipc_discoverer - information about an ongoing link setup request + * @bearer_id: identity of bearer issuing requests + * @net: network namespace instance * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. 
with an active link) - * @buf: request message to be (repeatedly) sent + * @lock: spinlock for controlling access to requests + * @skb: request message to be (repeatedly) sent * @timer: timer governing period between requests * @timer_intv: current interval between requests (in ms) */ -struct tipc_link_req { - struct tipc_bearer *bearer; +struct tipc_discoverer { + u32 bearer_id; struct tipc_media_addr dest; + struct net *net; u32 domain; int num_nodes; - struct sk_buff *buf; + spinlock_t lock; + struct sk_buff *skb; struct timer_list timer; - unsigned int timer_intv; + unsigned long timer_intv; }; /** * tipc_disc_init_msg - initialize a link setup message - * @type: message type (request or response) - * @dest_domain: network domain of node(s) which should respond to message - * @b_ptr: ptr to bearer issuing message + * @net: the applicable net namespace + * @skb: buffer containing message + * @mtyp: message type (request or response) + * @b: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, - struct tipc_bearer *b_ptr) +static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, + u32 mtyp, struct tipc_bearer *b) { - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); - struct tipc_msg *msg; - - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); - msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); - msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); - b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); - } - return buf; + struct tipc_net *tn = tipc_net(net); + u32 dest_domain = b->domain; + struct tipc_msg *hdr; + + hdr = buf_msg(skb); + tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp, + MAX_H_SIZE, dest_domain); + msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN); + msg_set_non_seq(hdr, 1); + msg_set_node_sig(hdr, tn->random); + msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES); + msg_set_dest_domain(hdr, 
dest_domain); + msg_set_bc_netid(hdr, tn->net_id); + b->media->addr2msg(msg_media_addr(hdr), &b->addr); + msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random)); + msg_set_node_id(hdr, tipc_own_id(net)); +} + +static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst, + u32 src, u32 sugg_addr, + struct tipc_media_addr *maddr, + struct tipc_bearer *b) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + + skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC); + if (!skb) + return; + hdr = buf_msg(skb); + tipc_disc_init_msg(net, skb, mtyp, b); + msg_set_sugg_node_addr(hdr, sugg_addr); + msg_set_dest_domain(hdr, dst); + tipc_bearer_xmit_skb(net, b->identity, skb, maddr); } /** * disc_dupl_alert - issue node address duplication alert - * @b_ptr: pointer to bearer detecting duplication + * @b: pointer to bearer detecting duplication * @node_addr: duplicated node address * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, +static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, struct tipc_media_addr *media_addr) { - char node_addr_str[16]; char media_addr_str[64]; - tipc_addr_string_fill(node_addr_str, node_addr); tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), media_addr); - pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, - media_addr_str, b_ptr->name); + pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr, + media_addr_str, b->name); } -/** - * tipc_disc_recv_msg - handle incoming link setup message (request or response) - * @buf: buffer containing message - * @b_ptr: bearer that message arrived on +/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer + * Returns true if message should be dropped by caller, i.e., if it is a + * trial message or we are inside trial period. Otherwise false. 
*/ -void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) +static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, + struct tipc_media_addr *maddr, + struct tipc_bearer *b, + u32 dst, u32 src, + u32 sugg_addr, + u8 *peer_id, + int mtyp) { - struct tipc_node *n_ptr; - struct tipc_link *link; - struct tipc_media_addr media_addr; - struct sk_buff *rbuf; - struct tipc_msg *msg = buf_msg(buf); - u32 dest = msg_dest_domain(msg); - u32 orig = msg_prevnode(msg); - u32 net_id = msg_bc_netid(msg); - u32 type = msg_type(msg); - u32 signature = msg_node_sig(msg); - int addr_mismatch; - int link_fully_up; - - media_addr.broadcast = 1; - b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg)); - kfree_skb(buf); - - /* Ensure message from node is valid and communication is permitted */ - if (net_id != tipc_net_id) - return; - if (media_addr.broadcast) - return; - if (!tipc_addr_domain_valid(dest)) - return; - if (!tipc_addr_node_valid(orig)) - return; - if (orig == tipc_own_addr) { - if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr))) - disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); - return; - } - if (!tipc_in_scope(dest, tipc_own_addr)) - return; - if (!tipc_in_scope(b_ptr->link_req->domain, orig)) - return; - - /* Locate structure corresponding to requesting node */ - n_ptr = tipc_node_find(orig); - if (!n_ptr) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - tipc_node_lock(n_ptr); - - /* Prepare to validate requesting node's signature and media address */ - link = n_ptr->links[b_ptr->identity]; - addr_mismatch = (link != NULL) && - memcmp(&link->media_addr, &media_addr, sizeof(media_addr)); - - /* - * Ensure discovery message's signature is correct - * - * If signature is incorrect and there is no working link to the node, - * accept the new signature but invalidate all existing links to the - * node so they won't re-activate without a new discovery message. 
- * - * If signature is incorrect and the requested link to the node is - * working, accept the new signature. (This is an instance of delayed - * rediscovery, where a link endpoint was able to re-establish contact - * with its peer endpoint on a node that rebooted before receiving a - * discovery message from that node.) - * - * If signature is incorrect and there is a working link to the node - * that is not the requested link, reject the request (must be from - * a duplicate node). - */ - if (signature != n_ptr->signature) { - if (n_ptr->working_links == 0) { - struct tipc_link *curr_link; - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - curr_link = n_ptr->links[i]; - if (curr_link) { - memset(&curr_link->media_addr, 0, - sizeof(media_addr)); - tipc_link_reset(curr_link); - } - } - addr_mismatch = (link != NULL); - } else if (tipc_link_is_up(link) && !addr_mismatch) { - /* delayed rediscovery */ - } else { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } - n_ptr->signature = signature; - } - - /* - * Ensure requesting node's media address is correct - * - * If media address doesn't match and the link is working, reject the - * request (must be from a duplicate node). - * - * If media address doesn't match and the link is not working, accept - * the new media address and reset the link to ensure it starts up - * cleanly. 
- */ - if (addr_mismatch) { - if (tipc_link_is_up(link)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } else { - memcpy(&link->media_addr, &media_addr, - sizeof(media_addr)); - tipc_link_reset(link); - } + struct net *net = d->net; + struct tipc_net *tn = tipc_net(net); + u32 self = tipc_own_addr(net); + bool trial = time_before(jiffies, tn->addr_trial_end) && !self; + + if (mtyp == DSC_TRIAL_FAIL_MSG) { + if (!trial) + return true; + + /* Ignore if somebody else already gave new suggestion */ + if (dst != tn->trial_addr) + return true; + + /* Otherwise update trial address and restart trial period */ + tn->trial_addr = sugg_addr; + msg_set_prevnode(buf_msg(d->skb), sugg_addr); + tn->addr_trial_end = jiffies + msecs_to_jiffies(1000); + return true; } - /* Create a link endpoint for this bearer, if necessary */ - if (!link) { - link = tipc_link_create(n_ptr, b_ptr, &media_addr); - if (!link) { - tipc_node_unlock(n_ptr); - return; - } + /* Apply trial address if we just left trial period */ + if (!trial && !self) { + schedule_work(&tn->work); + msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); } - /* Accept discovery message & send response, if necessary */ - link_fully_up = link_working_working(link); - - if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) { - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); - if (rbuf) { - tipc_bearer_send(b_ptr, rbuf, &media_addr); - kfree_skb(rbuf); - } - } + /* Accept regular link requests/responses only after trial period */ + if (mtyp != DSC_TRIAL_MSG) + return trial; - tipc_node_unlock(n_ptr); + sugg_addr = tipc_node_try_addr(net, peer_id, src); + if (sugg_addr) + tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src, + self, sugg_addr, maddr, b); + return true; } /** - * disc_update - update frequency of periodic link setup requests - * @req: ptr to link request structure - * - * Reinitiates discovery process if discovery 
object has no associated nodes - * and is either not currently searching or is searching at a slow rate + * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: applicable net namespace + * @skb: buffer containing message + * @b: bearer that message arrived on */ -static void disc_update(struct tipc_link_req *req) +void tipc_disc_rcv(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b) { - if (!req->num_nodes) { - if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || - (req->timer_intv > TIPC_LINK_REQ_FAST)) { - req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); - } + struct tipc_net *tn = tipc_net(net); + struct tipc_msg *hdr = buf_msg(skb); + u32 pnet_hash = msg_peer_net_hash(hdr); + u16 caps = msg_node_capabilities(hdr); + bool legacy = tn->legacy_addr_format; + u32 sugg = msg_sugg_node_addr(hdr); + u32 signature = msg_node_sig(hdr); + u8 peer_id[NODE_ID_LEN] = {0,}; + u32 dst = msg_dest_domain(hdr); + u32 net_id = msg_bc_netid(hdr); + struct tipc_media_addr maddr; + u32 src = msg_prevnode(hdr); + u32 mtyp = msg_type(hdr); + bool dupl_addr = false; + bool respond = false; + u32 self; + int err; + + if (skb_linearize(skb)) { + kfree_skb(skb); + return; } -} + hdr = buf_msg(skb); -/** - * tipc_disc_add_dest - increment set of discovered nodes - * @req: ptr to link request structure - */ -void tipc_disc_add_dest(struct tipc_link_req *req) -{ - req->num_nodes++; + if (caps & TIPC_NODE_ID128) + memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN); + else + sprintf(peer_id, "%x", src); + + err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr)); + kfree_skb(skb); + if (err || maddr.broadcast) { + pr_warn_ratelimited("Rcv corrupt discovery message\n"); + return; + } + /* Ignore discovery messages from own node */ + if (!memcmp(&maddr, &b->addr, sizeof(maddr))) + return; + if (net_id != tn->net_id) + return; + if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst, + src, sugg, peer_id, mtyp)) + return; + 
self = tipc_own_addr(net); + + /* Message from somebody using this node's address */ + if (in_own_node(net, src)) { + disc_dupl_alert(b, self, &maddr); + return; + } + if (!tipc_in_scope(legacy, dst, self)) + return; + if (!tipc_in_scope(legacy, b->domain, src)) + return; + tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash, + &maddr, &respond, &dupl_addr); + if (dupl_addr) + disc_dupl_alert(b, src, &maddr); + if (!respond) + return; + if (mtyp != DSC_REQ_MSG) + return; + tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b); } -/** - * tipc_disc_remove_dest - decrement set of discovered nodes - * @req: ptr to link request structure +/* tipc_disc_add_dest - increment set of discovered nodes */ -void tipc_disc_remove_dest(struct tipc_link_req *req) +void tipc_disc_add_dest(struct tipc_discoverer *d) { - req->num_nodes--; - disc_update(req); + spin_lock_bh(&d->lock); + d->num_nodes++; + spin_unlock_bh(&d->lock); } -/** - * disc_send_msg - send link setup request message - * @req: ptr to link request structure +/* tipc_disc_remove_dest - decrement set of discovered nodes */ -static void disc_send_msg(struct tipc_link_req *req) +void tipc_disc_remove_dest(struct tipc_discoverer *d) { - if (!req->bearer->blocked) - tipc_bearer_send(req->bearer, req->buf, &req->dest); + int intv, num; + + spin_lock_bh(&d->lock); + d->num_nodes--; + num = d->num_nodes; + intv = d->timer_intv; + if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) { + d->timer_intv = TIPC_DISC_INIT; + mod_timer(&d->timer, jiffies + d->timer_intv); + } + spin_unlock_bh(&d->lock); } -/** - * disc_timeout - send a periodic link setup request - * @req: ptr to link request structure - * +/* tipc_disc_timeout - send a periodic link setup request * Called whenever a link setup request timer associated with a bearer expires. 
+ * - Keep doubling time between sent request until limit is reached; + * - Hold at fast polling rate if we don't have any associated nodes + * - Otherwise hold at slow polling rate */ -static void disc_timeout(struct tipc_link_req *req) +static void tipc_disc_timeout(struct timer_list *t) { - int max_delay; + struct tipc_discoverer *d = timer_container_of(d, t, timer); + struct tipc_net *tn = tipc_net(d->net); + struct tipc_media_addr maddr; + struct sk_buff *skb = NULL; + struct net *net = d->net; + u32 bearer_id; - spin_lock_bh(&req->bearer->lock); + spin_lock_bh(&d->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->domain) && req->num_nodes) { - req->timer_intv = TIPC_LINK_REQ_INACTIVE; + if (tipc_node(d->domain) && d->num_nodes) { + d->timer_intv = TIPC_DISC_INACTIVE; goto exit; } - /* - * Send discovery message, then update discovery timer - * - * Keep doubling time between requests until limit is reached; - * hold at fast polling rate if don't have any associated nodes, - * otherwise hold at slow polling rate - */ - disc_send_msg(req); - - req->timer_intv *= 2; - if (req->num_nodes) - max_delay = TIPC_LINK_REQ_SLOW; - else - max_delay = TIPC_LINK_REQ_FAST; - if (req->timer_intv > max_delay) - req->timer_intv = max_delay; + /* Did we just leave trial period ? 
*/ + if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) { + mod_timer(&d->timer, jiffies + TIPC_DISC_INIT); + spin_unlock_bh(&d->lock); + schedule_work(&tn->work); + return; + } + + /* Adjust timeout interval according to discovery phase */ + if (time_before(jiffies, tn->addr_trial_end)) { + d->timer_intv = TIPC_DISC_INIT; + } else { + d->timer_intv *= 2; + if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW) + d->timer_intv = TIPC_DISC_SLOW; + else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST) + d->timer_intv = TIPC_DISC_FAST; + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); + } - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&d->timer, jiffies + d->timer_intv); + memcpy(&maddr, &d->dest, sizeof(maddr)); + skb = skb_clone(d->skb, GFP_ATOMIC); + bearer_id = d->bearer_id; exit: - spin_unlock_bh(&req->bearer->lock); + spin_unlock_bh(&d->lock); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr); } /** * tipc_disc_create - create object to send periodic link setup requests - * @b_ptr: ptr to bearer issuing requests + * @net: the applicable net namespace + * @b: ptr to bearer issuing requests * @dest: destination address for request messages - * @dest_domain: network domain to which links can be established + * @skb: pointer to created frame * - * Returns 0 if successful, otherwise -errno. + * Return: 0 if successful, otherwise -errno. 
*/ -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, - u32 dest_domain) +int tipc_disc_create(struct net *net, struct tipc_bearer *b, + struct tipc_media_addr *dest, struct sk_buff **skb) { - struct tipc_link_req *req; + struct tipc_net *tn = tipc_net(net); + struct tipc_discoverer *d; - req = kmalloc(sizeof(*req), GFP_ATOMIC); - if (!req) + d = kmalloc(sizeof(*d), GFP_ATOMIC); + if (!d) + return -ENOMEM; + d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC); + if (!d->skb) { + kfree(d); return -ENOMEM; - - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr); - if (!req->buf) { - kfree(req); - return -ENOMSG; } + tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b); - memcpy(&req->dest, dest, sizeof(*dest)); - req->bearer = b_ptr; - req->domain = dest_domain; - req->num_nodes = 0; - req->timer_intv = TIPC_LINK_REQ_INIT; - k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); - k_start_timer(&req->timer, req->timer_intv); - b_ptr->link_req = req; - disc_send_msg(req); + /* Do we need an address trial period first ? 
*/ + if (!tipc_own_addr(net)) { + tn->addr_trial_end = jiffies + msecs_to_jiffies(1000); + msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG); + } + memcpy(&d->dest, dest, sizeof(*dest)); + d->net = net; + d->bearer_id = b->identity; + d->domain = b->domain; + d->num_nodes = 0; + d->timer_intv = TIPC_DISC_INIT; + spin_lock_init(&d->lock); + timer_setup(&d->timer, tipc_disc_timeout, 0); + mod_timer(&d->timer, jiffies + d->timer_intv); + b->disc = d; + *skb = skb_clone(d->skb, GFP_ATOMIC); return 0; } /** * tipc_disc_delete - destroy object sending periodic link setup requests - * @req: ptr to link request structure + * @d: ptr to link dest structure + */ +void tipc_disc_delete(struct tipc_discoverer *d) +{ + timer_shutdown_sync(&d->timer); + kfree_skb(d->skb); + kfree(d); +} + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace + * @b: ptr to bearer issuing requests */ -void tipc_disc_delete(struct tipc_link_req *req) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { - k_cancel_timer(&req->timer); - k_term_timer(&req->timer); - kfree_skb(req->buf); - kfree(req); + struct tipc_discoverer *d = b->disc; + struct tipc_media_addr maddr; + struct sk_buff *skb; + + spin_lock_bh(&d->lock); + tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b); + d->net = net; + d->bearer_id = b->identity; + d->domain = b->domain; + d->num_nodes = 0; + d->timer_intv = TIPC_DISC_INIT; + memcpy(&maddr, &d->dest, sizeof(maddr)); + mod_timer(&d->timer, jiffies + d->timer_intv); + skb = skb_clone(d->skb, GFP_ATOMIC); + spin_unlock_bh(&d->lock); + if (skb) + tipc_bearer_xmit_skb(net, b->identity, skb, &maddr); } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 75b67c403aa3..521d96c41dfd 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -37,13 +37,15 @@ #ifndef _TIPC_DISCOVER_H #define _TIPC_DISCOVER_H -struct tipc_link_req; +struct tipc_discoverer; -int tipc_disc_create(struct tipc_bearer *b_ptr, struct 
tipc_media_addr *dest, - u32 dest_domain); -void tipc_disc_delete(struct tipc_link_req *req); -void tipc_disc_add_dest(struct tipc_link_req *req); -void tipc_disc_remove_dest(struct tipc_link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr); +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest, struct sk_buff **skb); +void tipc_disc_delete(struct tipc_discoverer *req); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); +void tipc_disc_add_dest(struct tipc_discoverer *req); +void tipc_disc_remove_dest(struct tipc_discoverer *req); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 40ea40cf6204..cb0d185e06af 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,8 +1,8 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2005-2008, 2011, Wind River Systems + * Copyright (c) 2001-2007, 2013-2014, Ericsson AB + * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -37,340 +37,62 @@ #include "core.h" #include "bearer.h" -#define MAX_ETH_BEARERS MAX_BEARERS - -#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ - -/** - * struct eth_bearer - Ethernet bearer data structure - * @bearer: ptr to associated "generic" bearer structure - * @dev: ptr to associated Ethernet network device - * @tipc_packet_type: used in binding TIPC to Ethernet driver - * @setup: work item used when enabling bearer - * @cleanup: work item used when disabling bearer - */ -struct eth_bearer { - struct tipc_bearer *bearer; - struct net_device *dev; - struct packet_type tipc_packet_type; - struct work_struct setup; - struct work_struct cleanup; -}; - -static struct tipc_media eth_media_info; -static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; -static int eth_started; - -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); -/* - * Network device notifier info - */ -static struct notifier_block notifier = { - .notifier_call = recv_notification, - .priority = 0 -}; - -/** - * eth_media_addr_set - initialize Ethernet media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. 
- */ -static void eth_media_addr_set(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *mac) -{ - memcpy(a->value, mac, ETH_ALEN); - memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); - a->media_id = TIPC_MEDIA_TYPE_ETH; - a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN); -} - -/** - * send_msg - send a TIPC message out over an Ethernet interface - */ -static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, - struct tipc_media_addr *dest) +/* Convert Ethernet address (media address format) to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *addr, + char *strbuf, int bufsz) { - struct sk_buff *clone; - struct net_device *dev; - int delta; - - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; - delta = dev->hard_header_len - skb_headroom(buf); - - if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); - return 0; - } - - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); - return 0; -} - -/** - * recv_msg - handle incoming TIPC message from an Ethernet interface - * - * Accept only packets explicitly sent to this node, or broadcast packets; - * ignores packets sent using Ethernet multicast, and traffic sent to other - * nodes (which can happen if interface is running in promiscuous mode). 
- */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return 0; - } - - if (likely(eb_ptr->bearer)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_recv_msg(buf, eb_ptr->bearer); - return 0; - } - } - kfree_skb(buf); - return 0; -} - -/** - * setup_bearer - setup association between Ethernet bearer and interface - */ -static void setup_bearer(struct work_struct *work) -{ - struct eth_bearer *eb_ptr = - container_of(work, struct eth_bearer, setup); - - dev_add_pack(&eb_ptr->tipc_packet_type); -} - -/** - * enable_bearer - attach TIPC bearer to an Ethernet interface - */ -static int enable_bearer(struct tipc_bearer *tb_ptr) -{ - struct net_device *dev; - struct eth_bearer *eb_ptr = &eth_bearers[0]; - struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; - char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; - int pending_dev = 0; - - /* Find unused Ethernet bearer structure */ - while (eb_ptr->dev) { - if (!eb_ptr->bearer) - pending_dev++; - if (++eb_ptr == stop) - return pending_dev ?
-EAGAIN : -EDQUOT; - } - - /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); - if (!dev) - return -ENODEV; - - /* Create Ethernet bearer for device */ - eb_ptr->dev = dev; - eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - eb_ptr->tipc_packet_type.dev = dev; - eb_ptr->tipc_packet_type.func = recv_msg; - eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; - INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - INIT_WORK(&eb_ptr->setup, setup_bearer); - schedule_work(&eb_ptr->setup); + if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ + return 1; - /* Associate TIPC bearer with Ethernet bearer */ - eb_ptr->bearer = tb_ptr; - tb_ptr->usr_handle = (void *)eb_ptr; - memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); - memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN); - tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH; - tb_ptr->bcast_addr.broadcast = 1; - tb_ptr->mtu = dev->mtu; - tb_ptr->blocked = 0; - eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); + sprintf(strbuf, "%pM", addr->value); return 0; } -/** - * cleanup_bearer - break association between Ethernet bearer and interface - * - * This routine must be invoked from a work queue because it can sleep. - */ -static void cleanup_bearer(struct work_struct *work) -{ - struct eth_bearer *eb_ptr = - container_of(work, struct eth_bearer, cleanup); - - dev_remove_pack(&eb_ptr->tipc_packet_type); - dev_put(eb_ptr->dev); - eb_ptr->dev = NULL; -} - -/** - * disable_bearer - detach TIPC bearer from an Ethernet interface - * - * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) 
- */ -static void disable_bearer(struct tipc_bearer *tb_ptr) -{ - struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle; - - eb_ptr->bearer = NULL; - INIT_WORK(&eb_ptr->cleanup, cleanup_bearer); - schedule_work(&eb_ptr->cleanup); -} - -/** - * recv_notification - handle device updates from OS - * - * Change the state of the Ethernet bearer (if any) associated with the - * specified device. - */ -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *ptr) +/* Convert from media address format to discovery message addr format */ +static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct eth_bearer *eb_ptr = &eth_bearers[0]; - struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - while ((eb_ptr->dev != dev)) { - if (++eb_ptr == stop) - return NOTIFY_DONE; /* couldn't find device */ - } - if (!eb_ptr->bearer) - return NOTIFY_DONE; /* bearer had been disabled */ - - eb_ptr->bearer->mtu = dev->mtu; - - switch (evt) { - case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) - tipc_continue(eb_ptr->bearer); - else - tipc_block_bearer(eb_ptr->bearer->name); - break; - case NETDEV_UP: - tipc_continue(eb_ptr->bearer); - break; - case NETDEV_DOWN: - tipc_block_bearer(eb_ptr->bearer->name); - break; - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - tipc_block_bearer(eb_ptr->bearer->name); - tipc_continue(eb_ptr->bearer); - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGENAME: - tipc_disable_bearer(eb_ptr->bearer->name); - break; - } - return NOTIFY_OK; -} - -/** - * eth_addr2str - convert Ethernet address to string - */ -static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) -{ - if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ - return 1; - - sprintf(str_buf, "%pM", a->value); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] =
TIPC_MEDIA_TYPE_ETH; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } -/** - * eth_str2addr - convert Ethernet address format to message header format - */ -static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert raw mac address format to media addr format */ +static int tipc_eth_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + const char *msg) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN); + memset(addr, 0, sizeof(*addr)); + ether_addr_copy(addr->value, msg); + addr->media_id = TIPC_MEDIA_TYPE_ETH; + addr->broadcast = is_broadcast_ether_addr(addr->value); return 0; } -/** - * eth_str2addr - convert message header address format to Ethernet format - */ -static int eth_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert discovery msg addr format to Ethernet media addr format */ +static int tipc_eth_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) - return 1; - - eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); - return 0; + /* Skip past preamble: */ + msg += TIPC_MEDIA_ADDR_OFFSET; + return tipc_eth_raw2addr(b, addr, msg); } -/* - * Ethernet media registration info - */ -static struct tipc_media eth_media_info = { - .send_msg = send_msg, - .enable_bearer = enable_bearer, - .disable_bearer = disable_bearer, - .addr2str = eth_addr2str, - .addr2msg = eth_addr2msg, - .msg2addr = eth_msg2addr, +/* Ethernet media registration info */ +struct tipc_media eth_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_eth_addr2str, + .addr2msg = tipc_eth_addr2msg, + .msg2addr = tipc_eth_msg2addr, + .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, 
.tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_MAX_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_ETH, + .hwaddr_len = ETH_ALEN, .name = "eth" }; - -/** - * tipc_eth_media_start - activate Ethernet bearer support - * - * Register Ethernet media type with TIPC bearer code. Also register - * with OS for notifications about device state changes. - */ -int tipc_eth_media_start(void) -{ - int res; - - if (eth_started) - return -EINVAL; - - res = tipc_register_media(&eth_media_info); - if (res) - return res; - - res = register_netdevice_notifier(&notifier); - if (!res) - eth_started = 1; - return res; -} - -/** - * tipc_eth_media_stop - deactivate Ethernet bearer support - */ -void tipc_eth_media_stop(void) -{ - if (!eth_started) - return; - - flush_scheduled_work(); - unregister_netdevice_notifier(&notifier); - eth_started = 0; -} diff --git a/net/tipc/group.c b/net/tipc/group.c new file mode 100644 index 000000000000..3e137d8c9d2f --- /dev/null +++ b/net/tipc/group.c @@ -0,0 +1,959 @@ +/* + * net/tipc/group.c: TIPC group messaging code + * + * Copyright (c) 2017, Ericsson AB + * Copyright (c) 2020, Red Hat Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission.
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "addr.h" +#include "group.h" +#include "bcast.h" +#include "topsrv.h" +#include "msg.h" +#include "socket.h" +#include "node.h" +#include "name_table.h" +#include "subscr.h" + +#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1) +#define ADV_IDLE ADV_UNIT +#define ADV_ACTIVE (ADV_UNIT * 12) + +enum mbr_state { + MBR_JOINING, + MBR_PUBLISHED, + MBR_JOINED, + MBR_PENDING, + MBR_ACTIVE, + MBR_RECLAIMING, + MBR_REMITTED, + MBR_LEAVING +}; + +struct tipc_member { + struct rb_node tree_node; + struct list_head list; + struct list_head small_win; + struct sk_buff_head deferredq; + struct tipc_group *group; + u32 node; + u32 port; + u32 instance; + enum mbr_state state; + u16 advertised; + u16 window; + u16 bc_rcv_nxt; + u16 bc_syncpt; + u16 bc_acked; +}; + +struct tipc_group { + struct rb_root members; + struct list_head small_win; + struct list_head pending; + struct list_head active; + struct tipc_nlist dests; + struct net *net; + int subid; + u32 type; + 
u32 instance; + u32 scope; + u32 portid; + u16 member_cnt; + u16 active_cnt; + u16 max_active; + u16 bc_snd_nxt; + u16 bc_ackers; + bool *open; + bool loopback; + bool events; +}; + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq); + +static void tipc_group_open(struct tipc_member *m, bool *wakeup) +{ + *wakeup = false; + if (list_empty(&m->small_win)) + return; + list_del_init(&m->small_win); + *m->group->open = true; + *wakeup = true; +} + +static void tipc_group_decr_active(struct tipc_group *grp, + struct tipc_member *m) +{ + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) + grp->active_cnt--; +} + +static int tipc_group_rcvbuf_limit(struct tipc_group *grp) +{ + int max_active, active_pool, idle_pool; + int mcnt = grp->member_cnt + 1; + + /* Limit simultaneous reception from other members */ + max_active = min(mcnt / 8, 64); + max_active = max(max_active, 16); + grp->max_active = max_active; + + /* Reserve blocks for active and idle members */ + active_pool = max_active * ADV_ACTIVE; + idle_pool = (mcnt - max_active) * ADV_IDLE; + + /* Scale to bytes, considering worst-case truesize/msgsize ratio */ + return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4; +} + +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) +{ + return grp->bc_snd_nxt; +} + +static bool tipc_group_is_receiver(struct tipc_member *m) +{ + return m && m->state != MBR_JOINING && m->state != MBR_LEAVING; +} + +static bool tipc_group_is_sender(struct tipc_member *m) +{ + return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED; +} + +u32 tipc_group_exclude(struct tipc_group *grp) +{ + if (!grp->loopback) + return grp->portid; + return 0; +} + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq, + bool *group_is_open) +{ + u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; + bool global = mreq->scope != TIPC_NODE_SCOPE; + struct 
tipc_group *grp; + u32 type = mreq->type; + + grp = kzalloc(sizeof(*grp), GFP_ATOMIC); + if (!grp) + return NULL; + tipc_nlist_init(&grp->dests, tipc_own_addr(net)); + INIT_LIST_HEAD(&grp->small_win); + INIT_LIST_HEAD(&grp->active); + INIT_LIST_HEAD(&grp->pending); + grp->members = RB_ROOT; + grp->net = net; + grp->portid = portid; + grp->type = type; + grp->instance = mreq->instance; + grp->scope = mreq->scope; + grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; + grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; + grp->open = group_is_open; + *grp->open = false; + filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, + filter, &grp->subid)) + return grp; + kfree(grp); + return NULL; +} + +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); + tipc_group_update_member(m, 0); + } + tipc_node_distr_xmit(net, &xmitq); + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + +void tipc_group_delete(struct net *net, struct tipc_group *grp) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq); + __skb_queue_purge(&m->deferredq); + list_del(&m->list); + kfree(m); + } + tipc_node_distr_xmit(net, &xmitq); + tipc_nlist_purge(&grp->dests); + tipc_topsrv_kern_unsubscr(net, grp->subid); + kfree(grp); +} + +static struct tipc_member *tipc_group_find_member(struct tipc_group *grp, + u32 node, u32 port) +{ + struct rb_node *n = grp->members.rb_node; + u64 nkey, key = (u64)node << 32 | port; + struct tipc_member *m; + + while (n) { + m 
= container_of(n, struct tipc_member, tree_node); + nkey = (u64)m->node << 32 | m->port; + if (key < nkey) + n = n->rb_left; + else if (key > nkey) + n = n->rb_right; + else + return m; + } + return NULL; +} + +static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp, + u32 node, u32 port) +{ + struct tipc_member *m; + + m = tipc_group_find_member(grp, node, port); + if (m && tipc_group_is_receiver(m)) + return m; + return NULL; +} + +static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, + u32 node) +{ + struct tipc_member *m; + struct rb_node *n; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (m->node == node) + return m; + } + return NULL; +} + +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) +{ + u64 nkey, key = (u64)m->node << 32 | m->port; + struct rb_node **n, *parent = NULL; + struct tipc_member *tmp; + + n = &grp->members.rb_node; + while (*n) { + tmp = container_of(*n, struct tipc_member, tree_node); + parent = *n; + tmp = container_of(parent, struct tipc_member, tree_node); + nkey = (u64)tmp->node << 32 | tmp->port; + if (key < nkey) + n = &(*n)->rb_left; + else if (key > nkey) + n = &(*n)->rb_right; + else + return -EEXIST; + } + rb_link_node(&m->tree_node, parent, n); + rb_insert_color(&m->tree_node, &grp->members); + return 0; +} + +static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, + u32 node, u32 port, + u32 instance, int state) +{ + struct tipc_member *m; + int ret; + + m = kzalloc(sizeof(*m), GFP_ATOMIC); + if (!m) + return NULL; + INIT_LIST_HEAD(&m->list); + INIT_LIST_HEAD(&m->small_win); + __skb_queue_head_init(&m->deferredq); + m->group = grp; + m->node = node; + m->port = port; + m->instance = instance; + m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } + grp->member_cnt++; + tipc_nlist_add(&grp->dests, 
m->node); + m->state = state; + return m; +} + +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance) +{ + tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED); +} + +static void tipc_group_delete_member(struct tipc_group *grp, + struct tipc_member *m) +{ + rb_erase(&m->tree_node, &grp->members); + grp->member_cnt--; + + /* Check if we were waiting for replicast ack from this member */ + if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1)) + grp->bc_ackers--; + + list_del_init(&m->list); + list_del_init(&m->small_win); + tipc_group_decr_active(grp, m); + + /* If last member on a node, remove node from dest list */ + if (!tipc_group_find_node(grp, m->node)) + tipc_nlist_del(&grp->dests, m->node); + + kfree(m); +} + +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) +{ + return &grp->dests; +} + +void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq, + int *scope) +{ + seq->type = grp->type; + seq->lower = grp->instance; + seq->upper = grp->instance; + *scope = grp->scope; +} + +void tipc_group_update_member(struct tipc_member *m, int len) +{ + struct tipc_group *grp = m->group; + struct tipc_member *_m, *tmp; + + if (!tipc_group_is_receiver(m)) + return; + + m->window -= len; + + if (m->window >= ADV_IDLE) + return; + + list_del_init(&m->small_win); + + /* Sort member into small_window members' list */ + list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) { + if (_m->window > m->window) + break; + } + list_add_tail(&m->small_win, &_m->small_win); +} + +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) +{ + u16 prev = grp->bc_snd_nxt - 1; + struct tipc_member *m; + struct rb_node *n; + u16 ackers = 0; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (tipc_group_is_receiver(m)) { + tipc_group_update_member(m, len); + m->bc_acked = prev; + ackers++; + } + } + + /* Mark number of 
acknowledges to expect, if any */ + if (ack) + grp->bc_ackers = ackers; + grp->bc_snd_nxt++; +} + +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **mbr) +{ + struct sk_buff_head xmitq; + struct tipc_member *m; + int adv, state; + + m = tipc_group_find_dest(grp, dnode, dport); + if (!tipc_group_is_receiver(m)) { + *mbr = NULL; + return false; + } + *mbr = m; + + if (m->window >= len) + return false; + + *grp->open = false; + + /* If not fully advertised, do it now to prevent mutual blocking */ + adv = m->advertised; + state = m->state; + if (state == MBR_JOINED && adv == ADV_IDLE) + return true; + if (state == MBR_ACTIVE && adv == ADV_ACTIVE) + return true; + if (state == MBR_PENDING && adv == ADV_IDLE) + return true; + __skb_queue_head_init(&xmitq); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); + tipc_node_distr_xmit(grp->net, &xmitq); + return true; +} + +bool tipc_group_bc_cong(struct tipc_group *grp, int len) +{ + struct tipc_member *m = NULL; + + /* If prev bcast was replicast, reject until all receivers have acked */ + if (grp->bc_ackers) { + *grp->open = false; + return true; + } + if (list_empty(&grp->small_win)) + return false; + + m = list_first_entry(&grp->small_win, struct tipc_member, small_win); + if (m->window >= len) + return false; + + return tipc_group_cong(grp, m->node, m->port, len, &m); +} + +/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number + */ +static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) +{ + struct tipc_msg *_hdr, *hdr = buf_msg(skb); + u16 bc_seqno = msg_grp_bc_seqno(hdr); + struct sk_buff *_skb, *tmp; + int mtyp = msg_type(hdr); + + /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */ + if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (!less(bc_seqno, msg_grp_bc_seqno(_hdr))) + continue; + __skb_queue_before(defq, _skb, 
skb); + return; + } + /* Bcast was not bypassed, - add to tail */ + } + /* Unicasts are never bypassed, - always add to tail */ + __skb_queue_tail(defq, skb); +} + +/* tipc_group_filter_msg() - determine if we should accept arriving message + */ +void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + bool ack, deliver, update, leave = false; + struct sk_buff_head *defq; + struct tipc_member *m; + struct tipc_msg *hdr; + u32 node, port; + int mtyp, blks; + + if (!skb) + return; + + hdr = buf_msg(skb); + node = msg_orignode(hdr); + port = msg_origport(hdr); + + if (!msg_in_group(hdr)) + goto drop; + + m = tipc_group_find_member(grp, node, port); + if (!tipc_group_is_sender(m)) + goto drop; + + if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + goto drop; + + TIPC_SKB_CB(skb)->orig_member = m->instance; + defq = &m->deferredq; + tipc_group_sort_msg(skb, defq); + + while ((skb = skb_peek(defq))) { + hdr = buf_msg(skb); + mtyp = msg_type(hdr); + blks = msg_blocks(hdr); + deliver = true; + ack = false; + update = false; + + if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + break; + + /* Decide what to do with message */ + switch (mtyp) { + case TIPC_GRP_MCAST_MSG: + if (msg_nameinst(hdr) != grp->instance) { + update = true; + deliver = false; + } + fallthrough; + case TIPC_GRP_BCAST_MSG: + m->bc_rcv_nxt++; + ack = msg_grp_bc_ack_req(hdr); + break; + case TIPC_GRP_UCAST_MSG: + break; + case TIPC_GRP_MEMBER_EVT: + if (m->state == MBR_LEAVING) + leave = true; + if (!grp->events) + deliver = false; + break; + default: + break; + } + + /* Execute decisions */ + __skb_dequeue(defq); + if (deliver) + __skb_queue_tail(inputq, skb); + else + kfree_skb(skb); + + if (ack) + tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); + + if (leave) { + __skb_queue_purge(defq); + tipc_group_delete_member(grp, m); + break; + } + if (!update) + continue; + + tipc_group_update_rcv_win(grp, 
blks, node, port, xmitq); + } + return; +drop: + kfree_skb(skb); +} + +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq) +{ + struct list_head *active = &grp->active; + int max_active = grp->max_active; + int reclaim_limit = max_active * 3 / 4; + int active_cnt = grp->active_cnt; + struct tipc_member *m, *rm, *pm; + + m = tipc_group_find_member(grp, node, port); + if (!m) + return; + + m->advertised -= blks; + + switch (m->state) { + case MBR_JOINED: + /* First, decide if member can go active */ + if (active_cnt <= max_active) { + m->state = MBR_ACTIVE; + list_add_tail(&m->list, active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } else { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + } + + if (active_cnt < reclaim_limit) + break; + + /* Reclaim from oldest active member, if possible */ + if (!list_empty(active)) { + rm = list_first_entry(active, struct tipc_member, list); + rm->state = MBR_RECLAIMING; + list_del_init(&rm->list); + tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); + break; + } + /* Nobody to reclaim from; - revert oldest pending to JOINED */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + list_del_init(&pm->list); + pm->state = MBR_JOINED; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; + case MBR_ACTIVE: + if (!list_is_last(&m->list, &grp->active)) + list_move_tail(&m->list, &grp->active); + if (m->advertised > (ADV_ACTIVE * 3 / 4)) + break; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + break; + case MBR_REMITTED: + if (m->advertised > ADV_IDLE) + break; + m->state = MBR_JOINED; + grp->active_cnt--; + if (m->advertised < ADV_IDLE) { + pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } + + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = 
list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; + case MBR_RECLAIMING: + case MBR_JOINING: + case MBR_LEAVING: + default: + break; + } +} + +static void tipc_group_create_event(struct tipc_group *grp, + struct tipc_member *m, + u32 event, u16 seqno, + struct sk_buff_head *inputq) +{ u32 dnode = tipc_own_addr(grp->net); + struct tipc_event evt; + struct sk_buff *skb; + struct tipc_msg *hdr; + + memset(&evt, 0, sizeof(evt)); + evt.event = event; + evt.found_lower = m->instance; + evt.found_upper = m->instance; + evt.port.ref = m->port; + evt.port.node = m->node; + evt.s.seq.type = grp->type; + evt.s.seq.lower = m->instance; + evt.s.seq.upper = m->instance; + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT, + GROUP_H_SIZE, sizeof(evt), dnode, m->node, + grp->portid, m->port, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + msg_set_dest_droppable(hdr, true); + msg_set_grp_bc_seqno(hdr, seqno); + memcpy(msg_data(hdr), &evt, sizeof(evt)); + TIPC_SKB_CB(skb)->orig_member = m->instance; + __skb_queue_tail(inputq, skb); +} + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + int adv = 0; + + skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0, + m->node, tipc_own_addr(grp->net), + m->port, grp->portid, 0); + if (!skb) + return; + + if (m->state == MBR_ACTIVE) + adv = ADV_ACTIVE - m->advertised; + else if (m->state == MBR_JOINED || m->state == MBR_PENDING) + adv = ADV_IDLE - m->advertised; + + hdr = buf_msg(skb); + + if (mtyp == GRP_JOIN_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_LEAVE_MSG) { + 
msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + } else if (mtyp == GRP_ADV_MSG) { + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_ACK_MSG) { + msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt); + } else if (mtyp == GRP_REMIT_MSG) { + msg_set_grp_remitted(hdr, m->window); + } + msg_set_dest_droppable(hdr, true); + __skb_queue_tail(xmitq, skb); +} + +void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, + struct tipc_msg *hdr, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + u32 node = msg_orignode(hdr); + u32 port = msg_origport(hdr); + struct tipc_member *m, *pm; + u16 remitted, in_flight; + + if (!grp) + return; + + if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net)) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (msg_type(hdr)) { + case GRP_JOIN_MSG: + if (!m) + m = tipc_group_create_member(grp, node, port, + 0, MBR_JOINING); + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + m->bc_rcv_nxt = m->bc_syncpt; + m->window += msg_adv_win(hdr); + + /* Wait until PUBLISH event is received if necessary */ + if (m->state != MBR_PUBLISHED) + return; + + /* Member can be taken into service */ + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + return; + case GRP_LEAVE_MSG: + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + list_del_init(&m->list); + tipc_group_open(m, usr_wakeup); + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); + return; + case GRP_ADV_MSG: + if (!m) + return; + m->window += msg_adv_win(hdr); + tipc_group_open(m, usr_wakeup); + return; + case GRP_ACK_MSG: + if (!m) + return; + m->bc_acked = msg_grp_bc_acked(hdr); + if (--grp->bc_ackers) + return; + list_del_init(&m->small_win); + 
*m->group->open = true; + *usr_wakeup = true; + tipc_group_update_member(m, 0); + return; + case GRP_RECLAIM_MSG: + if (!m) + return; + tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); + m->window = ADV_IDLE; + tipc_group_open(m, usr_wakeup); + return; + case GRP_REMIT_MSG: + if (!m || m->state != MBR_RECLAIMING) + return; + + remitted = msg_grp_remitted(hdr); + + /* Messages preceding the REMIT still in receive queue */ + if (m->advertised > remitted) { + m->state = MBR_REMITTED; + in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; + } + /* This should never happen */ + if (m->advertised < remitted) + pr_warn_ratelimited("Unexpected REMIT msg\n"); + + /* All messages preceding the REMIT have been read */ + m->state = MBR_JOINED; + grp->active_cnt--; + m->advertised = ADV_IDLE; + + /* Set oldest pending member to active and advertise */ + if (list_empty(&grp->pending)) + return; + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + if (pm->advertised <= (ADV_ACTIVE * 3 / 4)) + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + return; + default: + pr_warn("Received unknown GROUP_PROTO message\n"); + } +} + +/* tipc_group_member_evt() - receive and handle a member up/down event + */ +void tipc_group_member_evt(struct tipc_group *grp, + bool *usr_wakeup, + int *sk_rcvbuf, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct tipc_event *evt = (void *)msg_data(hdr); + u32 instance = evt->found_lower; + u32 node = evt->port.node; + u32 port = evt->port.ref; + int event = evt->event; + struct tipc_member *m; + struct net *net; + u32 self; + + if (!grp) + return; + + net = grp->net; + self = tipc_own_addr(net); + if (!grp->loopback && node == self && port == grp->portid) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (event) { + case TIPC_PUBLISHED: + /* Send 
and wait for arrival of JOIN message if necessary */ + if (!m) { + m = tipc_group_create_member(grp, node, port, instance, + MBR_PUBLISHED); + if (!m) + break; + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + break; + } + + if (m->state != MBR_JOINING) + break; + + /* Member can be taken into service */ + m->instance = instance; + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + break; + case TIPC_WITHDRAWN: + if (!m) + break; + + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + list_del_init(&m->list); + tipc_group_open(m, usr_wakeup); + + /* Only send event if no LEAVE message can be expected */ + if (!tipc_node_is_up(net, node)) + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_rcv_nxt, inputq); + break; + default: + break; + } + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + +int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) +{ + struct nlattr *group = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_GROUP); + + if (!group) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, + grp->type) || + nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE, + grp->instance) || + nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT, + grp->bc_snd_nxt)) + goto group_msg_cancel; + + if (grp->scope == TIPC_NODE_SCOPE) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE)) + goto group_msg_cancel; + + if (grp->scope == TIPC_CLUSTER_SCOPE) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE)) + goto group_msg_cancel; + + if (*grp->open) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN)) + goto group_msg_cancel; + + nla_nest_end(skb, group); + return 0; + +group_msg_cancel: + nla_nest_cancel(skb, group); + return -1; +} diff --git a/net/tipc/group.h b/net/tipc/group.h new file mode 100644 index 
000000000000..ea4c3be64c78 --- /dev/null +++ b/net/tipc/group.h @@ -0,0 +1,77 @@ +/* + * net/tipc/group.h: Include file for TIPC group unicast/multicast functions + * + * Copyright (c) 2017, Ericsson AB + * Copyright (c) 2020, Red Hat Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_GROUP_H +#define _TIPC_GROUP_H + +#include "core.h" + +struct tipc_group; +struct tipc_member; +struct tipc_msg; + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq, + bool *group_is_open); +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); +void tipc_group_delete(struct net *net, struct tipc_group *grp); +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance); +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); +void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq, + int *scope); +u32 tipc_group_exclude(struct tipc_group *grp); +void tipc_group_filter_msg(struct tipc_group *grp, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, + int *sk_rcvbuf, struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **m); +bool tipc_group_bc_cong(struct tipc_group *grp, int len); +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq); +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); +void tipc_group_update_member(struct tipc_member *m, int len); +int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb); +#endif diff --git a/net/tipc/handler.c b/net/tipc/handler.c deleted file mode 100644 index b36f0fcd9bdf..000000000000 --- a/net/tipc/handler.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * net/tipc/handler.c: TIPC signal handling - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - 
* All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" - -struct queue_item { - struct list_head next_signal; - void (*handler) (unsigned long); - unsigned long data; -}; - -static struct kmem_cache *tipc_queue_item_cache; -static struct list_head signal_queue_head; -static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled __read_mostly; - -static void process_signal_queue(unsigned long dummy); - -static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0); - - -unsigned int tipc_k_signal(Handler routine, unsigned long argument) -{ - struct queue_item *item; - - if (!handler_enabled) { - pr_err("Signal request ignored by handler\n"); - return -ENOPROTOOPT; - } - - spin_lock_bh(&qitem_lock); - item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); - if (!item) { - pr_err("Signal queue out of memory\n"); - spin_unlock_bh(&qitem_lock); - return -ENOMEM; - } - item->handler = routine; - item->data = argument; - list_add_tail(&item->next_signal, &signal_queue_head); - spin_unlock_bh(&qitem_lock); - tasklet_schedule(&tipc_tasklet); - return 0; -} - -static void process_signal_queue(unsigned long dummy) -{ - struct queue_item *__volatile__ item; - struct list_head *l, *n; - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - spin_unlock_bh(&qitem_lock); - item->handler(item->data); - spin_lock_bh(&qitem_lock); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); -} - -int tipc_handler_start(void) -{ - tipc_queue_item_cache = - kmem_cache_create("tipc_queue_items", sizeof(struct queue_item), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!tipc_queue_item_cache) - return -ENOMEM; - - INIT_LIST_HEAD(&signal_queue_head); - tasklet_enable(&tipc_tasklet); - handler_enabled = 1; - return 0; -} - -void tipc_handler_stop(void) -{ - struct list_head *l, *n; - struct queue_item *item; - - if (!handler_enabled) - return; - - handler_enabled = 0; - 
tasklet_kill(&tipc_tasklet); - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); - - kmem_cache_destroy(tipc_queue_item_cache); -} diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 9934a32bfa87..b9ad0434c3cd 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,252 +42,11 @@ #include "core.h" #include "bearer.h" -#define MAX_IB_BEARERS MAX_BEARERS +#define TIPC_MAX_IB_LINK_WIN 500 -/** - * struct ib_bearer - Infiniband bearer data structure - * @bearer: ptr to associated "generic" bearer structure - * @dev: ptr to associated Infiniband network device - * @tipc_packet_type: used in binding TIPC to Infiniband driver - * @cleanup: work item used when disabling bearer - */ - -struct ib_bearer { - struct tipc_bearer *bearer; - struct net_device *dev; - struct packet_type tipc_packet_type; - struct work_struct setup; - struct work_struct cleanup; -}; - -static struct tipc_media ib_media_info; -static struct ib_bearer ib_bearers[MAX_IB_BEARERS]; -static int ib_started; - -/** - * ib_media_addr_set - initialize Infiniband media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. 
- */ -static void ib_media_addr_set(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *mac) -{ - BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); - memcpy(a->value, mac, INFINIBAND_ALEN); - a->media_id = TIPC_MEDIA_TYPE_IB; - a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); -} - -/** - * send_msg - send a TIPC message out over an InfiniBand interface - */ -static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, - struct tipc_media_addr *dest) -{ - struct sk_buff *clone; - struct net_device *dev; - int delta; - - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev; - delta = dev->hard_header_len - skb_headroom(buf); - - if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); - return 0; - } - - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); - return 0; -} - -/** - * recv_msg - handle incoming TIPC message from an InfiniBand interface - * - * Accept only packets explicitly sent to this node, or broadcast packets; - * ignores packets sent using InfiniBand multicast, and traffic sent to other - * nodes (which can happen if interface is running in promiscuous mode). 
- */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv; - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return 0; - } - - if (likely(ib_ptr->bearer)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_recv_msg(buf, ib_ptr->bearer); - return 0; - } - } - kfree_skb(buf); - return 0; -} - -/** - * setup_bearer - setup association between InfiniBand bearer and interface - */ -static void setup_bearer(struct work_struct *work) -{ - struct ib_bearer *ib_ptr = - container_of(work, struct ib_bearer, setup); - - dev_add_pack(&ib_ptr->tipc_packet_type); -} - -/** - * enable_bearer - attach TIPC bearer to an InfiniBand interface - */ -static int enable_bearer(struct tipc_bearer *tb_ptr) -{ - struct net_device *dev; - struct ib_bearer *ib_ptr = &ib_bearers[0]; - struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; - char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; - int pending_dev = 0; - - /* Find unused InfiniBand bearer structure */ - while (ib_ptr->dev) { - if (!ib_ptr->bearer) - pending_dev++; - if (++ib_ptr == stop) - return pending_dev ? 
-EAGAIN : -EDQUOT; - } - - /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); - if (!dev) - return -ENODEV; - - /* Create InfiniBand bearer for device */ - ib_ptr->dev = dev; - ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - ib_ptr->tipc_packet_type.dev = dev; - ib_ptr->tipc_packet_type.func = recv_msg; - ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; - INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); - INIT_WORK(&ib_ptr->setup, setup_bearer); - schedule_work(&ib_ptr->setup); - - /* Associate TIPC bearer with InfiniBand bearer */ - ib_ptr->bearer = tb_ptr; - tb_ptr->usr_handle = (void *)ib_ptr; - memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); - memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); - tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB; - tb_ptr->bcast_addr.broadcast = 1; - tb_ptr->mtu = dev->mtu; - tb_ptr->blocked = 0; - ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); - return 0; -} - -/** - * cleanup_bearer - break association between InfiniBand bearer and interface - * - * This routine must be invoked from a work queue because it can sleep. - */ -static void cleanup_bearer(struct work_struct *work) -{ - struct ib_bearer *ib_ptr = - container_of(work, struct ib_bearer, cleanup); - - dev_remove_pack(&ib_ptr->tipc_packet_type); - dev_put(ib_ptr->dev); - ib_ptr->dev = NULL; -} - -/** - * disable_bearer - detach TIPC bearer from an InfiniBand interface - * - * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) 
- */ -static void disable_bearer(struct tipc_bearer *tb_ptr) -{ - struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle; - - ib_ptr->bearer = NULL; - INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); - schedule_work(&ib_ptr->cleanup); -} - -/** - * recv_notification - handle device updates from OS - * - * Change the state of the InfiniBand bearer (if any) associated with the - * specified device. - */ -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ib_bearer *ib_ptr = &ib_bearers[0]; - struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - while ((ib_ptr->dev != dev)) { - if (++ib_ptr == stop) - return NOTIFY_DONE; /* couldn't find device */ - } - if (!ib_ptr->bearer) - return NOTIFY_DONE; /* bearer had been disabled */ - - ib_ptr->bearer->mtu = dev->mtu; - - switch (evt) { - case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) - tipc_continue(ib_ptr->bearer); - else - tipc_block_bearer(ib_ptr->bearer->name); - break; - case NETDEV_UP: - tipc_continue(ib_ptr->bearer); - break; - case NETDEV_DOWN: - tipc_block_bearer(ib_ptr->bearer->name); - break; - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - tipc_block_bearer(ib_ptr->bearer->name); - tipc_continue(ib_ptr->bearer); - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGENAME: - tipc_disable_bearer(ib_ptr->bearer->name); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block notifier = { - .notifier_call = recv_notification, - .priority = 0, -}; - -/** - * ib_addr2str - convert InfiniBand address to string - */ -static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +/* convert InfiniBand address (media address format) media address to string */ +static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) { if (str_size < 60) /* 60 = 19 * strlen("xx:") + 
strlen("xx\0") */ return 1; @@ -297,76 +56,49 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) return 0; } -/** - * ib_addr2msg - convert InfiniBand address format to message header format - */ -static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; - memcpy(msg_area, a->value, INFINIBAND_ALEN); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } -/** - * ib_msg2addr - convert message header address format to InfiniBand format - */ -static int ib_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw InfiniBand address format to media addr format */ +static int tipc_ib_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + const char *msg) { - ib_media_addr_set(tb_ptr, a, msg_area); + memset(addr, 0, sizeof(*addr)); + memcpy(addr->value, msg, INFINIBAND_ALEN); + addr->media_id = TIPC_MEDIA_TYPE_IB; + addr->broadcast = !memcmp(msg, b->bcast_addr.value, + INFINIBAND_ALEN); return 0; } -/* - * InfiniBand media registration info - */ -static struct tipc_media ib_media_info = { - .send_msg = send_msg, - .enable_bearer = enable_bearer, - .disable_bearer = disable_bearer, - .addr2str = ib_addr2str, - .addr2msg = ib_addr2msg, - .msg2addr = ib_msg2addr, +/* Convert discovery msg addr format to InfiniBand media addr format */ +static int tipc_ib_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + return tipc_ib_raw2addr(b, addr, msg); +} + +/* InfiniBand media registration info */ +struct tipc_media ib_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = 
tipc_ib_addr2str, + .addr2msg = tipc_ib_addr2msg, + .msg2addr = tipc_ib_msg2addr, + .raw2addr = tipc_ib_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_MAX_IB_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_IB, + .hwaddr_len = INFINIBAND_ALEN, .name = "ib" }; - -/** - * tipc_ib_media_start - activate InfiniBand bearer support - * - * Register InfiniBand media type with TIPC bearer code. Also register - * with OS for notifications about device state changes. - */ -int tipc_ib_media_start(void) -{ - int res; - - if (ib_started) - return -EINVAL; - - res = tipc_register_media(&ib_media_info); - if (res) - return res; - - res = register_netdevice_notifier(¬ifier); - if (!res) - ib_started = 1; - return res; -} - -/** - * tipc_ib_media_stop - deactivate InfiniBand bearer support - */ -void tipc_ib_media_stop(void) -{ - if (!ib_started) - return; - - flush_scheduled_work(); - unregister_netdevice_notifier(¬ifier); - ib_started = 0; -} diff --git a/net/tipc/link.c b/net/tipc/link.c index 0cc3d9015c5d..931f55f781a1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012, Ericsson AB + * Copyright (c) 1996-2007, 2012-2016, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. 
* @@ -35,2925 +35,2952 @@ */ #include "core.h" +#include "subscr.h" #include "link.h" -#include "port.h" +#include "bcast.h" +#include "socket.h" #include "name_distr.h" #include "discover.h" -#include "config.h" +#include "netlink.h" +#include "monitor.h" +#include "trace.h" +#include "crypto.h" #include <linux/pkt_sched.h> -/* - * Error message prefixes - */ -static const char *link_co_err = "Link changeover error, "; -static const char *link_rst_msg = "Resetting link "; -static const char *link_unk_evt = "Unknown link event "; +struct tipc_stats { + u32 sent_pkts; + u32 recv_pkts; + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. 
length profiling */ +}; -/* - * Out-of-range value for link session numbers +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @net: pointer to namespace struct + * @peer_session: link session # being used by peer end of link + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node + * @silent_intv_cnt: # of timer intervals without any reception from peer + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @mon_state: cookie with information needed by link monitor + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @backlogq: queue for messages waiting to be sent + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. 
+ * @rcv_nxt: next sequence number to expect for inbound messages + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @reasm_buf: head of partially reassembled inbound message fragments + * @stats: collects statistics regarding link activity + * @session: session to be used by link + * @snd_nxt_state: next send seq number + * @rcv_nxt_state: next rcv seq number + * @in_session: have received ACTIVATE_MSG from peer + * @active: link is active + * @if_name: associated interface name + * @rst_cnt: link reset counter + * @drop_point: seq number for failover handling (FIXME) + * @failover_reasm_skb: saved failover msg ptr (FIXME) + * @failover_deferdq: deferred message queue for failover processing (FIXME) + * @transmq: the link's transmit queue + * @backlog: link's backlog by priority (importance) + * @snd_nxt: next sequence number to be used + * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @deferdq: deferred receive queue + * @window: sliding window size for congestion handling + * @min_win: minimal send window to be used by link + * @ssthresh: slow start threshold for congestion handling + * @max_win: maximal send window to be used by link + * @cong_acks: congestion acks for congestion avoidance (FIXME) + * @checkpoint: seq number for congestion window size handling + * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message + * @last_gap: last gap ack blocks for bcast (FIXME) + * @last_ga: ptr to gap ack blocks + * @bc_rcvlink: the peer specific link used for broadcast reception + * @bc_sndlink: the namespace global link used for broadcast sending + * @nack_state: bcast nack state + * @bc_peer_is_up: peer has acked the bcast init msg */ -#define INVALID_SESSION 0x10000 +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct net *net; + + 
/* Management and link supervision data */ + u16 peer_session; + u16 session; + u16 snd_nxt_state; + u16 rcv_nxt_state; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + u32 abort_limit; + u32 state; + u16 peer_caps; + bool in_session; + bool active; + u32 silent_intv_cnt; + char if_name[TIPC_MAX_IF_NAME]; + u32 priority; + char net_plane; + struct tipc_mon_state mon_state; + u16 rst_cnt; + + /* Failover/synch */ + u16 drop_point; + struct sk_buff *failover_reasm_skb; + struct sk_buff_head failover_deferdq; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + struct sk_buff *target_bskb; + } backlog[5]; + u16 snd_nxt; + + /* Reception */ + u16 rcv_nxt; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + u16 window; + u16 min_win; + u16 ssthresh; + u16 max_win; + u16 cong_acks; + u16 checkpoint; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + struct sk_buff *reasm_tnlmsg; + + /* Broadcast */ + u16 ackers; + u16 acked; + u16 last_gap; + struct tipc_gap_ack_blks *last_ga; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + u8 nack_state; + bool bc_peer_is_up; + + /* Statistics */ + struct tipc_stats stats; +}; /* - * Link state events: + * Error message prefixes */ -#define STARTING_EVT 856384768 /* link processing trigger */ -#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */ -#define TIMEOUT_EVT 560817u /* link timer expired */ +static const char *link_co_err = "Link tunneling error, "; +static const char *link_rst_msg = "Resetting link "; -/* - * The following two 'message types' is really just implementation - * data conveniently stored in the message header. 
- * They must not be considered part of the protocol +/* Send states for broadcast NACKs */ -#define OPEN_MSG 0 -#define CLOSED_MSG 1 +enum { + BC_NACK_SND_CONDITIONAL, + BC_NACK_SND_UNCONDITIONAL, + BC_NACK_SND_SUPPRESS, +}; -/* - * State value stored in 'exp_msg_count' - */ -#define START_CHANGEOVER 100000u +#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10)) +#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1)) -/** - * struct tipc_link_name - deconstructed link name - * @addr_local: network address of node at this end - * @if_local: name of interface at this end - * @addr_peer: network address of node at far end - * @if_peer: name of interface at far end +/* Link FSM states: */ -struct tipc_link_name { - u32 addr_local; - char if_local[TIPC_MAX_IF_NAME]; - u32 addr_peer; - char if_peer[TIPC_MAX_IF_NAME]; +enum { + LINK_ESTABLISHED = 0xe, + LINK_ESTABLISHING = 0xe << 4, + LINK_RESET = 0x1 << 8, + LINK_RESETTING = 0x2 << 12, + LINK_PEER_RESET = 0xd << 16, + LINK_FAILINGOVER = 0xf << 20, + LINK_SYNCHING = 0xc << 24 }; -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, - struct sk_buff *buf); -static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf); -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); -static int link_send_sections_long(struct tipc_port *sender, - struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - u32 destnode); -static void link_state_event(struct tipc_link *l_ptr, u32 event); -static void link_reset_statistics(struct tipc_link *l_ptr); -static void link_print(struct tipc_link *l_ptr, const char *str); -static void link_start(struct tipc_link *l_ptr); -static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf); -static void tipc_link_send_sync(struct tipc_link *l); -static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf); - +static 
int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, + struct sk_buff_head *xmitq); +static void link_print(struct tipc_link *l, const char *str); +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, + struct tipc_link *l, u8 start_index); +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr); +static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, + u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + bool *retransmitted, int *rc); +static void tipc_link_update_cwin(struct tipc_link *l, int released, + bool retransmitted); /* - * Simple link routines + * Simple non-static link routines (i.e. 
referenced outside this file) */ -static unsigned int align(unsigned int i) +bool tipc_link_is_up(struct tipc_link *l) { - return (i + 3) & ~3u; + return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); } -static void link_init_max_pkt(struct tipc_link *l_ptr) +bool tipc_link_peer_is_down(struct tipc_link *l) { - u32 max_pkt; - - max_pkt = (l_ptr->b_ptr->mtu & ~3); - if (max_pkt > MAX_MSG_SIZE) - max_pkt = MAX_MSG_SIZE; - - l_ptr->max_pkt_target = max_pkt; - if (l_ptr->max_pkt_target < MAX_PKT_DEFAULT) - l_ptr->max_pkt = l_ptr->max_pkt_target; - else - l_ptr->max_pkt = MAX_PKT_DEFAULT; + return l->state == LINK_PEER_RESET; +} - l_ptr->max_pkt_probes = 0; +bool tipc_link_is_reset(struct tipc_link *l) +{ + return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); } -static u32 link_next_sent(struct tipc_link *l_ptr) +bool tipc_link_is_establishing(struct tipc_link *l) { - if (l_ptr->next_out) - return buf_seqno(l_ptr->next_out); - return mod(l_ptr->next_out_no); + return l->state == LINK_ESTABLISHING; } -static u32 link_last_sent(struct tipc_link *l_ptr) +bool tipc_link_is_synching(struct tipc_link *l) { - return mod(link_next_sent(l_ptr) - 1); + return l->state == LINK_SYNCHING; } -/* - * Simple non-static link routines (i.e. 
referenced outside this file) - */ -int tipc_link_is_up(struct tipc_link *l_ptr) +bool tipc_link_is_failingover(struct tipc_link *l) { - if (!l_ptr) - return 0; - return link_working_working(l_ptr) || link_working_unknown(l_ptr); + return l->state == LINK_FAILINGOVER; } -int tipc_link_is_active(struct tipc_link *l_ptr) +bool tipc_link_is_blocked(struct tipc_link *l) { - return (l_ptr->owner->active_links[0] == l_ptr) || - (l_ptr->owner->active_links[1] == l_ptr); + return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } -/** - * link_name_validate - validate & (optionally) deconstruct tipc_link name - * @name: ptr to link name string - * @name_parts: ptr to area for link name components (or NULL if not needed) - * - * Returns 1 if link name is valid, otherwise 0. - */ -static int link_name_validate(const char *name, - struct tipc_link_name *name_parts) -{ - char name_copy[TIPC_MAX_LINK_NAME]; - char *addr_local; - char *if_local; - char *addr_peer; - char *if_peer; - char dummy; - u32 z_local, c_local, n_local; - u32 z_peer, c_peer, n_peer; - u32 if_local_len; - u32 if_peer_len; - - /* copy link name & ensure length is OK */ - name_copy[TIPC_MAX_LINK_NAME - 1] = 0; - /* need above in case non-Posix strncpy() doesn't pad with nulls */ - strncpy(name_copy, name, TIPC_MAX_LINK_NAME); - if (name_copy[TIPC_MAX_LINK_NAME - 1] != 0) - return 0; +static bool link_is_bc_sndlink(struct tipc_link *l) +{ + return !l->bc_sndlink; +} - /* ensure all component parts of link name are present */ - addr_local = name_copy; - if_local = strchr(addr_local, ':'); - if (if_local == NULL) - return 0; - *(if_local++) = 0; - addr_peer = strchr(if_local, '-'); - if (addr_peer == NULL) - return 0; - *(addr_peer++) = 0; - if_local_len = addr_peer - if_local; - if_peer = strchr(addr_peer, ':'); - if (if_peer == NULL) - return 0; - *(if_peer++) = 0; - if_peer_len = strlen(if_peer) + 1; - - /* validate component parts of link name */ - if ((sscanf(addr_local, "%u.%u.%u%c", - 
&z_local, &c_local, &n_local, &dummy) != 3) || - (sscanf(addr_peer, "%u.%u.%u%c", - &z_peer, &c_peer, &n_peer, &dummy) != 3) || - (z_local > 255) || (c_local > 4095) || (n_local > 4095) || - (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) || - (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) || - (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME)) - return 0; +static bool link_is_bc_rcvlink(struct tipc_link *l) +{ + return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); +} - /* return link name components, if necessary */ - if (name_parts) { - name_parts->addr_local = tipc_addr(z_local, c_local, n_local); - strcpy(name_parts->if_local, if_local); - name_parts->addr_peer = tipc_addr(z_peer, c_peer, n_peer); - strcpy(name_parts->if_peer, if_peer); - } - return 1; +void tipc_link_set_active(struct tipc_link *l, bool active) +{ + l->active = active; } -/** - * link_timeout - handle expiration of link timer - * @l_ptr: pointer to link - * - * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict - * with tipc_link_delete(). (There is no risk that the node will be deleted by - * another thread because tipc_link_delete() always cancels the link timer before - * tipc_node_delete() is called.) 
- */ -static void link_timeout(struct tipc_link *l_ptr) +u32 tipc_link_id(struct tipc_link *l) { - tipc_node_lock(l_ptr->owner); + return l->peer_bearer_id << 16 | l->bearer_id; +} - /* update counters used in statistical profiling of send traffic */ - l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size; - l_ptr->stats.queue_sz_counts++; +int tipc_link_min_win(struct tipc_link *l) +{ + return l->min_win; +} - if (l_ptr->first_out) { - struct tipc_msg *msg = buf_msg(l_ptr->first_out); - u32 length = msg_size(msg); +int tipc_link_max_win(struct tipc_link *l) +{ + return l->max_win; +} - if ((msg_user(msg) == MSG_FRAGMENTER) && - (msg_type(msg) == FIRST_FRAGMENT)) { - length = msg_size(msg_get_wrapped(msg)); - } - if (length) { - l_ptr->stats.msg_lengths_total += length; - l_ptr->stats.msg_length_counts++; - if (length <= 64) - l_ptr->stats.msg_length_profile[0]++; - else if (length <= 256) - l_ptr->stats.msg_length_profile[1]++; - else if (length <= 1024) - l_ptr->stats.msg_length_profile[2]++; - else if (length <= 4096) - l_ptr->stats.msg_length_profile[3]++; - else if (length <= 16384) - l_ptr->stats.msg_length_profile[4]++; - else if (length <= 32768) - l_ptr->stats.msg_length_profile[5]++; - else - l_ptr->stats.msg_length_profile[6]++; - } - } +int tipc_link_prio(struct tipc_link *l) +{ + return l->priority; +} - /* do all other link processing performed on a periodic basis */ +unsigned long tipc_link_tolerance(struct tipc_link *l) +{ + return l->tolerance; +} - link_state_event(l_ptr, TIMEOUT_EVT); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) +{ + return l->inputq; +} - if (l_ptr->next_out) - tipc_link_push_queue(l_ptr); +char tipc_link_plane(struct tipc_link *l) +{ + return l->net_plane; +} - tipc_node_unlock(l_ptr->owner); +struct net *tipc_link_net(struct tipc_link *l) +{ + return l->net; } -static void link_set_timer(struct tipc_link *l_ptr, u32 time) +void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) { - 
k_start_timer(&l_ptr->timer, time); + l->peer_caps = capabilities; } -/** - * tipc_link_create - create a new link - * @n_ptr: pointer to associated node - * @b_ptr: pointer to associated bearer - * @media_addr: media address to use when sending messages over link - * - * Returns pointer to link. - */ -struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, - struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr) +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq) { - struct tipc_link *l_ptr; - struct tipc_msg *msg; - char *if_name; - char addr_string[16]; - u32 peer = n_ptr->addr; + struct tipc_link *rcv_l = uc_l->bc_rcvlink; - if (n_ptr->link_cnt >= 2) { - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_err("Attempt to establish third link to %s\n", addr_string); - return NULL; - } + snd_l->ackers++; + rcv_l->acked = snd_l->snd_nxt - 1; + snd_l->state = LINK_ESTABLISHED; + tipc_link_build_bc_init_msg(uc_l, xmitq); +} - if (n_ptr->links[b_ptr->identity]) { - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_err("Attempt to establish second link on <%s> to %s\n", - b_ptr->name, addr_string); - return NULL; - } +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq) +{ + u16 ack = snd_l->snd_nxt - 1; - l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); - if (!l_ptr) { - pr_warn("Link creation failed, no memory\n"); - return NULL; + snd_l->ackers--; + rcv_l->bc_peer_is_up = true; + rcv_l->state = LINK_ESTABLISHED; + tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL); + trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); + tipc_link_reset(rcv_l); + rcv_l->state = LINK_RESET; + if (!snd_l->ackers) { + trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!"); + tipc_link_reset(snd_l); + snd_l->state = LINK_RESET; + __skb_queue_purge(xmitq); } - - l_ptr->addr = peer; - if_name = strchr(b_ptr->name, ':') + 1; - 
sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), - if_name, - tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); - /* note: peer i/f name is updated by reset/activate message */ - memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); - l_ptr->owner = n_ptr; - l_ptr->checkpoint = 1; - l_ptr->peer_session = INVALID_SESSION; - l_ptr->b_ptr = b_ptr; - link_set_supervision_props(l_ptr, b_ptr->tolerance); - l_ptr->state = RESET_UNKNOWN; - - l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; - msg = l_ptr->pmsg; - tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); - msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, (tipc_random & 0xffff)); - msg_set_bearer_id(msg, b_ptr->identity); - strcpy((char *)msg_data(msg), if_name); - - l_ptr->priority = b_ptr->priority; - tipc_link_set_queue_limits(l_ptr, b_ptr->window); - - link_init_max_pkt(l_ptr); - - l_ptr->next_out_no = 1; - INIT_LIST_HEAD(&l_ptr->waiting_ports); - - link_reset_statistics(l_ptr); - - tipc_node_attach_link(n_ptr, l_ptr); - - k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); - list_add_tail(&l_ptr->link_list, &b_ptr->links); - tipc_k_signal((Handler)link_start, (unsigned long)l_ptr); - - return l_ptr; } -/** - * tipc_link_delete - delete a link - * @l_ptr: pointer to link - * - * Note: 'tipc_net_lock' is write_locked, bearer is locked. - * This routine must not grab the node lock until after link timer cancellation - * to avoid a potential deadlock situation. 
- */ -void tipc_link_delete(struct tipc_link *l_ptr) +int tipc_link_bc_peers(struct tipc_link *l) { - if (!l_ptr) { - pr_err("Attempt to delete non-existent link\n"); - return; - } + return l->ackers; +} - k_cancel_timer(&l_ptr->timer); +static u16 link_bc_rcv_gap(struct tipc_link *l) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 gap = 0; - tipc_node_lock(l_ptr->owner); - tipc_link_reset(l_ptr); - tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_stop(l_ptr); - list_del_init(&l_ptr->link_list); - tipc_node_unlock(l_ptr->owner); - k_term_timer(&l_ptr->timer); - kfree(l_ptr); + if (more(l->snd_nxt, l->rcv_nxt)) + gap = l->snd_nxt - l->rcv_nxt; + if (skb) + gap = buf_seqno(skb) - l->rcv_nxt; + return gap; } -static void link_start(struct tipc_link *l_ptr) +void tipc_link_set_mtu(struct tipc_link *l, int mtu) { - tipc_node_lock(l_ptr->owner); - link_state_event(l_ptr, STARTING_EVT); - tipc_node_unlock(l_ptr->owner); + l->mtu = mtu; } -/** - * link_schedule_port - schedule port for deferred sending - * @l_ptr: pointer to link - * @origport: reference to sending port - * @sz: amount of data to be sent - * - * Schedules port for renewed sending of messages after link congestion - * has abated. 
- */ -static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) -{ - struct tipc_port *p_ptr; - - spin_lock_bh(&tipc_port_list_lock); - p_ptr = tipc_port_lock(origport); - if (p_ptr) { - if (!p_ptr->wakeup) - goto exit; - if (!list_empty(&p_ptr->wait_list)) - goto exit; - p_ptr->congested = 1; - p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); - list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); - l_ptr->stats.link_congs++; -exit: - tipc_port_unlock(p_ptr); - } - spin_unlock_bh(&tipc_port_list_lock); - return -ELINKCONG; +int tipc_link_mtu(struct tipc_link *l) +{ + return l->mtu; } -void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) +int tipc_link_mss(struct tipc_link *l) { - struct tipc_port *p_ptr; - struct tipc_port *temp_p_ptr; - int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; - - if (all) - win = 100000; - if (win <= 0) - return; - if (!spin_trylock_bh(&tipc_port_list_lock)) - return; - if (link_congested(l_ptr)) - goto exit; - list_for_each_entry_safe(p_ptr, temp_p_ptr, &l_ptr->waiting_ports, - wait_list) { - if (win <= 0) - break; - list_del_init(&p_ptr->wait_list); - spin_lock_bh(p_ptr->lock); - p_ptr->congested = 0; - p_ptr->wakeup(p_ptr); - win -= p_ptr->waiting_pkts; - spin_unlock_bh(p_ptr->lock); - } - -exit: - spin_unlock_bh(&tipc_port_list_lock); +#ifdef CONFIG_TIPC_CRYPTO + return l->mtu - INT_H_SIZE - EMSG_OVERHEAD; +#else + return l->mtu - INT_H_SIZE; +#endif } -/** - * link_release_outqueue - purge link's outbound message queue - * @l_ptr: pointer to link - */ -static void link_release_outqueue(struct tipc_link *l_ptr) +u16 tipc_link_rcv_nxt(struct tipc_link *l) { - struct sk_buff *buf = l_ptr->first_out; - struct sk_buff *next; + return l->rcv_nxt; +} - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - l_ptr->first_out = NULL; - l_ptr->out_queue_size = 0; +u16 tipc_link_acked(struct tipc_link *l) +{ + return l->acked; } -/** - * tipc_link_reset_fragments - purge link's 
inbound message fragments queue - * @l_ptr: pointer to link - */ -void tipc_link_reset_fragments(struct tipc_link *l_ptr) +char *tipc_link_name(struct tipc_link *l) { - struct sk_buff *buf = l_ptr->defragm_buf; - struct sk_buff *next; + return l->name; +} - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - l_ptr->defragm_buf = NULL; +u32 tipc_link_state(struct tipc_link *l) +{ + return l->state; } /** - * tipc_link_stop - purge all inbound and outbound messages associated with link - * @l_ptr: pointer to link + * tipc_link_create - create a new link + * @net: pointer to associated network namespace + * @if_name: associated interface name + * @bearer_id: id (index) of associated bearer + * @tolerance: link tolerance to be used by link + * @net_plane: network plane (A,B,c..) this link belongs to + * @mtu: mtu to be advertised by link + * @priority: priority to be used by link + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link + * @session: session to be used by link + * @peer: node id of peer node + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending + * @bc_rcvlink: the peer specific link used for broadcast reception + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @link: return value, pointer to put the created link + * @self: local unicast link id + * @peer_id: 128-bit ID of peer + * + * Return: true if link was created, otherwise false */ -void tipc_link_stop(struct tipc_link *l_ptr) -{ - struct sk_buff *buf; - struct sk_buff *next; - - buf = l_ptr->oldest_deferred_in; - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - - buf = l_ptr->first_out; - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - - tipc_link_reset_fragments(l_ptr); +bool tipc_link_create(struct net *net, char *if_name, int 
bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + u32 min_win, u32 max_win, u32 session, u32 self, + u32 peer, u8 *peer_id, u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link **link) +{ + char peer_str[NODE_ID_STR_LEN] = {0,}; + char self_str[NODE_ID_STR_LEN] = {0,}; + struct tipc_link *l; - kfree_skb(l_ptr->proto_msg_queue); - l_ptr->proto_msg_queue = NULL; + l = kzalloc(sizeof(*l), GFP_ATOMIC); + if (!l) + return false; + *link = l; + l->session = session; + + /* Set link name for unicast links only */ + if (peer_id) { + if (tipc_nodeid2string(self_str, tipc_own_id(net)) > NODE_ID_LEN) + sprintf(self_str, "%x", self); + if (tipc_nodeid2string(peer_str, peer_id) > NODE_ID_LEN) + sprintf(peer_str, "%x", peer); + } + /* Peer i/f name will be completed by reset/activate message */ + snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown", + self_str, if_name, peer_str); + + strcpy(l->if_name, if_name); + l->addr = peer; + l->peer_caps = peer_caps; + l->net = net; + l->in_session = false; + l->bearer_id = bearer_id; + l->tolerance = tolerance; + if (bc_rcvlink) + bc_rcvlink->tolerance = tolerance; + l->net_plane = net_plane; + l->advertised_mtu = mtu; + l->mtu = mtu; + l->priority = priority; + tipc_link_set_queue_limits(l, min_win, max_win); + l->ackers = 1; + l->bc_sndlink = bc_sndlink; + l->bc_rcvlink = bc_rcvlink; + l->inputq = inputq; + l->namedq = namedq; + l->state = LINK_RESETTING; + __skb_queue_head_init(&l->transmq); + __skb_queue_head_init(&l->backlogq); + __skb_queue_head_init(&l->deferdq); + __skb_queue_head_init(&l->failover_deferdq); + skb_queue_head_init(&l->wakeupq); + skb_queue_head_init(l->inputq); + return true; } -void tipc_link_reset(struct tipc_link *l_ptr) +/** + * tipc_link_bc_create - create new link to be used for broadcast + * @net: pointer to associated network namespace + * @mtu: mtu to be used initially if no 
peers + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @link: return value, pointer to put the created link + * @ownnode: identity of own node + * @peer: node id of peer node + * @peer_id: 128-bit ID of peer + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending + * + * Return: true if link was created, otherwise false + */ +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, + int mtu, u32 min_win, u32 max_win, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link) { - struct sk_buff *buf; - u32 prev_state = l_ptr->state; - u32 checkpoint = l_ptr->next_in_no; - int was_active_link = tipc_link_is_active(l_ptr); - - msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); - - /* Link is down, accept any session */ - l_ptr->peer_session = INVALID_SESSION; - - /* Prepare for max packet size negotiation */ - link_init_max_pkt(l_ptr); - - l_ptr->state = RESET_UNKNOWN; - - if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET)) - return; + struct tipc_link *l; - tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win, + max_win, 0, ownnode, peer, NULL, peer_caps, + bc_sndlink, NULL, inputq, namedq, link)) + return false; - if (was_active_link && tipc_node_active_links(l_ptr->owner) && - l_ptr->owner->permit_changeover) { - l_ptr->reset_checkpoint = checkpoint; - l_ptr->exp_msg_count = START_CHANGEOVER; - } + l = *link; + if (peer_id) { + char peer_str[NODE_ID_STR_LEN] = {0,}; - /* Clean up all queues: */ - link_release_outqueue(l_ptr); - kfree_skb(l_ptr->proto_msg_queue); - 
l_ptr->proto_msg_queue = NULL; - buf = l_ptr->oldest_deferred_in; - while (buf) { - struct sk_buff *next = buf->next; - kfree_skb(buf); - buf = next; + if (tipc_nodeid2string(peer_str, peer_id) > NODE_ID_LEN) + sprintf(peer_str, "%x", peer); + /* Broadcast receiver link name: "broadcast-link:<peer>" */ + snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name, + peer_str); + } else { + strcpy(l->name, tipc_bclink_name); } - if (!list_empty(&l_ptr->waiting_ports)) - tipc_link_wakeup_ports(l_ptr, 1); + trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!"); + tipc_link_reset(l); + l->state = LINK_RESET; + l->ackers = 0; + l->bc_rcvlink = l; - l_ptr->retransm_queue_head = 0; - l_ptr->retransm_queue_size = 0; - l_ptr->last_out = NULL; - l_ptr->first_out = NULL; - l_ptr->next_out = NULL; - l_ptr->unacked_window = 0; - l_ptr->checkpoint = 1; - l_ptr->next_out_no = 1; - l_ptr->deferred_inqueue_sz = 0; - l_ptr->oldest_deferred_in = NULL; - l_ptr->newest_deferred_in = NULL; - l_ptr->fsm_msg_cnt = 0; - l_ptr->stale_count = 0; - link_reset_statistics(l_ptr); -} + /* Broadcast send link is always up */ + if (link_is_bc_sndlink(l)) + l->state = LINK_ESTABLISHED; + /* Disable replicast if even a single peer doesn't support it */ + if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) + tipc_bcast_toggle_rcast(net, false); -static void link_activate(struct tipc_link *l_ptr) -{ - l_ptr->next_in_no = l_ptr->stats.recv_info = 1; - tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); + return true; } /** - * link_state_event - link finite state machine - * @l_ptr: pointer to link - * @event: state machine event to process + * tipc_link_fsm_evt - link finite state machine + * @l: pointer to link + * @evt: state machine event to be processed */ -static void link_state_event(struct tipc_link *l_ptr, unsigned int event) +int tipc_link_fsm_evt(struct tipc_link *l, int evt) { - struct tipc_link *other; - u32 cont_intv = 
l_ptr->continuity_interval; - - if (!l_ptr->started && (event != STARTING_EVT)) - return; /* Not yet. */ - - if (link_blocked(l_ptr)) { - if (event == TIMEOUT_EVT) - link_set_timer(l_ptr, cont_intv); - return; /* Changeover going on */ - } + int rc = 0; + int old_state = l->state; - switch (l_ptr->state) { - case WORKING_WORKING: - switch (event) { - case TRAFFIC_MSG_EVT: - case ACTIVATE_MSG: + switch (l->state) { + case LINK_RESETTING: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; break; - case TIMEOUT_EVT: - if (l_ptr->next_in_no != l_ptr->checkpoint) { - l_ptr->checkpoint = l_ptr->next_in_no; - if (tipc_bclink_acks_missing(l_ptr->owner)) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } else if (l_ptr->max_pkt < l_ptr->max_pkt_target) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } - link_set_timer(l_ptr, cont_intv); - break; - } - l_ptr->state = WORKING_UNKNOWN; - l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv / 4); + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_FAILURE_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILOVER_END_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_RESET: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_ESTABLISHING; + break; + case LINK_FAILOVER_BEGIN_EVT: + l->state = LINK_FAILINGOVER; break; - case RESET_MSG: - pr_info("%s<%s>, requested by peer\n", link_rst_msg, - l_ptr->name); - tipc_link_reset(l_ptr); - l_ptr->state = RESET_RESET; - l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); + case LINK_FAILURE_EVT: + case LINK_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILOVER_END_EVT: break; 
+ case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: default: - pr_err("%s%u in WW state\n", link_unk_evt, event); + goto illegal_evt; } break; - case WORKING_UNKNOWN: - switch (event) { - case TRAFFIC_MSG_EVT: - case ACTIVATE_MSG: - l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; - link_set_timer(l_ptr, cont_intv); + case LINK_PEER_RESET: + switch (evt) { + case LINK_RESET_EVT: + l->state = LINK_ESTABLISHING; break; - case RESET_MSG: - pr_info("%s<%s>, requested by peer while probing\n", - link_rst_msg, l_ptr->name); - tipc_link_reset(l_ptr); - l_ptr->state = RESET_RESET; - l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); + case LINK_PEER_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILURE_EVT: break; - case TIMEOUT_EVT: - if (l_ptr->next_in_no != l_ptr->checkpoint) { - l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; - l_ptr->checkpoint = l_ptr->next_in_no; - if (tipc_bclink_acks_missing(l_ptr->owner)) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } - link_set_timer(l_ptr, cont_intv); - } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv / 4); - } else { /* Link has failed */ - pr_warn("%s<%s>, peer not responding\n", - link_rst_msg, l_ptr->name); - tipc_link_reset(l_ptr); - l_ptr->state = RESET_UNKNOWN; - l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, RESET_MSG, - 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); - } + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_FAILINGOVER: + switch (evt) { + case LINK_FAILOVER_END_EVT: + l->state = LINK_RESET; + break; + case LINK_PEER_RESET_EVT: + case LINK_RESET_EVT: + case 
LINK_ESTABLISH_EVT: + case LINK_FAILURE_EVT: break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: default: - pr_err("%s%u in WU state\n", link_unk_evt, event); + goto illegal_evt; } break; - case RESET_UNKNOWN: - switch (event) { - case TRAFFIC_MSG_EVT: + case LINK_ESTABLISHING: + switch (evt) { + case LINK_ESTABLISH_EVT: + l->state = LINK_ESTABLISHED; break; - case ACTIVATE_MSG: - other = l_ptr->owner->active_links[0]; - if (other && link_working_unknown(other)) - break; - l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; - link_activate(l_ptr); - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - if (l_ptr->owner->working_links == 1) - tipc_link_send_sync(l_ptr); - link_set_timer(l_ptr, cont_intv); + case LINK_FAILOVER_BEGIN_EVT: + l->state = LINK_FAILINGOVER; break; - case RESET_MSG: - l_ptr->state = RESET_RESET; - l_ptr->fsm_msg_cnt = 0; - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); + case LINK_RESET_EVT: + l->state = LINK_RESET; break; - case STARTING_EVT: - l_ptr->started = 1; - /* fall through */ - case TIMEOUT_EVT: - tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); + case LINK_FAILURE_EVT: + case LINK_PEER_RESET_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: break; + case LINK_SYNCH_END_EVT: default: - pr_err("%s%u in RU state\n", link_unk_evt, event); + goto illegal_evt; } break; - case RESET_RESET: - switch (event) { - case TRAFFIC_MSG_EVT: - case ACTIVATE_MSG: - other = l_ptr->owner->active_links[0]; - if (other && link_working_unknown(other)) - break; - l_ptr->state = WORKING_WORKING; - l_ptr->fsm_msg_cnt = 0; - link_activate(l_ptr); - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - if (l_ptr->owner->working_links == 1) - tipc_link_send_sync(l_ptr); - link_set_timer(l_ptr, 
cont_intv); + case LINK_ESTABLISHED: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + rc |= TIPC_LINK_DOWN_EVT; break; - case RESET_MSG: + case LINK_FAILURE_EVT: + l->state = LINK_RESETTING; + rc |= TIPC_LINK_DOWN_EVT; break; - case TIMEOUT_EVT: - tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - link_set_timer(l_ptr, cont_intv); + case LINK_RESET_EVT: + l->state = LINK_RESET; break; + case LINK_ESTABLISH_EVT: + case LINK_SYNCH_END_EVT: + break; + case LINK_SYNCH_BEGIN_EVT: + l->state = LINK_SYNCHING; + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: default: - pr_err("%s%u in RR state\n", link_unk_evt, event); + goto illegal_evt; + } + break; + case LINK_SYNCHING: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_FAILURE_EVT: + l->state = LINK_RESETTING; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_ESTABLISH_EVT: + case LINK_SYNCH_BEGIN_EVT: + break; + case LINK_SYNCH_END_EVT: + l->state = LINK_ESTABLISHED; + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + default: + goto illegal_evt; } break; default: - pr_err("Unknown link state %u/%u\n", l_ptr->state, event); + pr_err("Unknown FSM state %x in %s\n", l->state, l->name); } + trace_tipc_link_fsm(l->name, old_state, l->state, evt); + return rc; +illegal_evt: + pr_err("Illegal FSM event %x in state %x on link %s\n", + evt, l->state, l->name); + trace_tipc_link_fsm(l->name, old_state, l->state, evt); + return rc; } -/* - * link_bundle_buf(): Append contents of a buffer to - * the tail of an existing one. 
+/* link_profile_stats - update statistical profiling of traffic */ -static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, - struct sk_buff *buf) +static void link_profile_stats(struct tipc_link *l) { - struct tipc_msg *bundler_msg = buf_msg(bundler); - struct tipc_msg *msg = buf_msg(buf); - u32 size = msg_size(msg); - u32 bundle_size = msg_size(bundler_msg); - u32 to_pos = align(bundle_size); - u32 pad = to_pos - bundle_size; - - if (msg_user(bundler_msg) != MSG_BUNDLER) - return 0; - if (msg_type(bundler_msg) != OPEN_MSG) - return 0; - if (skb_tailroom(bundler) < (pad + size)) - return 0; - if (l_ptr->max_pkt < (to_pos + size)) - return 0; + struct sk_buff *skb; + struct tipc_msg *msg; + int length; - skb_put(bundler, pad + size); - skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size); - msg_set_size(bundler_msg, to_pos + size); - msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); - kfree_skb(buf); - l_ptr->stats.sent_bundled++; - return 1; -} + /* Update counters used in statistical profiling of send traffic */ + l->stats.accu_queue_sz += skb_queue_len(&l->transmq); + l->stats.queue_sz_counts++; -static void link_add_to_outqueue(struct tipc_link *l_ptr, - struct sk_buff *buf, - struct tipc_msg *msg) -{ - u32 ack = mod(l_ptr->next_in_no - 1); - u32 seqno = mod(l_ptr->next_out_no++); - - msg_set_word(msg, 2, ((ack << 16) | seqno)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - buf->next = NULL; - if (l_ptr->first_out) { - l_ptr->last_out->next = buf; - l_ptr->last_out = buf; - } else - l_ptr->first_out = l_ptr->last_out = buf; + skb = skb_peek(&l->transmq); + if (!skb) + return; + msg = buf_msg(skb); + length = msg_size(msg); - l_ptr->out_queue_size++; - if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; + if (msg_user(msg) == MSG_FRAGMENTER) { + if (msg_type(msg) != FIRST_FRAGMENT) + return; + length = msg_size(msg_inner_hdr(msg)); + } + 
l->stats.msg_lengths_total += length; + l->stats.msg_length_counts++; + if (length <= 64) + l->stats.msg_length_profile[0]++; + else if (length <= 256) + l->stats.msg_length_profile[1]++; + else if (length <= 1024) + l->stats.msg_length_profile[2]++; + else if (length <= 4096) + l->stats.msg_length_profile[3]++; + else if (length <= 16384) + l->stats.msg_length_profile[4]++; + else if (length <= 32768) + l->stats.msg_length_profile[5]++; + else + l->stats.msg_length_profile[6]++; } -static void link_add_chain_to_outqueue(struct tipc_link *l_ptr, - struct sk_buff *buf_chain, - u32 long_msgno) +/** + * tipc_link_too_silent - check if link is "too silent" + * @l: tipc link to be checked + * + * Return: true if the link 'silent_intv_cnt' is about to reach the + * 'abort_limit' value, otherwise false + */ +bool tipc_link_too_silent(struct tipc_link *l) { - struct sk_buff *buf; - struct tipc_msg *msg; - - if (!l_ptr->next_out) - l_ptr->next_out = buf_chain; - while (buf_chain) { - buf = buf_chain; - buf_chain = buf_chain->next; - - msg = buf_msg(buf); - msg_set_long_msgno(msg, long_msgno); - link_add_to_outqueue(l_ptr, buf, msg); - } + return (l->silent_intv_cnt + 2 > l->abort_limit); } -/* - * tipc_link_send_buf() is the 'full path' for messages, called from - * inside TIPC when the 'fast path' in tipc_send_buf - * has failed, and from link_send() +/* tipc_link_timeout - perform periodic task as instructed from node timeout */ -int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - u32 size = msg_size(msg); - u32 dsz = msg_data_sz(msg); - u32 queue_size = l_ptr->out_queue_size; - u32 imp = tipc_msg_tot_importance(msg); - u32 queue_limit = l_ptr->queue_limit[imp]; - u32 max_packet = l_ptr->max_pkt; - - /* Match msg importance against queue limits: */ - if (unlikely(queue_size >= queue_limit)) { - if (imp <= TIPC_CRITICAL_IMPORTANCE) { - link_schedule_port(l_ptr, msg_origport(msg), size); - kfree_skb(buf); - return 
-ELINKCONG; - } - kfree_skb(buf); - if (imp > CONN_MANAGER) { - pr_warn("%s<%s>, send queue full", link_rst_msg, - l_ptr->name); - tipc_link_reset(l_ptr); +int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + int mtyp = 0; + int rc = 0; + bool state = false; + bool probe = false; + bool setup = false; + u16 bc_snt = l->bc_sndlink->snd_nxt - 1; + u16 bc_acked = l->bc_rcvlink->acked; + struct tipc_mon_state *mstate = &l->mon_state; + + trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " "); + trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " "); + switch (l->state) { + case LINK_ESTABLISHED: + case LINK_SYNCHING: + mtyp = STATE_MSG; + link_profile_stats(l); + tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id); + if (mstate->reset || (l->silent_intv_cnt > l->abort_limit)) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + state = bc_acked != bc_snt; + state |= l->bc_rcvlink->rcv_unacked; + state |= l->rcv_unacked; + state |= !skb_queue_empty(&l->transmq); + probe = mstate->probing; + probe |= l->silent_intv_cnt; + if (probe || mstate->monitoring) + l->silent_intv_cnt++; + probe |= !skb_queue_empty(&l->deferdq); + if (l->snd_nxt == l->checkpoint) { + tipc_link_update_cwin(l, 0, 0); + probe = true; } - return dsz; + l->checkpoint = l->snd_nxt; + break; + case LINK_RESET: + setup = l->rst_cnt++ <= 4; + setup |= !(l->rst_cnt % 16); + mtyp = RESET_MSG; + break; + case LINK_ESTABLISHING: + setup = true; + mtyp = ACTIVATE_MSG; + break; + case LINK_PEER_RESET: + case LINK_RESETTING: + case LINK_FAILINGOVER: + break; + default: + break; } - /* Fragmentation needed ? */ - if (size > max_packet) - return link_send_long_buf(l_ptr, buf); + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); - /* Packet can be queued or sent. 
*/ - if (likely(!tipc_bearer_blocked(l_ptr->b_ptr) && - !link_congested(l_ptr))) { - link_add_to_outqueue(l_ptr, buf, msg); - - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return dsz; - } - /* Congestion: can message be bundled ? */ - if ((msg_user(msg) != CHANGEOVER_PROTOCOL) && - (msg_user(msg) != MSG_FRAGMENTER)) { - - /* Try adding message to an existing bundle */ - if (l_ptr->next_out && - link_bundle_buf(l_ptr, l_ptr->last_out, buf)) - return dsz; - - /* Try creating a new bundle */ - if (size <= max_packet * 2 / 3) { - struct sk_buff *bundler = tipc_buf_acquire(max_packet); - struct tipc_msg bundler_hdr; - - if (bundler) { - tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, - INT_H_SIZE, l_ptr->addr); - skb_copy_to_linear_data(bundler, &bundler_hdr, - INT_H_SIZE); - skb_trim(bundler, INT_H_SIZE); - link_bundle_buf(l_ptr, bundler, buf); - buf = bundler; - msg = buf_msg(buf); - l_ptr->stats.sent_bundles++; - } - } - } - if (!l_ptr->next_out) - l_ptr->next_out = buf; - link_add_to_outqueue(l_ptr, buf, msg); - return dsz; + return rc; } -/* - * tipc_link_send(): same as tipc_link_send_buf(), but the link to use has - * not been selected yet, and the the owner node is not locked - * Called by TIPC internal users, e.g. 
the name distributor +/** + * link_schedule_user - schedule a message sender for wakeup after congestion + * @l: congested link + * @hdr: header of message that is being sent + * Create pseudo msg to send back to user when congestion abates */ -int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) -{ - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res = -ELINKCONG; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = tipc_link_send_buf(l_ptr, buf); - else - kfree_skb(buf); - tipc_node_unlock(n_ptr); - } else { - kfree_skb(buf); - } - read_unlock_bh(&tipc_net_lock); - return res; +static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) +{ + u32 dnode = tipc_own_addr(l->net); + u32 dport = msg_origport(hdr); + struct sk_buff *skb; + + /* Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + dnode, l->addr, dport, 0, 0); + if (!skb) + return -ENOBUFS; + msg_set_dest_droppable(buf_msg(skb), true); + TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); + skb_queue_tail(&l->wakeupq, skb); + l->stats.link_congs++; + trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!"); + return -ELINKCONG; } -/* - * tipc_link_send_sync - synchronize broadcast link endpoints. - * - * Give a newly added peer node the sequence number where it should - * start receiving and acking broadcast packets. 
- * - * Called with node locked +/** + * link_prepare_wakeup - prepare users for wakeup after congestion + * @l: congested link + * Wake up a number of waiting users, as permitted by available space + * in the send queue */ -static void tipc_link_send_sync(struct tipc_link *l) +static void link_prepare_wakeup(struct tipc_link *l) { - struct sk_buff *buf; - struct tipc_msg *msg; + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; + struct sk_buff *skb, *tmp; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; - buf = tipc_buf_acquire(INT_H_SIZE); - if (!buf) - return; + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; + + skb_queue_walk_safe(wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); + } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); - msg = buf_msg(buf); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, l->addr); - msg_set_last_bcast(msg, l->owner->bclink.acked); - link_add_chain_to_outqueue(l, buf, 0); - tipc_link_push_queue(l); } -/* - * tipc_link_recv_sync - synchronize broadcast link endpoints. - * Receive the sequence number where we should start receiving and - * acking broadcast packets from a newly added peer node, and open - * up for reception of such packets. 
- * - * Called with node locked +/** + * tipc_link_set_skb_retransmit_time - set the time at which retransmission of + * the given skb should be next attempted + * @skb: skb to set a future retransmission time for + * @l: link the skb will be transmitted on */ -static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf) +static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, + struct tipc_link *l) { - struct tipc_msg *msg = buf_msg(buf); - - n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg); - n->bclink.recv_permitted = true; - kfree_skb(buf); + if (link_is_bc_sndlink(l)) + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + else + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; +} + +void tipc_link_reset(struct tipc_link *l) +{ + struct sk_buff_head list; + u32 imp; + + __skb_queue_head_init(&list); + + l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; + l->session++; + l->mtu = l->advertised_mtu; + + spin_lock_bh(&l->wakeupq.lock); + skb_queue_splice_init(&l->wakeupq, &list); + spin_unlock_bh(&l->wakeupq.lock); + + spin_lock_bh(&l->inputq->lock); + skb_queue_splice_init(&list, l->inputq); + spin_unlock_bh(&l->inputq->lock); + + __skb_queue_purge(&l->transmq); + __skb_queue_purge(&l->deferdq); + __skb_queue_purge(&l->backlogq); + __skb_queue_purge(&l->failover_deferdq); + for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { + l->backlog[imp].len = 0; + l->backlog[imp].target_bskb = NULL; + } + kfree_skb(l->reasm_buf); + kfree_skb(l->reasm_tnlmsg); + kfree_skb(l->failover_reasm_skb); + l->reasm_buf = NULL; + l->reasm_tnlmsg = NULL; + l->failover_reasm_skb = NULL; + l->rcv_unacked = 0; + l->snd_nxt = 1; + l->rcv_nxt = 1; + l->snd_nxt_state = 1; + l->rcv_nxt_state = 1; + l->acked = 0; + l->last_gap = 0; + kfree(l->last_ga); + l->last_ga = NULL; + l->silent_intv_cnt = 0; + l->rst_cnt = 0; + l->bc_peer_is_up = false; + memset(&l->mon_state, 0, sizeof(l->mon_state)); + 
tipc_link_reset_stats(l); } -/* - * tipc_link_send_names - send name table entries to new neighbor +/** + * tipc_link_xmit(): enqueue buffer list according to queue situation + * @l: link to use + * @list: chain of buffers containing message + * @xmitq: returned list of packets to be sent by caller * - * Send routine for bulk delivery of name table messages when contact - * with a new neighbor occurs. No link congestion checking is performed - * because name table messages *must* be delivered. The messages must be - * small enough not to require fragmentation. - * Called without any locks held. + * Consumes the buffer chain. + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ -void tipc_link_send_names(struct list_head *message_list, u32 dest) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct sk_buff *temp_buf; - - if (list_empty(message_list)) - return; +int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *backlogq = &l->backlogq; + struct sk_buff_head *transmq = &l->transmq; + struct sk_buff *skb, *_skb; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 ack = l->rcv_nxt - 1; + u16 seqno = l->snd_nxt; + int pkt_cnt = skb_queue_len(list); + unsigned int mss = tipc_link_mss(l); + unsigned int cwin = l->window; + unsigned int mtu = l->mtu; + struct tipc_msg *hdr; + bool new_bundle; + int rc = 0; + int imp; + + if (pkt_cnt <= 0) + return 0; - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) { - /* convert circular list to linear list */ - ((struct sk_buff *)message_list->prev)->next = NULL; - link_add_chain_to_outqueue(l_ptr, - (struct sk_buff *)message_list->next, 0); - tipc_link_push_queue(l_ptr); - INIT_LIST_HEAD(message_list); + hdr = buf_msg(skb_peek(list)); + if 
(unlikely(msg_size(hdr) > mtu)) { + pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", + skb_queue_len(list), msg_user(hdr), + msg_type(hdr), msg_size(hdr), mtu); + __skb_queue_purge(list); + return -EMSGSIZE; + } + + imp = msg_importance(hdr); + /* Allow oversubscription of one data msg per source at congestion */ + if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { + if (imp == TIPC_SYSTEM_IMPORTANCE) { + pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); + __skb_queue_purge(list); + return -ENOBUFS; } - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - - /* discard the messages if they couldn't be sent */ - list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { - list_del((struct list_head *)buf); - kfree_skb(buf); - } -} - -/* - * link_send_buf_fast: Entry for data messages where the - * destination link is known and the header is complete, - * inclusive total message length. Very time critical. - * Link is locked. Returns user data length. 
- */ -static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 *used_max_pkt) -{ - struct tipc_msg *msg = buf_msg(buf); - int res = msg_data_sz(msg); - - if (likely(!link_congested(l_ptr))) { - if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - if (likely(!tipc_bearer_blocked(l_ptr->b_ptr))) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; + rc = link_schedule_user(l, hdr); + } + + if (pkt_cnt > 1) { + l->stats.sent_fragmented++; + l->stats.sent_fragments += pkt_cnt; + } + + /* Prepare each packet for sending, and add to relevant queue: */ + while ((skb = __skb_dequeue(list))) { + if (likely(skb_queue_len(transmq) < cwin)) { + hdr = buf_msg(skb); + msg_set_seqno(hdr, seqno); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) { + kfree_skb(skb); + __skb_queue_purge(list); + return -ENOBUFS; } - } else - *used_max_pkt = l_ptr->max_pkt; + __skb_queue_tail(transmq, skb); + tipc_link_set_skb_retransmit_time(skb, l); + __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; + l->rcv_unacked = 0; + l->stats.sent_pkts++; + seqno++; + continue; + } + if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb, + mss, l->addr, &new_bundle)) { + if (skb) { + /* Keep a ref. 
to the skb for next try */ + l->backlog[imp].target_bskb = skb; + l->backlog[imp].len++; + __skb_queue_tail(backlogq, skb); + } else { + if (new_bundle) { + l->stats.sent_bundles++; + l->stats.sent_bundled++; + } + l->stats.sent_bundled++; + } + continue; + } + l->backlog[imp].target_bskb = NULL; + l->backlog[imp].len += (1 + skb_queue_len(list)); + __skb_queue_tail(backlogq, skb); + skb_queue_splice_tail_init(list, backlogq); } - return tipc_link_send_buf(l_ptr, buf); /* All other cases */ + l->snd_nxt = seqno; + return rc; } -/* - * tipc_link_send_sections_fast: Entry for messages where the - * destination processor is known and the header is complete, - * except for total message length. - * Returns user data length or errno. - */ -int tipc_link_send_sections_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - const u32 num_sect, unsigned int total_len, - u32 destaddr) -{ - struct tipc_msg *hdr = &sender->phdr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct tipc_node *node; - int res; - u32 selector = msg_origport(hdr) & 1; - -again: - /* - * Try building message using port's max_pkt hint. - * (Must not hold any locks while building message.) 
- */ - res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, - sender->max_pkt, &buf); - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - return res; - - read_lock_bh(&tipc_net_lock); - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[selector]; - if (likely(l_ptr)) { - if (likely(buf)) { - res = link_send_buf_fast(l_ptr, buf, - &sender->max_pkt); -exit: - tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); - return res; - } - - /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr) || - tipc_bearer_blocked(l_ptr->b_ptr)) { - res = link_schedule_port(l_ptr, - sender->ref, res); - goto exit; - } +static void tipc_link_update_cwin(struct tipc_link *l, int released, + bool retransmitted) +{ + int bklog_len = skb_queue_len(&l->backlogq); + struct sk_buff_head *txq = &l->transmq; + int txq_len = skb_queue_len(txq); + u16 cwin = l->window; - /* - * Message size exceeds max_pkt hint; update hint, - * then re-try fast path or fragment the message - */ - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); + /* Enter fast recovery */ + if (unlikely(retransmitted)) { + l->ssthresh = max_t(u16, l->window / 2, 300); + l->window = min_t(u16, l->ssthresh, l->window); + return; + } + /* Enter slow start */ + if (unlikely(!released)) { + l->ssthresh = max_t(u16, l->window / 2, 300); + l->window = l->min_win; + return; + } + /* Don't increase window if no pressure on the transmit queue */ + if (txq_len + bklog_len < cwin) + return; + /* Don't increase window if there are holes the transmit queue */ + if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len) + return; - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) - goto again; + l->cong_acks += released; - return link_send_sections_long(sender, msg_sect, - num_sect, total_len, - destaddr); - } - tipc_node_unlock(node); + /* Slow start */ + if (cwin <= l->ssthresh) { + l->window = 
min_t(u16, cwin + released, l->max_win); + return; } - read_unlock_bh(&tipc_net_lock); + /* Congestion avoidance */ + if (l->cong_acks < cwin) + return; + l->window = min_t(u16, ++cwin, l->max_win); + l->cong_acks = 0; +} + +static void tipc_link_advance_backlog(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + struct sk_buff_head *txq = &l->transmq; + struct sk_buff *skb, *_skb; + u16 ack = l->rcv_nxt - 1; + u16 seqno = l->snd_nxt; + struct tipc_msg *hdr; + u16 cwin = l->window; + u32 imp; - /* Couldn't find a link to the destination node */ - if (buf) - return tipc_reject_msg(buf, TIPC_ERR_NO_NODE); - if (res >= 0) - return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NODE); - return res; + while (skb_queue_len(txq) < cwin) { + skb = skb_peek(&l->backlogq); + if (!skb) + break; + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_dequeue(&l->backlogq); + hdr = buf_msg(skb); + imp = msg_importance(hdr); + l->backlog[imp].len--; + if (unlikely(skb == l->backlog[imp].target_bskb)) + l->backlog[imp].target_bskb = NULL; + __skb_queue_tail(&l->transmq, skb); + tipc_link_set_skb_retransmit_time(skb, l); + + __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; + msg_set_seqno(hdr, seqno); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + l->rcv_unacked = 0; + l->stats.sent_pkts++; + seqno++; + } + l->snd_nxt = seqno; } -/* - * link_send_sections_long(): Entry for long messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Link and bearer congestion status have been checked to be ok, - * and are ignored if they change. - * - * Note that fragments do not use the full link MTU so that they won't have - * to undergo refragmentation if link changeover causes them to be sent - * over another link with an additional tunnel header added as prefix. 
- * (Refragmentation will still occur if the other link has a smaller MTU.) +/** + * link_retransmit_failure() - Detect repeated retransmit failures + * @l: tipc link sender + * @r: tipc link receiver (= l in case of unicast) + * @rc: returned code * - * Returns user data length or errno. + * Return: true if the repeated retransmit failures happens, otherwise + * false */ -static int link_send_sections_long(struct tipc_port *sender, - struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - u32 destaddr) -{ - struct tipc_link *l_ptr; - struct tipc_node *node; - struct tipc_msg *hdr = &sender->phdr; - u32 dsz = total_len; - u32 max_pkt, fragm_sz, rest; - struct tipc_msg fragm_hdr; - struct sk_buff *buf, *buf_chain, *prev; - u32 fragm_crs, fragm_rest, hsz, sect_rest; - const unchar *sect_crs; - int curr_sect; - u32 fragm_no; - int res = 0; - -again: - fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; - /* leave room for tunnel header in case of link changeover */ - fragm_sz = max_pkt - INT_H_SIZE; - /* leave room for fragmentation header in each fragment */ - rest = dsz; - fragm_crs = 0; - fragm_rest = 0; - sect_rest = 0; - sect_crs = NULL; - curr_sect = -1; - - /* Prepare reusable fragment header */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(hdr)); - msg_set_size(&fragm_hdr, max_pkt); - msg_set_fragm_no(&fragm_hdr, 1); - - /* Prepare header of first fragment */ - buf_chain = buf = tipc_buf_acquire(max_pkt); - if (!buf) - return -ENOMEM; - buf->next = NULL; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - hsz = msg_hdr_sz(hdr); - skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz); - - /* Chop up message */ - fragm_crs = INT_H_SIZE + hsz; - fragm_rest = fragm_sz - hsz; - - do { /* For all sections */ - u32 sz; - - if (!sect_rest) { - sect_rest = msg_sect[++curr_sect].iov_len; - sect_crs = (const unchar *)msg_sect[curr_sect].iov_base; - } - - if (sect_rest < fragm_rest) - sz = sect_rest; - 
else - sz = fragm_rest; - - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { - res = -EFAULT; -error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - return res; - } - sect_crs += sz; - sect_rest -= sz; - fragm_crs += sz; - fragm_rest -= sz; - rest -= sz; - - if (!fragm_rest && rest) { - - /* Initiate new fragment: */ - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } else { - msg_set_type(&fragm_hdr, FRAGMENT); - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - msg_set_fragm_no(&fragm_hdr, ++fragm_no); - prev = buf; - buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) { - res = -ENOMEM; - goto error; - } - - buf->next = NULL; - prev->next = buf; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - fragm_crs = INT_H_SIZE; - fragm_rest = fragm_sz; - } - } while (rest > 0); - - /* - * Now we have a buffer chain. Select a link and check - * that packet size is still OK - */ - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[sender->ref & 1]; - if (!l_ptr) { - tipc_node_unlock(node); - goto reject; - } - if (l_ptr->max_pkt < max_pkt) { - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - goto again; - } +static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, + int *rc) +{ + struct sk_buff *skb = skb_peek(&l->transmq); + struct tipc_msg *hdr; + + if (!skb) + return false; + + if (!TIPC_SKB_CB(skb)->retr_cnt) + return false; + + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + + msecs_to_jiffies(r->tolerance * 10))) + return false; + + hdr = buf_msg(skb); + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) + return false; + + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "State of link "); + pr_info("Failed msg: usr %u, typ %u, 
len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, dest: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); + pr_info("retr_stamp %d, retr_cnt %d\n", + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), + TIPC_SKB_CB(skb)->retr_cnt); + + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + + if (link_is_bc_sndlink(l)) { + r->state = LINK_RESET; + *rc |= TIPC_LINK_DOWN_EVT; } else { -reject: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NODE); + *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - tipc_node_unlock(node); - return dsz; + return true; } -/* - * tipc_link_push_packet: Push one unsent packet to the media +/* tipc_data_input - deliver data and name distr msgs to upper layer + * + * Consumes buffer if message is of right type + * Node lock must be held */ -u32 tipc_link_push_packet(struct tipc_link *l_ptr) -{ - struct sk_buff *buf = l_ptr->first_out; - u32 r_q_size = l_ptr->retransm_queue_size; - u32 r_q_head = l_ptr->retransm_queue_head; - - /* Step to position where retransmission failed, if any, */ - /* consider that buffers may have been released in meantime */ - if (r_q_size && buf) { - u32 last = lesser(mod(r_q_head + r_q_size), - link_last_sent(l_ptr)); - u32 first = buf_seqno(buf); - - while (buf && less(first, r_q_head)) { - first = mod(first + 1); - buf = buf->next; +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, + 
struct sk_buff_head *inputq) +{ + struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; + struct tipc_msg *hdr = buf_msg(skb); + + switch (msg_user(hdr)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { + skb_queue_tail(mc_inputq, skb); + return true; + } + fallthrough; + case CONN_MANAGER: + skb_queue_tail(inputq, skb); + return true; + case GROUP_PROTOCOL: + skb_queue_tail(mc_inputq, skb); + return true; + case NAME_DISTRIBUTOR: + l->bc_rcvlink->state = LINK_ESTABLISHED; + skb_queue_tail(l->namedq, skb); + return true; + case MSG_BUNDLER: + case TUNNEL_PROTOCOL: + case MSG_FRAGMENTER: + case BCAST_PROTOCOL: + return false; +#ifdef CONFIG_TIPC_CRYPTO + case MSG_CRYPTO: + if (sysctl_tipc_key_exchange_enabled && + TIPC_SKB_CB(skb)->decrypted) { + tipc_crypto_msg_rcv(l->net, skb); + return true; } - l_ptr->retransm_queue_head = r_q_head = first; - l_ptr->retransm_queue_size = r_q_size = mod(last - first); + fallthrough; +#endif + default: + pr_warn("Dropping received illegal msg type\n"); + kfree_skb(skb); + return true; } +} - /* Continue retransmission now, if there is anything: */ - if (r_q_size && buf) { - msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - l_ptr->retransm_queue_head = mod(++r_q_head); - l_ptr->retransm_queue_size = --r_q_size; - l_ptr->stats.retransmitted++; +/* tipc_link_input - process packet that has passed link protocol check + * + * Consumes buffer + */ +static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq, + struct sk_buff **reasm_skb) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff *iskb; + struct sk_buff_head tmpq; + int usr = msg_user(hdr); + int pos = 0; + + if (usr == MSG_BUNDLER) { + skb_queue_head_init(&tmpq); + 
l->stats.recv_bundles++; + l->stats.recv_bundled += msg_msgcnt(hdr); + while (tipc_msg_extract(skb, &iskb, &pos)) + tipc_data_input(l, iskb, &tmpq); + tipc_skb_queue_splice_tail(&tmpq, inputq); return 0; + } else if (usr == MSG_FRAGMENTER) { + l->stats.recv_fragments++; + if (tipc_buf_append(reasm_skb, &skb)) { + l->stats.recv_fragmented++; + tipc_data_input(l, skb, inputq); + } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { + pr_warn_ratelimited("Unable to build fragment list\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + return 0; + } else if (usr == BCAST_PROTOCOL) { + tipc_bcast_lock(l->net); + tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); + tipc_bcast_unlock(l->net); } - /* Send deferred protocol message, if any: */ - buf = l_ptr->proto_msg_queue; - if (buf) { - msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; - kfree_skb(buf); - l_ptr->proto_msg_queue = NULL; + kfree_skb(skb); + return 0; +} + +/* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the + * inner message along with the ones in the old link's + * deferdq + * @l: tunnel link + * @skb: TUNNEL_PROTOCOL message + * @inputq: queue to put messages ready for delivery + */ +static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq) +{ + struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; + struct sk_buff_head *fdefq = &l->failover_deferdq; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff *iskb; + int ipos = 0; + int rc = 0; + u16 seqno; + + if (msg_type(hdr) == SYNCH_MSG) { + kfree_skb(skb); return 0; } - /* Send one deferred data message, if send window not full: */ - buf = l_ptr->next_out; - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - u32 next = msg_seqno(msg); - u32 first = buf_seqno(l_ptr->first_out); 
- - if (mod(next - first) < l_ptr->queue_limit[0]) { - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); - l_ptr->next_out = buf->next; + /* Not a fragment? */ + if (likely(!msg_nof_fragms(hdr))) { + if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { + pr_warn_ratelimited("Unable to extract msg, defq: %d\n", + skb_queue_len(fdefq)); return 0; } + kfree_skb(skb); + } else { + /* Set fragment type for buf_append */ + if (msg_fragm_no(hdr) == 1) + msg_set_type(hdr, FIRST_FRAGMENT); + else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) + msg_set_type(hdr, FRAGMENT); + else + msg_set_type(hdr, LAST_FRAGMENT); + + if (!tipc_buf_append(reasm_tnlmsg, &skb)) { + /* Successful but non-complete reassembly? */ + if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) + return 0; + pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + iskb = skb; } - return 1; -} - -/* - * push_queue(): push out the unsent messages of a link where - * congestion has abated. 
Node is locked - */ -void tipc_link_push_queue(struct tipc_link *l_ptr) -{ - u32 res; - - if (tipc_bearer_blocked(l_ptr->b_ptr)) - return; do { - res = tipc_link_push_packet(l_ptr); - } while (!res); -} - -static void link_reset_all(unsigned long addr) -{ - struct tipc_node *n_ptr; - char addr_string[16]; - u32 i; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find((u32)addr); - if (!n_ptr) { - read_unlock_bh(&tipc_net_lock); - return; /* node no longer exists */ - } + seqno = buf_seqno(iskb); + if (unlikely(less(seqno, l->drop_point))) { + kfree_skb(iskb); + continue; + } + if (unlikely(seqno != l->drop_point)) { + __tipc_skb_queue_sorted(fdefq, seqno, iskb); + continue; + } - tipc_node_lock(n_ptr); + l->drop_point++; + if (!tipc_data_input(l, iskb, inputq)) + rc |= tipc_link_input(l, iskb, inputq, reasm_skb); + if (unlikely(rc)) + break; + } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); - pr_warn("Resetting all links to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + return rc; +} - for (i = 0; i < MAX_BEARERS; i++) { - if (n_ptr->links[i]) { - link_print(n_ptr->links[i], "Resetting link\n"); - tipc_link_reset(n_ptr->links[i]); +/** + * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG + * @ga: returned pointer to the Gap ACK blocks if any + * @l: the tipc link + * @hdr: the PROTOCOL/STATE_MSG header + * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0) + * + * Return: the total Gap ACK blocks size + */ +u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, + struct tipc_msg *hdr, bool uc) +{ + struct tipc_gap_ack_blks *p; + u16 sz = 0; + + /* Does peer support the Gap ACK blocks feature? 
*/ + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { + p = (struct tipc_gap_ack_blks *)msg_data(hdr); + sz = ntohs(p->len); + /* Sanity check */ + if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) { + /* Good, check if the desired type exists */ + if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) + goto ok; + /* Backward compatible: peer might not support bc, but uc? */ + } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) { + if (p->ugack_cnt) { + p->bgack_cnt = 0; + goto ok; + } } } + /* Other cases: ignore! */ + p = NULL; - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); +ok: + *ga = p; + return sz; } -static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) +static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, + struct tipc_link *l, u8 start_index) { - struct tipc_msg *msg = buf_msg(buf); - - pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); - - if (l_ptr->addr) { - /* Handle failure on standard link */ - link_print(l_ptr, "Resetting link\n"); - tipc_link_reset(l_ptr); - - } else { - /* Handle failure on broadcast link */ - struct tipc_node *n_ptr; - char addr_string[16]; - - pr_info("Msg seq number: %u, ", msg_seqno(msg)); - pr_cont("Outstanding acks: %lu\n", - (unsigned long) TIPC_SKB_CB(buf)->handle); - - n_ptr = tipc_bclink_retransmit_to(); - tipc_node_lock(n_ptr); + struct tipc_gap_ack *gacks = &ga->gacks[start_index]; + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 expect, seqno = 0; + u8 n = 0; - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_info("Broadcast link info for %s\n", addr_string); - pr_info("Reception permitted: %d, Acked: %u\n", - n_ptr->bclink.recv_permitted, - n_ptr->bclink.acked); - pr_info("Last in: %u, Oos state: %u, Last sent: %u\n", - n_ptr->bclink.last_in, - n_ptr->bclink.oos_state, - n_ptr->bclink.last_sent); - - tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); - - tipc_node_unlock(n_ptr); + if (!skb) + return 0; - 
l_ptr->stale_count = 0; + expect = buf_seqno(skb); + skb_queue_walk(&l->deferdq, skb) { + seqno = buf_seqno(skb); + if (unlikely(more(seqno, expect))) { + gacks[n].ack = htons(expect - 1); + gacks[n].gap = htons(seqno - expect); + if (++n >= MAX_GAP_ACK_BLKS / 2) { + pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n", + l->name, n, + skb_queue_len(&l->deferdq)); + return n; + } + } else if (unlikely(less(seqno, expect))) { + pr_warn("Unexpected skb in deferdq!\n"); + continue; + } + expect = seqno + 1; } -} -void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 retransmits) -{ - struct tipc_msg *msg; - - if (!buf) - return; - - msg = buf_msg(buf); + /* last block */ + gacks[n].ack = htons(seqno); + gacks[n].gap = 0; + n++; + return n; +} - if (tipc_bearer_blocked(l_ptr->b_ptr)) { - if (l_ptr->retransm_queue_size == 0) { - l_ptr->retransm_queue_head = msg_seqno(msg); - l_ptr->retransm_queue_size = retransmits; +/* tipc_build_gap_ack_blks - build Gap ACK blocks + * @l: tipc unicast link + * @hdr: the tipc message buffer to store the Gap ACK blocks after built + * + * The function builds Gap ACK blocks for both the unicast & broadcast receiver + * links of a certain peer, the buffer after built has the network data format + * as found at the struct tipc_gap_ack_blks definition. + * + * returns the actual allocated memory size + */ +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) +{ + struct tipc_link *bcl = l->bc_rcvlink; + struct tipc_gap_ack_blks *ga; + u16 len; + + ga = (struct tipc_gap_ack_blks *)msg_data(hdr); + + /* Start with broadcast link first */ + tipc_bcast_lock(bcl->net); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0); + tipc_bcast_unlock(bcl->net); + + /* Now for unicast link, but an explicit NACK only (???) */ + ga->ugack_cnt = (msg_seq_gap(hdr)) ? 
+ __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; + + /* Total len */ + len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt)); + ga->len = htons(len); + return len; +} + +/* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing + * acked packets, also doing retransmissions if + * gaps found + * @l: tipc link with transmq queue to be advanced + * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast) + * @acked: seqno of last packet acked by peer without any gaps before + * @gap: # of gap packets + * @ga: buffer pointer to Gap ACK blocks from peer + * @xmitq: queue for accumulating the retransmitted packets if any + * @retransmitted: returned boolean value if a retransmission is really issued + * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures + * happens (- unlikely case) + * + * Return: the number of packets released from the link transmq + */ +static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, + u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + bool *retransmitted, int *rc) +{ + struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL; + struct tipc_gap_ack *gacks = NULL; + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr; + u32 qlen = skb_queue_len(&l->transmq); + u16 nacked = acked, ngap = gap, gack_cnt = 0; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 ack = l->rcv_nxt - 1; + u16 seqno, n = 0; + u16 end = r->acked, start = end, offset = r->last_gap; + u16 si = (last_ga) ? 
last_ga->start_index : 0; + bool is_uc = !link_is_bc_sndlink(l); + bool bc_has_acked = false; + + trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq); + + /* Determine Gap ACK blocks if any for the particular link */ + if (ga && is_uc) { + /* Get the Gap ACKs, uc part */ + gack_cnt = ga->ugack_cnt; + gacks = &ga->gacks[ga->bgack_cnt]; + } else if (ga) { + /* Copy the Gap ACKs, bc part, for later renewal if needed */ + this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt), + GFP_ATOMIC); + if (likely(this_ga)) { + this_ga->start_index = 0; + /* Start with the bc Gap ACKs */ + gack_cnt = this_ga->bgack_cnt; + gacks = &this_ga->gacks[0]; } else { - pr_err("Unexpected retransmit on link %s (qsize=%d)\n", - l_ptr->name, l_ptr->retransm_queue_size); + /* Hmm, we can get in trouble..., simply ignore it */ + pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n"); } - return; - } else { - /* Detect repeated retransmit failures on unblocked bearer */ - if (l_ptr->last_retransmitted == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - link_retransmit_failure(l_ptr, buf); - return; + } + + /* Advance the link transmq */ + skb_queue_walk_safe(&l->transmq, skb, tmp) { + seqno = buf_seqno(skb); + +next_gap_ack: + if (less_eq(seqno, nacked)) { + if (is_uc) + goto release; + /* Skip packets peer has already acked */ + if (!more(seqno, r->acked)) + continue; + /* Get the next of last Gap ACK blocks */ + while (more(seqno, end)) { + if (!last_ga || si >= last_ga->bgack_cnt) + break; + start = end + offset + 1; + end = ntohs(last_ga->gacks[si].ack); + offset = ntohs(last_ga->gacks[si].gap); + si++; + WARN_ONCE(more(start, end) || + (!offset && + si < last_ga->bgack_cnt) || + si > MAX_GAP_ACK_BLKS, + "Corrupted Gap ACK: %d %d %d %d %d\n", + start, end, offset, si, + last_ga->bgack_cnt); } + /* Check against the last Gap ACK block */ + if (tipc_in_range(seqno, start, end)) + continue; + /* Update/release the packet peer is acking */ + bc_has_acked = true; + if 
(--TIPC_SKB_CB(skb)->ackers) + continue; +release: + /* release skb */ + __skb_unlink(skb, &l->transmq); + kfree_skb(skb); + } else if (less_eq(seqno, nacked + ngap)) { + /* First gap: check if repeated retrans failures? */ + if (unlikely(seqno == acked + 1 && + link_retransmit_failure(l, r, rc))) { + /* Ignore this bc Gap ACKs if any */ + kfree(this_ga); + this_ga = NULL; + break; + } + /* retransmit skb if unrestricted*/ + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + tipc_link_set_skb_retransmit_time(skb, l); + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) + continue; + hdr = buf_msg(_skb); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + _skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, _skb); + l->stats.retransmitted++; + if (!is_uc) + r->stats.retransmitted++; + *retransmitted = true; + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { - l_ptr->last_retransmitted = msg_seqno(msg); - l_ptr->stale_count = 1; + /* retry with Gap ACK blocks if any */ + if (n >= gack_cnt) + break; + nacked = ntohs(gacks[n].ack); + ngap = ntohs(gacks[n].gap); + n++; + goto next_gap_ack; } } - while (retransmits && (buf != l_ptr->next_out) && buf) { - msg = buf_msg(buf); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - buf = buf->next; - retransmits--; - l_ptr->stats.retransmitted++; + /* Renew last Gap ACK blocks for bc if needed */ + if (bc_has_acked) { + if (this_ga) { + kfree(last_ga); + r->last_ga = this_ga; + r->last_gap = gap; + } else if (last_ga) { + if (less(acked, start)) { + si--; + offset = start - acked - 1; + } else if (less(acked, end)) { + acked = end; + } + if (si < last_ga->bgack_cnt) { + last_ga->start_index = si; + r->last_gap = offset; + } else { + kfree(last_ga); + r->last_ga = NULL; + r->last_gap = 0; + } + 
} else { + r->last_gap = 0; + } + r->acked = acked; + } else { + kfree(this_ga); } - l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; + return qlen - skb_queue_len(&l->transmq); } -/** - * link_insert_deferred_queue - insert deferred messages back into receive chain +/* tipc_link_build_state_msg: prepare link state message for transmission + * + * Note that sending of broadcast ack is coordinated among nodes, to reduce + * risk of ack storms towards the sender */ -static struct sk_buff *link_insert_deferred_queue(struct tipc_link *l_ptr, - struct sk_buff *buf) +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { - u32 seq_no; + if (!l) + return 0; - if (l_ptr->oldest_deferred_in == NULL) - return buf; + /* Broadcast ACK must be sent via a unicast link => defer to caller */ + if (link_is_bc_rcvlink(l)) { + if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) + return 0; + l->rcv_unacked = 0; - seq_no = buf_seqno(l_ptr->oldest_deferred_in); - if (seq_no == mod(l_ptr->next_in_no)) { - l_ptr->newest_deferred_in->next = buf; - buf = l_ptr->oldest_deferred_in; - l_ptr->oldest_deferred_in = NULL; - l_ptr->deferred_inqueue_sz = 0; + /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ + l->snd_nxt = l->rcv_nxt; + return TIPC_LINK_SND_STATE; } - return buf; + /* Unicast ACK */ + l->rcv_unacked = 0; + l->stats.sent_acks++; + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); + return 0; } -/** - * link_recv_buf_validate - validate basic format of received message - * - * This routine ensures a TIPC message has an acceptable header, and at least - * as much data as the header indicates it should. The routine also ensures - * that the entire message header is stored in the main fragment of the message - * buffer, to simplify future access to message header fields. - * - * Note: Having extra info present in the message header or data areas is OK. 
- * TIPC will ignore the excess, under the assumption that it is optional info - * introduced by a later release of the protocol. +/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message */ -static int link_recv_buf_validate(struct sk_buff *buf) +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { - static u32 min_data_hdr_size[8] = { - SHORT_H_SIZE, MCAST_H_SIZE, NAMED_H_SIZE, BASIC_H_SIZE, - MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE - }; + int mtyp = RESET_MSG; + struct sk_buff *skb; - struct tipc_msg *msg; - u32 tipc_hdr[2]; - u32 size; - u32 hdr_size; - u32 min_hdr_size; + if (l->state == LINK_ESTABLISHING) + mtyp = ACTIVATE_MSG; - if (unlikely(buf->len < MIN_H_SIZE)) - return 0; + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); - msg = skb_header_pointer(buf, 0, sizeof(tipc_hdr), tipc_hdr); - if (msg == NULL) - return 0; + /* Inform peer that this endpoint is going down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); +} - if (unlikely(msg_version(msg) != TIPC_VERSION)) +/* tipc_link_build_nack_msg: prepare link nack message for transmission + * Note that sending of broadcast NACK is coordinated among nodes, to + * reduce the risk of NACK storms towards the sender + */ +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 def_cnt = ++l->stats.deferred_recv; + struct sk_buff_head *dfq = &l->deferdq; + u32 defq_len = skb_queue_len(dfq); + int match1, match2; + + if (link_is_bc_rcvlink(l)) { + match1 = def_cnt & 0xf; + match2 = tipc_own_addr(l->net) & 0xf; + if (match1 == match2) + return TIPC_LINK_SND_STATE; return 0; + } - size = msg_size(msg); - hdr_size = msg_hdr_sz(msg); - min_hdr_size = msg_isdata(msg) ? 
- min_data_hdr_size[msg_type(msg)] : INT_H_SIZE; - - if (unlikely((hdr_size < min_hdr_size) || - (size < hdr_size) || - (buf->len < size) || - (size - hdr_size > TIPC_MAX_USER_MSG_SIZE))) - return 0; + if (defq_len >= 3 && !((defq_len - 3) % 16)) { + u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; - return pskb_may_pull(buf, hdr_size); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, + rcvgap, 0, 0, xmitq); + } + return 0; } -/** - * tipc_recv_msg - process TIPC messages arriving from off-node - * @head: pointer to message buffer chain - * @tb_ptr: pointer to bearer message arrived on - * - * Invoked with no locks held. Bearer pointer must point to a valid bearer - * structure (i.e. cannot be NULL), but bearer can be inactive. +/* tipc_link_rcv - process TIPC packets/messages arriving from off-node + * @l: the link that should handle the message + * @skb: TIPC packet + * @xmitq: queue to place packets to be sent after this call */ -void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) -{ - read_lock_bh(&tipc_net_lock); - while (head) { - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct sk_buff *crs; - struct sk_buff *buf = head; - struct tipc_msg *msg; - u32 seq_no; - u32 ackd; - u32 released = 0; - int type; - - head = head->next; - - /* Ensure bearer is still enabled */ - if (unlikely(!b_ptr->active)) - goto cont; - - /* Ensure message is well-formed */ - if (unlikely(!link_recv_buf_validate(buf))) - goto cont; - - /* Ensure message data is a single contiguous unit */ - if (unlikely(skb_linearize(buf))) - goto cont; - - /* Handle arrival of a non-unicast link message */ - msg = buf_msg(buf); - - if (unlikely(msg_non_seq(msg))) { - if (msg_user(msg) == LINK_CONFIG) - tipc_disc_recv_msg(buf, b_ptr); - else - tipc_bclink_recv_pkt(buf); - continue; - } - - /* Discard unicast link messages destined for another node */ - if (unlikely(!msg_short(msg) && - (msg_destnode(msg) != tipc_own_addr))) - goto cont; - - /* Locate neighboring node that 
sent message */ - n_ptr = tipc_node_find(msg_prevnode(msg)); - if (unlikely(!n_ptr)) - goto cont; - tipc_node_lock(n_ptr); - - /* Locate unicast link endpoint that should handle message */ - l_ptr = n_ptr->links[b_ptr->identity]; - if (unlikely(!l_ptr)) { - tipc_node_unlock(n_ptr); - goto cont; - } - - /* Verify that communication with node is currently allowed */ - if ((n_ptr->block_setup & WAIT_PEER_DOWN) && - msg_user(msg) == LINK_PROTOCOL && - (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && - !msg_redundant_link(msg)) - n_ptr->block_setup &= ~WAIT_PEER_DOWN; - - if (n_ptr->block_setup) { - tipc_node_unlock(n_ptr); - goto cont; - } +int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *defq = &l->deferdq; + struct tipc_msg *hdr = buf_msg(skb); + u16 seqno, rcv_nxt, win_lim; + int released = 0; + int rc = 0; - /* Validate message sequence number info */ - seq_no = msg_seqno(msg); - ackd = msg_ack(msg); + /* Verify and update link state */ + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); - /* Release acked messages */ - if (n_ptr->bclink.recv_permitted) - tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); + /* Don't send probe at next timeout expiration */ + l->silent_intv_cnt = 0; - crs = l_ptr->first_out; - while ((crs != l_ptr->next_out) && - less_eq(buf_seqno(crs), ackd)) { - struct sk_buff *next = crs->next; + do { + hdr = buf_msg(skb); + seqno = msg_seqno(hdr); + rcv_nxt = l->rcv_nxt; + win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; + + if (unlikely(!tipc_link_is_up(l))) { + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + kfree_skb(skb); + break; + } - kfree_skb(crs); - crs = next; - released++; + /* Drop if outside receive window */ + if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { + l->stats.duplicates++; + kfree_skb(skb); + break; } - if (released) { - l_ptr->first_out = crs; - l_ptr->out_queue_size -= released; 
+ released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0, + NULL, NULL, NULL, NULL); + + /* Defer delivery if sequence gap */ + if (unlikely(seqno != rcv_nxt)) { + if (!__tipc_skb_queue_sorted(defq, seqno, skb)) + l->stats.duplicates++; + rc |= tipc_link_build_nack_msg(l, xmitq); + break; } - /* Try sending any messages link endpoint has pending */ - if (unlikely(l_ptr->next_out)) - tipc_link_push_queue(l_ptr); - if (unlikely(!list_empty(&l_ptr->waiting_ports))) - tipc_link_wakeup_ports(l_ptr, 0); - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { - l_ptr->stats.sent_acks++; - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); - } + /* Deliver packet */ + l->rcv_nxt++; + l->stats.recv_pkts++; + + if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL)) + rc |= tipc_link_tnl_rcv(l, skb, l->inputq); + else if (!tipc_data_input(l, skb, l->inputq)) + rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf); + if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) + rc |= tipc_link_build_state_msg(l, xmitq); + if (unlikely(rc & ~TIPC_LINK_SND_STATE)) + break; + } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); + + /* Forward queues and wake up waiting users */ + if (released) { + tipc_link_update_cwin(l, released, 0); + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } + return rc; +} + +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, + struct sk_buff_head *xmitq) +{ + struct tipc_mon_state *mstate = &l->mon_state; + struct sk_buff_head *dfq = &l->deferdq; + struct tipc_link *bcl = l->bc_rcvlink; + struct tipc_msg *hdr; + struct sk_buff *skb; + bool node_up = tipc_link_is_up(bcl); + u16 glen = 0, bc_rcvgap = 0; + int dlen = 0; + void *data; + + /* Don't send protocol message during reset or link failover */ + if (tipc_link_is_blocked(l)) + return; - /* Now (finally!) 
process the incoming message */ -protocol_check: - if (likely(link_working_working(l_ptr))) { - if (likely(seq_no == mod(l_ptr->next_in_no))) { - l_ptr->next_in_no++; - if (unlikely(l_ptr->oldest_deferred_in)) - head = link_insert_deferred_queue(l_ptr, - head); -deliver: - if (likely(msg_isdata(msg))) { - tipc_node_unlock(n_ptr); - tipc_port_recv_msg(buf); - continue; - } - switch (msg_user(msg)) { - int ret; - case MSG_BUNDLER: - l_ptr->stats.recv_bundles++; - l_ptr->stats.recv_bundled += - msg_msgcnt(msg); - tipc_node_unlock(n_ptr); - tipc_link_recv_bundle(buf); - continue; - case NAME_DISTRIBUTOR: - n_ptr->bclink.recv_permitted = true; - tipc_node_unlock(n_ptr); - tipc_named_recv(buf); - continue; - case BCAST_PROTOCOL: - tipc_link_recv_sync(n_ptr, buf); - tipc_node_unlock(n_ptr); - continue; - case CONN_MANAGER: - tipc_node_unlock(n_ptr); - tipc_port_recv_proto_msg(buf); - continue; - case MSG_FRAGMENTER: - l_ptr->stats.recv_fragments++; - ret = tipc_link_recv_fragment( - &l_ptr->defragm_buf, - &buf, &msg); - if (ret == 1) { - l_ptr->stats.recv_fragmented++; - goto deliver; - } - if (ret == -1) - l_ptr->next_in_no--; - break; - case CHANGEOVER_PROTOCOL: - type = msg_type(msg); - if (link_recv_changeover_msg(&l_ptr, - &buf)) { - msg = buf_msg(buf); - seq_no = msg_seqno(msg); - if (type == ORIGINAL_MSG) - goto deliver; - goto protocol_check; - } - break; - default: - kfree_skb(buf); - buf = NULL; - break; - } - tipc_node_unlock(n_ptr); - tipc_net_route_msg(buf); - continue; - } - link_handle_out_of_seq_msg(l_ptr, buf); - head = link_insert_deferred_queue(l_ptr, head); - tipc_node_unlock(n_ptr); - continue; - } + if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) + return; - /* Link is not in state WORKING_WORKING */ - if (msg_user(msg) == LINK_PROTOCOL) { - link_recv_proto_msg(l_ptr, buf); - head = link_insert_deferred_queue(l_ptr, head); - tipc_node_unlock(n_ptr); - continue; - } + if ((probe || probe_reply) && !skb_queue_empty(dfq)) + rcvgap = 
buf_seqno(skb_peek(dfq)) - l->rcv_nxt; - /* Traffic message. Conditionally activate link */ - link_state_event(l_ptr, TRAFFIC_MSG_EVT); + skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, + tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, + l->addr, tipc_own_addr(l->net), 0, 0, 0); + if (!skb) + return; - if (link_working_working(l_ptr)) { - /* Re-insert buffer in front of queue */ - buf->next = head; - head = buf; - tipc_node_unlock(n_ptr); - continue; + hdr = buf_msg(skb); + data = msg_data(hdr); + msg_set_session(hdr, l->session); + msg_set_bearer_id(hdr, l->bearer_id); + msg_set_net_plane(hdr, l->net_plane); + msg_set_next_sent(hdr, l->snd_nxt); + msg_set_ack(hdr, l->rcv_nxt - 1); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_ack_invalid(hdr, !node_up); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_link_tolerance(hdr, tolerance); + msg_set_linkprio(hdr, priority); + msg_set_redundant_link(hdr, node_up); + msg_set_seq_gap(hdr, 0); + msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); + + if (mtyp == STATE_MSG) { + if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) + msg_set_seqno(hdr, l->snd_nxt_state++); + msg_set_seq_gap(hdr, rcvgap); + bc_rcvgap = link_bc_rcv_gap(bcl); + msg_set_bc_gap(hdr, bc_rcvgap); + msg_set_probe(hdr, probe); + msg_set_is_keepalive(hdr, probe || probe_reply); + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) + glen = tipc_build_gap_ack_blks(l, hdr); + tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + glen + dlen); + skb_trim(skb, INT_H_SIZE + glen + dlen); + l->stats.sent_states++; + l->rcv_unacked = 0; + } else { + /* RESET_MSG or ACTIVATE_MSG */ + if (mtyp == ACTIVATE_MSG) { + msg_set_dest_session_valid(hdr, 1); + msg_set_dest_session(hdr, l->peer_session); } - tipc_node_unlock(n_ptr); -cont: - kfree_skb(buf); + msg_set_max_pkt(hdr, l->advertised_mtu); + strcpy(data, l->if_name); + msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); + skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); + } 
+ if (probe) + l->stats.sent_probes++; + if (rcvgap) + l->stats.sent_nacks++; + if (bc_rcvgap) + bcl->stats.sent_nacks++; + skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, skb); + trace_tipc_proto_build(skb, false, l->name); +} + +void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 onode = tipc_own_addr(l->net); + struct tipc_msg *hdr, *ihdr; + struct sk_buff_head tnlq; + struct sk_buff *skb; + u32 dnode = l->addr; + + __skb_queue_head_init(&tnlq); + skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, + INT_H_SIZE, BASIC_H_SIZE, + dnode, onode, 0, 0, 0); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; } - read_unlock_bh(&tipc_net_lock); + + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, 1); + msg_set_bearer_id(hdr, l->peer_bearer_id); + + ihdr = (struct tipc_msg *)msg_data(hdr); + tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, dnode); + msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, xmitq); } -/** - * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue - * - * Returns increase in queue length (i.e. 0 or 1) +/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets + * with contents of the link's transmit and backlog queues. 
*/ -u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff *buf) -{ - struct sk_buff *queue_buf; - struct sk_buff **prev; - u32 seq_no = buf_seqno(buf); +void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, + int mtyp, struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *tnlskb; + struct tipc_msg *hdr, tnlhdr; + struct sk_buff_head *queue = &l->transmq; + struct sk_buff_head tmpxq, tnlq, frags; + u16 pktlen, pktcnt, seqno = l->snd_nxt; + bool pktcnt_need_update = false; + u16 syncpt; + int rc; + + if (!tnl) + return; - buf->next = NULL; + __skb_queue_head_init(&tnlq); + /* Link Synching: + * From now on, send only one single ("dummy") SYNCH message + * to peer. The SYNCH message does not contain any data, just + * a header conveying the synch point to the peer. + */ + if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, + INT_H_SIZE, 0, l->addr, + tipc_own_addr(l->net), + 0, 0, 0); + if (!tnlskb) { + pr_warn("%sunable to create dummy SYNCH_MSG\n", + link_co_err); + return; + } - /* Empty queue ? */ - if (*head == NULL) { - *head = *tail = buf; - return 1; + hdr = buf_msg(tnlskb); + syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; + msg_set_syncpt(hdr, syncpt); + msg_set_bearer_id(hdr, l->peer_bearer_id); + __skb_queue_tail(&tnlq, tnlskb); + tipc_link_xmit(tnl, &tnlq, xmitq); + return; } - /* Last ? 
*/ - if (less(buf_seqno(*tail), seq_no)) { - (*tail)->next = buf; - *tail = buf; - return 1; + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags); + /* At least one packet required for safe algorithm => add dummy */ + skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), + 0, 0, TIPC_ERR_NO_PORT); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; } + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, &tmpxq); + __skb_queue_purge(&tmpxq); - /* Locate insertion point in queue, then insert; discard if duplicate */ - prev = head; - queue_buf = *head; - for (;;) { - u32 curr_seqno = buf_seqno(queue_buf); - - if (seq_no == curr_seqno) { - kfree_skb(buf); - return 0; + /* Initialize reusable tunnel packet header */ + tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, + mtyp, INT_H_SIZE, l->addr); + if (mtyp == SYNCH_MSG) + pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq)); + else + pktcnt = skb_queue_len(&l->transmq); + pktcnt += skb_queue_len(&l->backlogq); + msg_set_msgcnt(&tnlhdr, pktcnt); + msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); +tnl: + /* Wrap each packet into a tunnel packet */ + skb_queue_walk(queue, skb) { + hdr = buf_msg(skb); + if (queue == &l->backlogq) + msg_set_seqno(hdr, seqno++); + pktlen = msg_size(hdr); + + /* Tunnel link MTU is not large enough? 
This could be + * due to: + * 1) Link MTU has just changed or set differently; + * 2) Or FAILOVER on the top of a SYNCH message + * + * The 2nd case should not happen if peer supports + * TIPC_TUNNEL_ENHANCED + */ + if (pktlen > tnl->mtu - INT_H_SIZE) { + if (mtyp == FAILOVER_MSG && + (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, + &frags); + if (rc) { + pr_warn("%sunable to frag msg: rc %d\n", + link_co_err, rc); + return; + } + pktcnt += skb_queue_len(&frags) - 1; + pktcnt_need_update = true; + skb_queue_splice_tail_init(&frags, &tnlq); + continue; + } + /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED + * => Just warn it and return! + */ + pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", + link_co_err, msg_user(hdr), + msg_type(hdr), msg_size(hdr)); + return; } - if (less(seq_no, curr_seqno)) - break; - - prev = &queue_buf->next; - queue_buf = queue_buf->next; + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); + if (!tnlskb) { + pr_warn("%sunable to send packet\n", link_co_err); + return; + } + skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen); + __skb_queue_tail(&tnlq, tnlskb); + } + if (queue != &l->backlogq) { + queue = &l->backlogq; + goto tnl; } - buf->next = queue_buf; - *prev = buf; - return 1; -} + if (pktcnt_need_update) + skb_queue_walk(&tnlq, skb) { + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, pktcnt); + } -/* - * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet - */ -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, - struct sk_buff *buf) -{ - u32 seq_no = buf_seqno(buf); + tipc_link_xmit(tnl, &tnlq, xmitq); - if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { - link_recv_proto_msg(l_ptr, buf); - return; - } + if (mtyp == FAILOVER_MSG) { + struct sk_buff_head *fdefq = &tnl->failover_deferdq; - /* Record OOS packet arrival (force 
mismatch on next timeout) */ - l_ptr->checkpoint--; + tnl->drop_point = l->rcv_nxt; + tnl->failover_reasm_skb = l->reasm_buf; + l->reasm_buf = NULL; - /* - * Discard packet if a duplicate; otherwise add it to deferred queue - * and notify peer of gap as per protocol specification - */ - if (less(seq_no, mod(l_ptr->next_in_no))) { - l_ptr->stats.duplicates++; - kfree_skb(buf); - return; + /* Failover the link's deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); + } + skb_queue_splice_init(&l->deferdq, fdefq); } - - if (tipc_link_defer_pkt(&l_ptr->oldest_deferred_in, - &l_ptr->newest_deferred_in, buf)) { - l_ptr->deferred_inqueue_sz++; - l_ptr->stats.deferred_recv++; - if ((l_ptr->deferred_inqueue_sz % 16) == 1) - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); - } else - l_ptr->stats.duplicates++; } -/* - * Send protocol message to the other endpoint. +/** + * tipc_link_failover_prepare() - prepare tnl for link failover + * + * This is a special version of the precursor - tipc_link_tnl_prepare(), + * see the tipc_node_link_failover() for details + * + * @l: failover link + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited */ -void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority, u32 ack_mtu) -{ - struct sk_buff *buf = NULL; - struct tipc_msg *msg = l_ptr->pmsg; - u32 msg_size = sizeof(l_ptr->proto_msg); - int r_flag; - - /* Discard any previous message that was deferred due to congestion */ - if (l_ptr->proto_msg_queue) { - kfree_skb(l_ptr->proto_msg_queue); - l_ptr->proto_msg_queue = NULL; - } - - if (link_blocked(l_ptr)) - return; - - /* Abort non-RESET send if communication with node is prohibited */ - if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG)) - return; - - /* Create protocol message with "out-of-sequence" sequence number */ - msg_set_type(msg, 
msg_typ); - msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *fdefq = &tnl->failover_deferdq; - if (msg_typ == STATE_MSG) { - u32 next_sent = mod(l_ptr->next_out_no); + tipc_link_create_dummy_tnl_msg(tnl, xmitq); - if (!tipc_link_is_up(l_ptr)) - return; - if (l_ptr->next_out) - next_sent = buf_seqno(l_ptr->next_out); - msg_set_next_sent(msg, next_sent); - if (l_ptr->oldest_deferred_in) { - u32 rec = buf_seqno(l_ptr->oldest_deferred_in); - gap = mod(rec - mod(l_ptr->next_in_no)); - } - msg_set_seq_gap(msg, gap); - if (gap) - l_ptr->stats.sent_nacks++; - msg_set_link_tolerance(msg, tolerance); - msg_set_linkprio(msg, priority); - msg_set_max_pkt(msg, ack_mtu); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); - msg_set_probe(msg, probe_msg != 0); - if (probe_msg) { - u32 mtu = l_ptr->max_pkt; - - if ((mtu < l_ptr->max_pkt_target) && - link_working_working(l_ptr) && - l_ptr->fsm_msg_cnt) { - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - if (l_ptr->max_pkt_probes == 10) { - l_ptr->max_pkt_target = (msg_size - 4); - l_ptr->max_pkt_probes = 0; - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - } - l_ptr->max_pkt_probes++; - } + /* This failover link endpoint was never established before, + * so it has not received anything from peer. + * Otherwise, it must be a normal failover situation or the + * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes + * would have to start over from scratch instead. 
+ */ + tnl->drop_point = 1; + tnl->failover_reasm_skb = NULL; - l_ptr->stats.sent_probes++; - } - l_ptr->stats.sent_states++; - } else { /* RESET_MSG or ACTIVATE_MSG */ - msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); - msg_set_seq_gap(msg, 0); - msg_set_next_sent(msg, 1); - msg_set_probe(msg, 0); - msg_set_link_tolerance(msg, l_ptr->tolerance); - msg_set_linkprio(msg, l_ptr->priority); - msg_set_max_pkt(msg, l_ptr->max_pkt_target); + /* Initiate the link's failover deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); } +} - r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); - msg_set_redundant_link(msg, r_flag); - msg_set_linkprio(msg, l_ptr->priority); - msg_set_size(msg, msg_size); - - msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2))); - - buf = tipc_buf_acquire(msg_size); - if (!buf) - return; +/* tipc_link_validate_msg(): validate message against current link state + * Returns true if message should be accepted, otherwise false + */ +bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) +{ + u16 curr_session = l->peer_session; + u16 session = msg_session(hdr); + int mtyp = msg_type(hdr); - skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); - buf->priority = TC_PRIO_CONTROL; + if (msg_user(hdr) != LINK_PROTOCOL) + return true; - /* Defer message if bearer is already blocked */ - if (tipc_bearer_blocked(l_ptr->b_ptr)) { - l_ptr->proto_msg_queue = buf; - return; + switch (mtyp) { + case RESET_MSG: + if (!l->in_session) + return true; + /* Accept only RESET with new session number */ + return more(session, curr_session); + case ACTIVATE_MSG: + if (!l->in_session) + return true; + /* Accept only ACTIVATE with new or current session number */ + return !less(session, curr_session); + case STATE_MSG: + /* Accept only STATE with current session number */ + if (!l->in_session) + return false; + if (session != 
curr_session) + return false; + /* Extra sanity check */ + if (!tipc_link_is_up(l) && msg_ack(hdr)) + return false; + if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) + return true; + /* Accept only STATE with new sequence number */ + return !less(msg_seqno(hdr), l->rcv_nxt_state); + default: + return false; } - - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; - kfree_skb(buf); } -/* - * Receive protocol message : +/* tipc_link_proto_rcv(): receive link level protocol message : * Note that network plane id propagates through the network, and may - * change at any time. The node with lowest address rules + * change at any time. The node with lowest numerical id determines + * network plane */ -static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) -{ - u32 rec_gap = 0; - u32 max_pkt_info; - u32 max_pkt_ack; - u32 msg_tol; - struct tipc_msg *msg = buf_msg(buf); +static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_gap_ack_blks *ga = NULL; + bool reply = msg_probe(hdr), retransmitted = false; + u32 dlen = msg_data_sz(hdr), glen = 0, msg_max; + u16 peers_snd_nxt = msg_next_sent(hdr); + u16 peers_tol = msg_link_tolerance(hdr); + u16 peers_prio = msg_linkprio(hdr); + u16 gap = msg_seq_gap(hdr); + u16 ack = msg_ack(hdr); + u16 rcv_nxt = l->rcv_nxt; + u16 rcvgap = 0; + int mtyp = msg_type(hdr); + int rc = 0, released; + char *if_name; + void *data; + + trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; - if (link_blocked(l_ptr)) + if (tipc_link_is_blocked(l) || !xmitq) goto exit; - /* record unnumbered packet arrival (force mismatch on next timeout) */ - l_ptr->checkpoint--; + if (tipc_own_addr(l->net) > msg_prevnode(hdr)) + l->net_plane = msg_net_plane(hdr); - if (l_ptr->b_ptr->net_plane != msg_net_plane(msg)) - if (tipc_own_addr > msg_prevnode(msg)) - l_ptr->b_ptr->net_plane = 
msg_net_plane(msg); + if (skb_linearize(skb)) + goto exit; - l_ptr->owner->permit_changeover = msg_redundant_link(msg); + hdr = buf_msg(skb); + data = msg_data(hdr); - switch (msg_type(msg)) { + if (!tipc_link_validate_msg(l, hdr)) { + trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!"); + goto exit; + } + switch (mtyp) { case RESET_MSG: - if (!link_working_unknown(l_ptr) && - (l_ptr->peer_session != INVALID_SESSION)) { - if (less_eq(msg_session(msg), l_ptr->peer_session)) - break; /* duplicate or old reset: ignore */ - } + case ACTIVATE_MSG: + msg_max = msg_max_pkt(hdr); + if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id)) + break; + /* Complete own link name with peer's interface name */ + if_name = strrchr(l->name, ':') + 1; + if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME) + break; + if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) + break; + strscpy(if_name, data, TIPC_MAX_IF_NAME); - if (!msg_redundant_link(msg) && (link_working_working(l_ptr) || - link_working_unknown(l_ptr))) { - /* - * peer has lost contact -- don't allow peer's links - * to reactivate before we recognize loss & clean up - */ - l_ptr->owner->block_setup = WAIT_NODE_DOWN; + /* Update own tolerance if peer indicates a non-zero value */ + if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + l->tolerance = peers_tol; + l->bc_rcvlink->tolerance = peers_tol; } + /* Update own priority if peer's priority is higher */ + if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) + l->priority = peers_prio; - link_state_event(l_ptr, RESET_MSG); - - /* fall thru' */ - case ACTIVATE_MSG: - /* Update link settings according other endpoint's values */ - strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg)); - - msg_tol = msg_link_tolerance(msg); - if (msg_tol > l_ptr->tolerance) - link_set_supervision_props(l_ptr, msg_tol); - - if (msg_linkprio(msg) > l_ptr->priority) - l_ptr->priority = 
msg_linkprio(msg); - - max_pkt_info = msg_max_pkt(msg); - if (max_pkt_info) { - if (max_pkt_info < l_ptr->max_pkt_target) - l_ptr->max_pkt_target = max_pkt_info; - if (l_ptr->max_pkt > l_ptr->max_pkt_target) - l_ptr->max_pkt = l_ptr->max_pkt_target; - } else { - l_ptr->max_pkt = l_ptr->max_pkt_target; + /* If peer is going down we want full re-establish cycle */ + if (msg_peer_stopping(hdr)) { + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + break; } - /* Synchronize broadcast link info, if not done previously */ - if (!tipc_node_is_up(l_ptr->owner)) { - l_ptr->owner->bclink.last_sent = - l_ptr->owner->bclink.last_in = - msg_last_bcast(msg); - l_ptr->owner->bclink.oos_state = 0; + /* If this endpoint was re-created while peer was ESTABLISHING + * it doesn't know current session number. Force re-synch. + */ + if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && + l->session != msg_dest_session(hdr)) { + if (less(l->session, msg_dest_session(hdr))) + l->session = msg_dest_session(hdr) + 1; + break; } - l_ptr->peer_session = msg_session(msg); - l_ptr->peer_bearer_id = msg_bearer_id(msg); + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ + if (mtyp == RESET_MSG || !tipc_link_is_up(l)) + rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); + + /* ACTIVATE_MSG takes up link if it was already locally reset */ + if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; - if (msg_type(msg) == ACTIVATE_MSG) - link_state_event(l_ptr, ACTIVATE_MSG); + l->peer_session = msg_session(hdr); + l->in_session = true; + l->peer_bearer_id = msg_bearer_id(hdr); + if (l->mtu > msg_max) + l->mtu = msg_max; break; - case STATE_MSG: - msg_tol = msg_link_tolerance(msg); - if (msg_tol) - link_set_supervision_props(l_ptr, msg_tol); - - if (msg_linkprio(msg) && - (msg_linkprio(msg) != l_ptr->priority)) { - pr_warn("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, l_ptr->priority, - msg_linkprio(msg)); - l_ptr->priority = 
msg_linkprio(msg); - tipc_link_reset(l_ptr); /* Enforce change to take effect */ - break; - } - link_state_event(l_ptr, TRAFFIC_MSG_EVT); - l_ptr->stats.recv_states++; - if (link_reset_unknown(l_ptr)) + case STATE_MSG: + /* Validate Gap ACK blocks, drop if invalid */ + glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); + if (glen > dlen) break; - if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) { - rec_gap = mod(msg_next_sent(msg) - - mod(l_ptr->next_in_no)); - } + l->rcv_nxt_state = msg_seqno(hdr) + 1; - max_pkt_ack = msg_max_pkt(msg); - if (max_pkt_ack > l_ptr->max_pkt) { - l_ptr->max_pkt = max_pkt_ack; - l_ptr->max_pkt_probes = 0; + /* Update own tolerance if peer indicates a non-zero value */ + if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + l->tolerance = peers_tol; + l->bc_rcvlink->tolerance = peers_tol; } - - max_pkt_ack = 0; - if (msg_probe(msg)) { - l_ptr->stats.recv_probes++; - if (msg_size(msg) > sizeof(l_ptr->proto_msg)) - max_pkt_ack = msg_size(msg); + /* Update own prio if peer indicates a different value */ + if ((peers_prio != l->priority) && + tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { + l->priority = peers_prio; + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } - /* Protocol message before retransmits, reduce loss risk */ - if (l_ptr->owner->bclink.recv_permitted) - tipc_bclink_update_link_state(l_ptr->owner, - msg_last_bcast(msg)); + l->silent_intv_cnt = 0; + l->stats.recv_states++; + if (msg_probe(hdr)) + l->stats.recv_probes++; - if (rec_gap || (msg_probe(msg))) { - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, rec_gap, 0, 0, max_pkt_ack); - } - if (msg_seq_gap(msg)) { - l_ptr->stats.recv_nacks++; - tipc_link_retransmit(l_ptr, l_ptr->first_out, - msg_seq_gap(msg)); + if (!tipc_link_is_up(l)) { + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + break; } - break; + + tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, + &l->mon_state, l->bearer_id); + + /* Send NACK if peer has sent pkts we 
haven't received yet */ + if ((reply || msg_is_keepalive(hdr)) && + more(peers_snd_nxt, rcv_nxt) && + !tipc_link_is_synching(l) && + skb_queue_empty(&l->deferdq)) + rcvgap = peers_snd_nxt - l->rcv_nxt; + if (rcvgap || reply) + tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + rcvgap, 0, 0, xmitq); + + released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq, + &retransmitted, &rc); + if (gap) + l->stats.recv_nacks++; + if (released || retransmitted) + tipc_link_update_cwin(l, released, retransmitted); + if (released) + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); } exit: - kfree_skb(buf); + kfree_skb(skb); + return rc; } - -/* - * tipc_link_tunnel(): Send one message via a link belonging to - * another bearer. Owner node is locked. +/* tipc_link_build_bc_proto_msg() - create broadcast protocol message + */ +static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, + u16 peers_snd_nxt, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + struct tipc_msg *hdr; + struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); + u16 ack = l->rcv_nxt - 1; + u16 gap_to = peers_snd_nxt - 1; + + skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, + 0, l->addr, tipc_own_addr(l->net), 0, 0, 0); + if (!skb) + return false; + hdr = buf_msg(skb); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_bcast_ack(hdr, ack); + msg_set_bcgap_after(hdr, ack); + if (dfrd_skb) + gap_to = buf_seqno(dfrd_skb) - 1; + msg_set_bcgap_to(hdr, gap_to); + msg_set_non_seq(hdr, bcast); + __skb_queue_tail(xmitq, skb); + return true; +} + +/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. 
*/ -static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, - u32 selector) +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) { - struct tipc_link *tunnel; - struct sk_buff *buf; - u32 length = msg_size(msg); + struct sk_buff_head list; - tunnel = l_ptr->owner->active_links[selector & 1]; - if (!tipc_link_is_up(tunnel)) { - pr_warn("%stunnel link no longer available\n", link_co_err); - return; - } - msg_set_size(tunnel_hdr, length + INT_H_SIZE); - buf = tipc_buf_acquire(length + INT_H_SIZE); - if (!buf) { - pr_warn("%sunable to send tunnel msg\n", link_co_err); + __skb_queue_head_init(&list); + if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) return; - } - skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length); - tipc_link_send_buf(tunnel, buf); + msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); + tipc_link_xmit(l, &list, xmitq); } - - -/* - * changeover(): Send whole message queue via the remaining link - * Owner node is locked. 
+/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer */ -void tipc_link_changeover(struct tipc_link *l_ptr) +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) { - u32 msgcount = l_ptr->out_queue_size; - struct sk_buff *crs = l_ptr->first_out; - struct tipc_link *tunnel = l_ptr->owner->active_links[0]; - struct tipc_msg tunnel_hdr; - int split_bundles; + int mtyp = msg_type(hdr); + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); - if (!tunnel) + if (tipc_link_is_up(l)) return; - if (!l_ptr->owner->permit_changeover) { - pr_warn("%speer did not permit changeover\n", link_co_err); + if (msg_user(hdr) == BCAST_PROTOCOL) { + l->rcv_nxt = peers_snd_nxt; + l->state = LINK_ESTABLISHED; return; } - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); - msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); - msg_set_msgcnt(&tunnel_hdr, msgcount); - - if (!l_ptr->first_out) { - struct sk_buff *buf; + if (l->peer_caps & TIPC_BCAST_SYNCH) + return; - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); - msg_set_size(&tunnel_hdr, INT_H_SIZE); - tipc_link_send_buf(tunnel, buf); - } else { - pr_warn("%sunable to send changeover msg\n", - link_co_err); - } + if (msg_peer_node_is_up(hdr)) return; - } - split_bundles = (l_ptr->owner->active_links[0] != - l_ptr->owner->active_links[1]); + /* Compatibility: accept older, less safe initial synch data */ + if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) + l->rcv_nxt = peers_snd_nxt; +} - while (crs) { - struct tipc_msg *msg = buf_msg(crs); +/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state + */ +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) +{ + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + int rc = 0; - if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { - struct tipc_msg *m = msg_get_wrapped(msg); - unchar *pos = (unchar *)m; + if 
(!tipc_link_is_up(l)) + return rc; - msgcount = msg_msgcnt(msg); - while (msgcount--) { - msg_set_seqno(m, msg_seqno(msg)); - tipc_link_tunnel(l_ptr, &tunnel_hdr, m, - msg_link_selector(m)); - pos += align(msg_size(m)); - m = (struct tipc_msg *)pos; - } - } else { - tipc_link_tunnel(l_ptr, &tunnel_hdr, msg, - msg_link_selector(msg)); - } - crs = crs->next; - } -} + if (!msg_peer_node_is_up(hdr)) + return rc; -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel) -{ - struct sk_buff *iter; - struct tipc_msg tunnel_hdr; - - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); - msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size); - msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); - iter = l_ptr->first_out; - while (iter) { - struct sk_buff *outbuf; - struct tipc_msg *msg = buf_msg(iter); - u32 length = msg_size(msg); - - if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_size(&tunnel_hdr, length + INT_H_SIZE); - outbuf = tipc_buf_acquire(length + INT_H_SIZE); - if (outbuf == NULL) { - pr_warn("%sunable to send duplicate msg\n", - link_co_err); - return; - } - skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data, - length); - tipc_link_send_buf(tunnel, outbuf); - if (!tipc_link_is_up(l_ptr)) - return; - iter = iter->next; - } -} + /* Open when peer acknowledges our bcast init msg (pkt #1) */ + if (msg_ack(hdr)) + l->bc_peer_is_up = true; -/** - * buf_extract - extracts embedded TIPC message from another message - * @skb: encapsulating message buffer - * @from_pos: offset to extract from - * - * Returns a new message buffer containing an embedded message. The - * encapsulating message itself is left unchanged. 
- */ -static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) -{ - struct tipc_msg *msg = (struct tipc_msg *)(skb->data + from_pos); - u32 size = msg_size(msg); - struct sk_buff *eb; + if (!l->bc_peer_is_up) + return rc; - eb = tipc_buf_acquire(size); - if (eb) - skb_copy_to_linear_data(eb, msg, size); - return eb; -} + /* Ignore if peers_snd_nxt goes beyond receive window */ + if (more(peers_snd_nxt, l->rcv_nxt + l->window)) + return rc; -/* - * link_recv_changeover_msg(): Receive tunneled packet sent - * via other link. Node is locked. Return extracted buffer. - */ -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf) -{ - struct sk_buff *tunnel_buf = *buf; - struct tipc_link *dest_link; - struct tipc_msg *msg; - struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); - u32 msg_typ = msg_type(tunnel_msg); - u32 msg_count = msg_msgcnt(tunnel_msg); - u32 bearer_id = msg_bearer_id(tunnel_msg); + l->snd_nxt = peers_snd_nxt; + if (link_bc_rcv_gap(l)) + rc |= TIPC_LINK_SND_STATE; - if (bearer_id >= MAX_BEARERS) - goto exit; - dest_link = (*l_ptr)->owner->links[bearer_id]; - if (!dest_link) - goto exit; - if (dest_link == *l_ptr) { - pr_err("Unexpected changeover message on link <%s>\n", - (*l_ptr)->name); - goto exit; - } - *l_ptr = dest_link; - msg = msg_get_wrapped(tunnel_msg); - - if (msg_typ == DUPLICATE_MSG) { - if (less(msg_seqno(msg), mod(dest_link->next_in_no))) - goto exit; - *buf = buf_extract(tunnel_buf, INT_H_SIZE); - if (*buf == NULL) { - pr_warn("%sduplicate msg dropped\n", link_co_err); - goto exit; - } - kfree_skb(tunnel_buf); - return 1; - } + /* Return now if sender supports nack via STATE messages */ + if (l->peer_caps & TIPC_BCAST_STATE_NACK) + return rc; - /* First original message ?: */ - if (tipc_link_is_up(dest_link)) { - pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg, - dest_link->name); - tipc_link_reset(dest_link); - dest_link->exp_msg_count = msg_count; - if (!msg_count) - goto exit; 
- } else if (dest_link->exp_msg_count == START_CHANGEOVER) { - dest_link->exp_msg_count = msg_count; - if (!msg_count) - goto exit; + /* Otherwise, be backwards compatible */ + + if (!more(peers_snd_nxt, l->rcv_nxt)) { + l->nack_state = BC_NACK_SND_CONDITIONAL; + return 0; } - /* Receive original message */ - if (dest_link->exp_msg_count == 0) { - pr_warn("%sgot too many tunnelled messages\n", link_co_err); - goto exit; + /* Don't NACK if one was recently sent or peeked */ + if (l->nack_state == BC_NACK_SND_SUPPRESS) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + return 0; } - dest_link->exp_msg_count--; - if (less(msg_seqno(msg), dest_link->reset_checkpoint)) { - goto exit; - } else { - *buf = buf_extract(tunnel_buf, INT_H_SIZE); - if (*buf != NULL) { - kfree_skb(tunnel_buf); - return 1; - } else { - pr_warn("%soriginal msg dropped\n", link_co_err); - } + + /* Conditionally delay NACK sending until next synch rcv */ + if (l->nack_state == BC_NACK_SND_CONDITIONAL) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) + return 0; } -exit: - *buf = NULL; - kfree_skb(tunnel_buf); + + /* Send NACK now but suppress next one */ + tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); + l->nack_state = BC_NACK_SND_SUPPRESS; return 0; } -/* - * Bundler functionality: - */ -void tipc_link_recv_bundle(struct sk_buff *buf) +int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq) { - u32 msgcount = msg_msgcnt(buf_msg(buf)); - u32 pos = INT_H_SIZE; - struct sk_buff *obuf; - - while (msgcount--) { - obuf = buf_extract(buf, pos); - if (obuf == NULL) { - pr_warn("Link unable to unbundle message(s)\n"); - break; - } - pos += align(msg_size(buf_msg(obuf))); - tipc_net_route_msg(obuf); - } - kfree_skb(buf); -} + struct tipc_link *l = r->bc_sndlink; + bool unused = false; + int rc = 0; -/* - * Fragmentation/defragmentation: - */ 
- -/* - * link_send_long_buf: Entry for buffers needing fragmentation. - * The buffer is complete, inclusive total message length. - * Returns user data length. - */ -static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) -{ - struct sk_buff *buf_chain = NULL; - struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain; - struct tipc_msg *inmsg = buf_msg(buf); - struct tipc_msg fragm_hdr; - u32 insize = msg_size(inmsg); - u32 dsz = msg_data_sz(inmsg); - unchar *crs = buf->data; - u32 rest = insize; - u32 pack_sz = l_ptr->max_pkt; - u32 fragm_sz = pack_sz - INT_H_SIZE; - u32 fragm_no = 0; - u32 destaddr; - - if (msg_short(inmsg)) - destaddr = l_ptr->addr; - else - destaddr = msg_destnode(inmsg); + if (!tipc_link_is_up(r) || !r->bc_peer_is_up) + return 0; - /* Prepare reusable fragment header: */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, destaddr); + if (gap) { + l->stats.recv_nacks++; + r->stats.recv_nacks++; + } - /* Chop up message: */ - while (rest > 0) { - struct sk_buff *fragm; + if (less(acked, r->acked) || (acked == r->acked && !gap && !ga)) + return 0; - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } - fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (fragm == NULL) { - kfree_skb(buf); - while (buf_chain) { - buf = buf_chain; - buf_chain = buf_chain->next; - kfree_skb(buf); - } - return -ENOMEM; - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - fragm_no++; - msg_set_fragm_no(&fragm_hdr, fragm_no); - skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs, - fragm_sz); - buf_chain_tail->next = fragm; - buf_chain_tail = fragm; - - rest -= fragm_sz; - crs += fragm_sz; - msg_set_type(&fragm_hdr, FRAGMENT); - } - kfree_skb(buf); + trace_tipc_link_bc_ack(r, acked, gap, &l->transmq); + tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc); - /* Append chain of fragments to send queue & 
send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); - return dsz; + return rc; } -/* - * A pending message being re-assembled must store certain values - * to handle subsequent fragments correctly. The following functions - * help storing these values in unused, available fields in the - * pending message. This makes dynamic memory allocation unnecessary. +/* tipc_link_bc_nack_rcv(): receive broadcast nack message + * This function is here for backwards compatibility, since + * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. */ -static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno) +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) { - msg_set_seqno(buf_msg(buf), seqno); -} + struct tipc_msg *hdr = buf_msg(skb); + u32 dnode = msg_destnode(hdr); + int mtyp = msg_type(hdr); + u16 acked = msg_bcast_ack(hdr); + u16 from = acked + 1; + u16 to = msg_bcgap_to(hdr); + u16 peers_snd_nxt = to + 1; + int rc = 0; -static u32 get_fragm_size(struct sk_buff *buf) -{ - return msg_ack(buf_msg(buf)); -} + kfree_skb(skb); -static void set_fragm_size(struct sk_buff *buf, u32 sz) -{ - msg_set_ack(buf_msg(buf), sz); -} - -static u32 get_expected_frags(struct sk_buff *buf) -{ - return msg_bcast_ack(buf_msg(buf)); -} + if (!tipc_link_is_up(l) || !l->bc_peer_is_up) + return 0; -static void set_expected_frags(struct sk_buff *buf, u32 exp) -{ - msg_set_bcast_ack(buf_msg(buf), exp); -} + if (mtyp != STATE_MSG) + return 0; -/* - * tipc_link_recv_fragment(): Called with node lock on. Returns - * the reassembled buffer if message is complete. 
- */ -int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, - struct tipc_msg **m) -{ - struct sk_buff *prev = NULL; - struct sk_buff *fbuf = *fb; - struct tipc_msg *fragm = buf_msg(fbuf); - struct sk_buff *pbuf = *pending; - u32 long_msg_seq_no = msg_long_msgno(fragm); - - *fb = NULL; - - /* Is there an incomplete message waiting for this fragment? */ - while (pbuf && ((buf_seqno(pbuf) != long_msg_seq_no) || - (msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) { - prev = pbuf; - pbuf = pbuf->next; + if (dnode == tipc_own_addr(l->net)) { + rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq, + xmitq); + l->stats.recv_nacks++; + return rc; } - if (!pbuf && (msg_type(fragm) == FIRST_FRAGMENT)) { - struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm); - u32 msg_sz = msg_size(imsg); - u32 fragm_sz = msg_data_sz(fragm); - u32 exp_fragm_cnt; - u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE; - - if (msg_type(imsg) == TIPC_MCAST_MSG) - max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; - if (fragm_sz == 0 || msg_size(imsg) > max) { - kfree_skb(fbuf); - return 0; - } - exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz); - pbuf = tipc_buf_acquire(msg_size(imsg)); - if (pbuf != NULL) { - pbuf->next = *pending; - *pending = pbuf; - skb_copy_to_linear_data(pbuf, imsg, - msg_data_sz(fragm)); - /* Prepare buffer for subsequent fragments. */ - set_long_msg_seqno(pbuf, long_msg_seq_no); - set_fragm_size(pbuf, fragm_sz); - set_expected_frags(pbuf, exp_fragm_cnt - 1); - } else { - pr_debug("Link unable to reassemble fragmented message\n"); - kfree_skb(fbuf); - return -1; - } - kfree_skb(fbuf); - return 0; - } else if (pbuf && (msg_type(fragm) != FIRST_FRAGMENT)) { - u32 dsz = msg_data_sz(fragm); - u32 fsz = get_fragm_size(pbuf); - u32 crs = ((msg_fragm_no(fragm) - 1) * fsz); - u32 exp_frags = get_expected_frags(pbuf) - 1; - skb_copy_to_linear_data_offset(pbuf, crs, - msg_data(fragm), dsz); - kfree_skb(fbuf); - - /* Is message complete? 
*/ - if (exp_frags == 0) { - if (prev) - prev->next = pbuf->next; - else - *pending = pbuf->next; - msg_reset_reroute_cnt(buf_msg(pbuf)); - *fb = pbuf; - *m = buf_msg(pbuf); - return 1; - } - set_expected_frags(pbuf, exp_frags); - return 0; - } - kfree_skb(fbuf); + /* Msg for other node => suppress own NACK at next sync if applicable */ + if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) + l->nack_state = BC_NACK_SND_SUPPRESS; + return 0; } -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) { - if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) - return; + int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE); - l_ptr->tolerance = tolerance; - l_ptr->continuity_interval = - ((tolerance / 4) > 500) ? 500 : tolerance / 4; - l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4); + l->min_win = min_win; + l->ssthresh = max_win; + l->max_win = max_win; + l->window = min_win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8; + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) +/** + * tipc_link_reset_stats - reset link statistics + * @l: pointer to link + */ +void tipc_link_reset_stats(struct tipc_link *l) { - /* Data messages from this node, inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6; - /* Transiting data messages,inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600; 
- l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200; - l_ptr->queue_limit[CONN_MANAGER] = 1200; - l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; - l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000; - /* FRAGMENT and LAST_FRAGMENT packets */ - l_ptr->queue_limit[MSG_FRAGMENTER] = 4000; + memset(&l->stats, 0, sizeof(l->stats)); } -/** - * link_find_link - locate link by name - * @name: ptr to link name string - * @node: ptr to area to be filled with ptr to associated node - * - * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; - * this also prevents link deletion. - * - * Returns pointer to link (or 0 if invalid link name). - */ -static struct tipc_link *link_find_link(const char *name, - struct tipc_node **node) +static void link_print(struct tipc_link *l, const char *str) { - struct tipc_link_name link_name_parts; - struct tipc_bearer *b_ptr; - struct tipc_link *l_ptr; + struct sk_buff *hskb = skb_peek(&l->transmq); + u16 head = hskb ? 
msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; + u16 tail = l->snd_nxt - 1; - if (!link_name_validate(name, &link_name_parts)) - return NULL; + pr_info("%s Link <%s> state %x\n", str, l->name, l->state); + pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n", + skb_queue_len(&l->transmq), head, tail, + skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt); +} - b_ptr = tipc_bearer_find_interface(link_name_parts.if_local); - if (!b_ptr) - return NULL; +/* Parse and validate nested (link) properties valid for media, bearer and link + */ +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) +{ + int err; - *node = tipc_node_find(link_name_parts.addr_peer); - if (!*node) - return NULL; + err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy, NULL); + if (err) + return err; - l_ptr = (*node)->links[b_ptr->identity]; - if (!l_ptr || strcmp(l_ptr->name, name)) - return NULL; + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; - return l_ptr; -} - -/** - * link_value_is_valid -- validate proposed link tolerance/priority/window - * - * @cmd: value type (TIPC_CMD_SET_LINK_*) - * @new_value: the new value - * - * Returns 1 if value is within range, 0 if not. 
- */ -static int link_value_is_valid(u16 cmd, u32 new_value) -{ - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - return (new_value >= TIPC_MIN_LINK_TOL) && - (new_value <= TIPC_MAX_LINK_TOL); - case TIPC_CMD_SET_LINK_PRI: - return (new_value <= TIPC_MAX_LINK_PRI); - case TIPC_CMD_SET_LINK_WINDOW: - return (new_value >= TIPC_MIN_LINK_WIN) && - (new_value <= TIPC_MAX_LINK_WIN); + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (prio > TIPC_MAX_LINK_PRI) + return -EINVAL; } - return 0; -} -/** - * link_cmd_set_value - change priority/tolerance/window for link/bearer/media - * @name: ptr to link, bearer, or media name - * @new_value: new value of link, bearer, or media setting - * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) - * - * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted. - * - * Returns 0 if value updated and negative value on error. - */ -static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) -{ - struct tipc_node *node; - struct tipc_link *l_ptr; - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; - - l_ptr = link_find_link(name, &node); - if (l_ptr) { - /* - * acquire node lock for tipc_link_send_proto_msg(). - * see "TIPC locking policy" in net.c. 
- */ - tipc_node_lock(node); - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - link_set_supervision_props(l_ptr, new_value); - tipc_link_send_proto_msg(l_ptr, - STATE_MSG, 0, 0, new_value, 0, 0); - break; - case TIPC_CMD_SET_LINK_PRI: - l_ptr->priority = new_value; - tipc_link_send_proto_msg(l_ptr, - STATE_MSG, 0, 0, 0, new_value, 0); - break; - case TIPC_CMD_SET_LINK_WINDOW: - tipc_link_set_queue_limits(l_ptr, new_value); - break; - } - tipc_node_unlock(node); - return 0; - } + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; - b_ptr = tipc_bearer_find(name); - if (b_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - b_ptr->tolerance = new_value; - return 0; - case TIPC_CMD_SET_LINK_PRI: - b_ptr->priority = new_value; - return 0; - case TIPC_CMD_SET_LINK_WINDOW: - b_ptr->window = new_value; - return 0; - } - return -EINVAL; + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) + return -EINVAL; } - m_ptr = tipc_media_find(name); - if (!m_ptr) - return -ENODEV; - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - m_ptr->tolerance = new_value; - return 0; - case TIPC_CMD_SET_LINK_PRI: - m_ptr->priority = new_value; - return 0; - case TIPC_CMD_SET_LINK_WINDOW: - m_ptr->window = new_value; - return 0; + if (props[TIPC_NLA_PROP_WIN]) { + u32 max_win; + + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN) + return -EINVAL; } - return -EINVAL; -} -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, - u16 cmd) -{ - struct tipc_link_config *args; - u32 new_value; - int res; + return 0; +} - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); +static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) +{ + int i; + struct nlattr *stats; + + struct nla_map { + u32 key; + u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, 0}, + 
{TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, 0}, + {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled}, + {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ? + s->msg_length_counts : 1}, + {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts}, + {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total}, + {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]}, + {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]}, + {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]}, + {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]}, + {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]}, + {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]}, + {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]}, + {TIPC_NLA_STATS_RX_STATES, s->recv_states}, + {TIPC_NLA_STATS_RX_PROBES, s->recv_probes}, + {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv}, + {TIPC_NLA_STATS_TX_STATES, s->sent_states}, + {TIPC_NLA_STATS_TX_PROBES, s->sent_probes}, + {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, s->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, s->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, s->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ? 
+ (s->accu_queue_sz / s->queue_sz_counts) : 0} + }; + + stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); + if (!stats) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, stats); - args = (struct tipc_link_config *)TLV_DATA(req_tlv_area); - new_value = ntohl(args->value); + return 0; +msg_full: + nla_nest_cancel(skb, stats); + + return -EMSGSIZE; +} + +/* Caller should hold appropriate locks to protect the link */ +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) +{ + u32 self = tipc_own_addr(net); + struct nlattr *attrs; + struct nlattr *prop; + void *hdr; + int err; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self))) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts)) + goto attr_msg_full; + + if (tipc_link_is_up(link)) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + if (link->active) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) + goto attr_msg_full; + + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, + link->window)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, 
TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_stats(msg->skb, &link->stats); + if (err) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); - if (!link_value_is_valid(cmd, new_value)) - return tipc_cfg_reply_error_string( - "cannot change, value invalid"); + return 0; - if (!strcmp(args->name, tipc_bclink_name)) { - if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && - (tipc_bclink_set_queue_limits(new_value) == 0)) - return tipc_cfg_reply_none(); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change setting on broadcast link)"); - } +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) +{ + int i; + struct nlattr *nest; + + struct nla_map { + __u32 key; + __u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, 
stats->queue_sz_counts ? + (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; + + nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, nest); - read_lock_bh(&tipc_net_lock); - res = link_cmd_set_value(args->name, new_value, cmd); - read_unlock_bh(&tipc_net_lock); - if (res) - return tipc_cfg_reply_error_string("cannot change link setting"); + return 0; +msg_full: + nla_nest_cancel(skb, nest); - return tipc_cfg_reply_none(); + return -EMSGSIZE; } -/** - * link_reset_statistics - reset link statistics - * @l_ptr: pointer to link - */ -static void link_reset_statistics(struct tipc_link *l_ptr) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl) { - memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); - l_ptr->stats.sent_info = l_ptr->next_out_no; - l_ptr->stats.recv_info = l_ptr->next_in_no; -} + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + u32 bc_mode = tipc_bcast_get_mode(net); + u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space) -{ - char *link_name; - struct tipc_link *l_ptr; - struct tipc_node *node; + if (!bcl) + return 0; - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); + tipc_bcast_lock(net); + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) { + tipc_bcast_unlock(net); + return -EMSGSIZE; + } + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, 
TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) + goto attr_msg_full; + + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) + goto prop_msg_full; + if (bc_mode & BCLINK_MODE_SEL) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, + bc_ratio)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); + if (err) + goto attr_msg_full; + + tipc_bcast_unlock(net); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); - link_name = (char *)TLV_DATA(req_tlv_area); - if (!strcmp(link_name, tipc_bclink_name)) { - if (tipc_bclink_reset_stats()) - return tipc_cfg_reply_error_string("link not found"); - return tipc_cfg_reply_none(); - } + return 0; - read_lock_bh(&tipc_net_lock); - l_ptr = link_find_link(link_name, &node); - if (!l_ptr) { - read_unlock_bh(&tipc_net_lock); - return tipc_cfg_reply_error_string("link not found"); - } +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bcast_unlock(net); + genlmsg_cancel(msg->skb, hdr); - tipc_node_lock(node); - link_reset_statistics(l_ptr); - tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); - return tipc_cfg_reply_none(); + return -EMSGSIZE; } -/** - * percent - convert count to a percentage of total (rounding up or down) - */ -static u32 percent(u32 count, u32 total) +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct sk_buff_head *xmitq) { - return (count * 100 + (total / 2)) / total; + l->tolerance = tol; + if (l->bc_rcvlink) + l->bc_rcvlink->tolerance = tol; + if (tipc_link_is_up(l)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, 
xmitq); } -/** - * tipc_link_stats - print link statistics - * @name: link name - * @buf: print buffer area - * @buf_size: size of print buffer area - * - * Returns length of print buffer data string (or 0 if error) - */ -static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) +void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq) { - struct tipc_link *l; - struct tipc_stats *s; - struct tipc_node *node; - char *status; - u32 profile_total = 0; - int ret; - - if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(buf, buf_size); - - read_lock_bh(&tipc_net_lock); - l = link_find_link(name, &node); - if (!l) { - read_unlock_bh(&tipc_net_lock); - return 0; - } - tipc_node_lock(node); - s = &l->stats; - - if (tipc_link_is_active(l)) - status = "ACTIVE"; - else if (tipc_link_is_up(l)) - status = "STANDBY"; - else - status = "DEFUNCT"; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " %s MTU:%u Priority:%u Tolerance:%u ms" - " Window:%u packets\n", - l->name, status, l->max_pkt, l->priority, - l->tolerance, l->queue_limit[0]); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_in_no - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_out_no - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - - profile_total = s->msg_length_counts; - if (!profile_total) - profile_total = 1; - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX profile sample:%u packets average:%u octets\n" - " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% " - "-16384:%u%% -32768:%u%% -66000:%u%%\n", - s->msg_length_counts, - s->msg_lengths_total / profile_total, - percent(s->msg_length_profile[0], profile_total), - percent(s->msg_length_profile[1], profile_total), - 
percent(s->msg_length_profile[2], profile_total), - percent(s->msg_length_profile[3], profile_total), - percent(s->msg_length_profile[4], profile_total), - percent(s->msg_length_profile[5], profile_total), - percent(s->msg_length_profile[6], profile_total)); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX states:%u probes:%u naks:%u defs:%u" - " dups:%u\n", s->recv_states, s->recv_probes, - s->recv_nacks, s->deferred_recv, s->duplicates); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX states:%u probes:%u naks:%u acks:%u" - " dups:%u\n", s->sent_states, s->sent_probes, - s->sent_nacks, s->sent_acks, s->retransmitted); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue" - " max:%u avg:%u\n", s->link_congs, - s->max_queue_sz, s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); - return ret; -} - -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - int str_len; - int pb_len; - char *pb; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area), - pb, pb_len); - if (!str_len) { - kfree_skb(buf); - return tipc_cfg_reply_error_string("link not found"); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); + l->priority = prio; + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); +} - return buf; +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) +{ + l->abort_limit = limit; } /** - * tipc_link_get_max_pkt - get maximum packet size 
to use when sending to destination - * @dest: network address of destination node - * @selector: used to select from set of active links - * - * If no active link can be found, uses default maximum packet size. + * tipc_link_dump - dump TIPC link data + * @l: tipc link to be dumped + * @dqueues: bitmask to decide if any link queue to be dumped? + * - TIPC_DUMP_NONE: don't dump link queues + * - TIPC_DUMP_TRANSMQ: dump link transmq queue + * - TIPC_DUMP_BACKLOGQ: dump link backlog queue + * - TIPC_DUMP_DEFERDQ: dump link deferd queue + * - TIPC_DUMP_INPUTQ: dump link input queue + * - TIPC_DUMP_WAKEUP: dump link wakeup queue + * - TIPC_DUMP_ALL: dump all the link queues above + * @buf: returned buffer of dump data in format */ -u32 tipc_link_get_max_pkt(u32 dest, u32 selector) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - u32 res = MAX_PKT_DEFAULT; - - if (dest == tipc_own_addr) - return MAX_MSG_SIZE; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = l_ptr->max_pkt; - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - return res; -} - -static void link_print(struct tipc_link *l_ptr, const char *str) +int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) { - pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name); + int i = 0; + size_t sz = (dqueues) ? 
LINK_LMAX : LINK_LMIN; + struct sk_buff_head *list; + struct sk_buff *hskb, *tskb; + u32 len; - if (link_working_unknown(l_ptr)) - pr_cont(":WU\n"); - else if (link_reset_reset(l_ptr)) - pr_cont(":RR\n"); - else if (link_reset_unknown(l_ptr)) - pr_cont(":RU\n"); - else if (link_working_working(l_ptr)) - pr_cont(":WW\n"); - else - pr_cont("\n"); + if (!l) { + i += scnprintf(buf, sz, "link data: (null)\n"); + return i; + } + + i += scnprintf(buf, sz, "link data: %x", l->addr); + i += scnprintf(buf + i, sz - i, " %x", l->state); + i += scnprintf(buf + i, sz - i, " %u", l->in_session); + i += scnprintf(buf + i, sz - i, " %u", l->session); + i += scnprintf(buf + i, sz - i, " %u", l->peer_session); + i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt); + i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt); + i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt_state); + i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt_state); + i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); + i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); + i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); + i += scnprintf(buf + i, sz - i, " %u", 0); + i += scnprintf(buf + i, sz - i, " %u", 0); + i += scnprintf(buf + i, sz - i, " %u", l->acked); + + list = &l->transmq; + len = skb_queue_len(list); + hskb = skb_peek(list); + tskb = skb_peek_tail(list); + i += scnprintf(buf + i, sz - i, " | %u %u %u", len, + (hskb) ? msg_seqno(buf_msg(hskb)) : 0, + (tskb) ? msg_seqno(buf_msg(tskb)) : 0); + + list = &l->deferdq; + len = skb_queue_len(list); + hskb = skb_peek(list); + tskb = skb_peek_tail(list); + i += scnprintf(buf + i, sz - i, " | %u %u %u", len, + (hskb) ? msg_seqno(buf_msg(hskb)) : 0, + (tskb) ? msg_seqno(buf_msg(tskb)) : 0); + + list = &l->backlogq; + len = skb_queue_len(list); + hskb = skb_peek(list); + tskb = skb_peek_tail(list); + i += scnprintf(buf + i, sz - i, " | %u %u %u", len, + (hskb) ? msg_seqno(buf_msg(hskb)) : 0, + (tskb) ? 
msg_seqno(buf_msg(tskb)) : 0); + + list = l->inputq; + len = skb_queue_len(list); + hskb = skb_peek(list); + tskb = skb_peek_tail(list); + i += scnprintf(buf + i, sz - i, " | %u %u %u\n", len, + (hskb) ? msg_seqno(buf_msg(hskb)) : 0, + (tskb) ? msg_seqno(buf_msg(tskb)) : 0); + + if (dqueues & TIPC_DUMP_TRANSMQ) { + i += scnprintf(buf + i, sz - i, "transmq: "); + i += tipc_list_dump(&l->transmq, false, buf + i); + } + if (dqueues & TIPC_DUMP_BACKLOGQ) { + i += scnprintf(buf + i, sz - i, + "backlogq: <%u %u %u %u %u>, ", + l->backlog[TIPC_LOW_IMPORTANCE].len, + l->backlog[TIPC_MEDIUM_IMPORTANCE].len, + l->backlog[TIPC_HIGH_IMPORTANCE].len, + l->backlog[TIPC_CRITICAL_IMPORTANCE].len, + l->backlog[TIPC_SYSTEM_IMPORTANCE].len); + i += tipc_list_dump(&l->backlogq, false, buf + i); + } + if (dqueues & TIPC_DUMP_DEFERDQ) { + i += scnprintf(buf + i, sz - i, "deferdq: "); + i += tipc_list_dump(&l->deferdq, false, buf + i); + } + if (dqueues & TIPC_DUMP_INPUTQ) { + i += scnprintf(buf + i, sz - i, "inputq: "); + i += tipc_list_dump(l->inputq, false, buf + i); + } + if (dqueues & TIPC_DUMP_WAKEUP) { + i += scnprintf(buf + i, sz - i, "wakeup: "); + i += tipc_list_dump(&l->wakeupq, false, buf + i); + } + + return i; } diff --git a/net/tipc/link.h b/net/tipc/link.h index c048ed1cbd76..d80f5649b395 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -1,7 +1,7 @@ /* * net/tipc/link.h: Include file for TIPC link code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2013-2014, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -37,283 +37,122 @@ #ifndef _TIPC_LINK_H #define _TIPC_LINK_H +#include <net/genetlink.h> #include "msg.h" #include "node.h" -/* - * Out-of-range value for link sequence numbers - */ -#define INVALID_LINK_SEQ 0x10000 +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ -/* - * Link states +/* Link FSM events: */ -#define WORKING_WORKING 560810u -#define WORKING_UNKNOWN 560811u -#define RESET_UNKNOWN 560812u -#define RESET_RESET 560813u - -/* - * Starting value for maximum packet size negotiation on unicast links - * (unless bearer MTU is less) - */ -#define MAX_PKT_DEFAULT 1500 - -struct tipc_stats { - u32 sent_info; /* used in counting # sent packets */ - u32 recv_info; /* used in counting # recv'd packets */ - u32 sent_states; - u32 recv_states; - u32 sent_probes; - u32 recv_probes; - u32 sent_nacks; - u32 recv_nacks; - u32 sent_acks; - u32 sent_bundled; - u32 sent_bundles; - u32 recv_bundled; - u32 recv_bundles; - u32 retransmitted; - u32 sent_fragmented; - u32 sent_fragments; - u32 recv_fragmented; - u32 recv_fragments; - u32 link_congs; /* # port sends blocked by congestion */ - u32 deferred_recv; - u32 duplicates; - u32 max_queue_sz; /* send queue size high water mark */ - u32 accu_queue_sz; /* used for send queue size profiling */ - u32 queue_sz_counts; /* used for send queue size profiling */ - u32 msg_length_counts; /* used for message length profiling */ - u32 msg_lengths_total; /* used for message length profiling */ - u32 msg_length_profile[7]; /* used for msg. 
length profiling */ +enum { + LINK_ESTABLISH_EVT = 0xec1ab1e, + LINK_PEER_RESET_EVT = 0x9eed0e, + LINK_FAILURE_EVT = 0xfa110e, + LINK_RESET_EVT = 0x10ca1d0e, + LINK_FAILOVER_BEGIN_EVT = 0xfa110bee, + LINK_FAILOVER_END_EVT = 0xfa110ede, + LINK_SYNCH_BEGIN_EVT = 0xc1ccbee, + LINK_SYNCH_END_EVT = 0xc1ccede }; -/** - * struct tipc_link - TIPC link data structure - * @addr: network address of link's peer node - * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer - * @owner: pointer to peer node - * @link_list: adjacent links in bearer's list of links - * @started: indicates if link has been started - * @checkpoint: reference point for triggering link continuity checking - * @peer_session: link session # being used by peer end of link - * @peer_bearer_id: bearer id used by link's peer endpoint - * @b_ptr: pointer to bearer used by link - * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @continuity_interval: link continuity testing interval [in ms] - * @abort_limit: # of unacknowledged continuity probes needed to reset link - * @state: current state of link FSM - * @blocked: indicates if link has been administratively blocked - * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field - * @priority: current link priority - * @queue_limit: outbound message queue congestion thresholds (indexed by user) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_checkpoint: seq # of last acknowledged message at time of link reset - * @max_pkt: current maximum packet size for this link - * @max_pkt_target: desired maximum packet size for this link - * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target) - * @out_queue_size: # of messages in outbound message queue - * @first_out: ptr to first 
outbound message in queue - * @last_out: ptr to last outbound message in queue - * @next_out_no: next sequence number to use for outbound messages - * @last_retransmitted: sequence number of most recently retransmitted message - * @stale_count: # of identical retransmit requests made by peer - * @next_in_no: next sequence number to expect for inbound messages - * @deferred_inqueue_sz: # of messages in inbound message queue - * @oldest_deferred_in: ptr to first inbound message in queue - * @newest_deferred_in: ptr to last inbound message in queue - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer - * @proto_msg_queue: ptr to (single) outbound control message - * @retransm_queue_size: number of messages to retransmit - * @retransm_queue_head: sequence number of first message to retransmit - * @next_out: ptr to first unsent outbound message in queue - * @waiting_ports: linked list of ports waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @defragm_buf: list of partially reassembled inbound message fragments - * @stats: collects statistics regarding link activity +/* Events returned from link at packet reception or at timeout */ -struct tipc_link { - u32 addr; - char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr media_addr; - struct timer_list timer; - struct tipc_node *owner; - struct list_head link_list; - - /* Management and link supervision data */ - int started; - u32 checkpoint; - u32 peer_session; - u32 peer_bearer_id; - struct tipc_bearer *b_ptr; - u32 tolerance; - u32 continuity_interval; - u32 abort_limit; - int state; - int blocked; - u32 fsm_msg_cnt; - struct { - unchar hdr[INT_H_SIZE]; - unchar body[TIPC_MAX_IF_NAME]; - } proto_msg; - struct tipc_msg *pmsg; - u32 priority; - u32 queue_limit[15]; /* queue_limit[0]==window limit */ - - /* Changeover */ - u32 exp_msg_count; - u32 reset_checkpoint; - - /* Max packet negotiation */ - u32 max_pkt; - u32 
max_pkt_target; - u32 max_pkt_probes; - - /* Sending */ - u32 out_queue_size; - struct sk_buff *first_out; - struct sk_buff *last_out; - u32 next_out_no; - u32 last_retransmitted; - u32 stale_count; - - /* Reception */ - u32 next_in_no; - u32 deferred_inqueue_sz; - struct sk_buff *oldest_deferred_in; - struct sk_buff *newest_deferred_in; - u32 unacked_window; - - /* Congestion handling */ - struct sk_buff *proto_msg_queue; - u32 retransm_queue_size; - u32 retransm_queue_head; - struct sk_buff *next_out; - struct list_head waiting_ports; - - /* Fragmentation/defragmentation */ - u32 long_msg_seq_no; - struct sk_buff *defragm_buf; - - /* Statistics */ - struct tipc_stats stats; +enum { + TIPC_LINK_UP_EVT = 1, + TIPC_LINK_DOWN_EVT = (1 << 1), + TIPC_LINK_SND_STATE = (1 << 2) }; -struct tipc_port; - -struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, - struct tipc_bearer *b_ptr, - const struct tipc_media_addr *media_addr); -void tipc_link_delete(struct tipc_link *l_ptr); -void tipc_link_changeover(struct tipc_link *l_ptr); -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest); -void tipc_link_reset_fragments(struct tipc_link *l_ptr); -int tipc_link_is_up(struct tipc_link *l_ptr); -int tipc_link_is_active(struct tipc_link *l_ptr); -u32 tipc_link_push_packet(struct tipc_link *l_ptr); -void tipc_link_stop(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); -void tipc_link_reset(struct tipc_link *l_ptr); -int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); -void tipc_link_send_names(struct list_head *message_list, u32 dest); -int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); -u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -int tipc_link_send_sections_fast(struct 
tipc_port *sender, - struct iovec const *msg_sect, - const u32 num_sect, - unsigned int total_len, - u32 destnode); -void tipc_link_recv_bundle(struct sk_buff *buf); -int tipc_link_recv_fragment(struct sk_buff **pending, - struct sk_buff **fb, - struct tipc_msg **msg); -void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority, - u32 acked_mtu); -void tipc_link_push_queue(struct tipc_link *l_ptr); -u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff *buf); -void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all); -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); -void tipc_link_retransmit(struct tipc_link *l_ptr, - struct sk_buff *start, u32 retransmits); - -/* - * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) - */ -static inline u32 buf_seqno(struct sk_buff *buf) -{ - return msg_seqno(buf_msg(buf)); -} - -static inline u32 mod(u32 x) -{ - return x & 0xffffu; -} - -static inline int between(u32 lower, u32 upper, u32 n) -{ - if ((lower < n) && (n < upper)) - return 1; - if ((upper < lower) && ((n > lower) || (n < upper))) - return 1; - return 0; -} - -static inline int less_eq(u32 left, u32 right) -{ - return mod(right - left) < 32768u; -} - -static inline int less(u32 left, u32 right) -{ - return less_eq(left, right) && (mod(right) != mod(left)); -} - -static inline u32 lesser(u32 left, u32 right) -{ - return less_eq(left, right) ? 
left : right; -} - - -/* - * Link status checking routines +/* Starting value for maximum packet size negotiation on unicast links + * (unless bearer MTU is less) */ -static inline int link_working_working(struct tipc_link *l_ptr) -{ - return l_ptr->state == WORKING_WORKING; -} - -static inline int link_working_unknown(struct tipc_link *l_ptr) -{ - return l_ptr->state == WORKING_UNKNOWN; -} - -static inline int link_reset_unknown(struct tipc_link *l_ptr) -{ - return l_ptr->state == RESET_UNKNOWN; -} - -static inline int link_reset_reset(struct tipc_link *l_ptr) -{ - return l_ptr->state == RESET_RESET; -} - -static inline int link_blocked(struct tipc_link *l_ptr) -{ - return l_ptr->exp_msg_count || l_ptr->blocked; -} - -static inline int link_congested(struct tipc_link *l_ptr) -{ - return l_ptr->out_queue_size >= l_ptr->queue_limit[0]; -} +#define MAX_PKT_DEFAULT 1500 +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + u32 min_win, u32 max_win, u32 session, u32 ownnode, + u32 peer, u8 *peer_id, u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link **link); +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, + int mtu, u32 min_win, u32 max_win, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link); +void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, + int mtyp, struct sk_buff_head *xmitq); +void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, + struct sk_buff_head *xmitq); +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq); +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_fsm_evt(struct tipc_link *l, int evt); +bool tipc_link_is_up(struct tipc_link *l); 
+bool tipc_link_peer_is_down(struct tipc_link *l); +bool tipc_link_is_reset(struct tipc_link *l); +bool tipc_link_is_establishing(struct tipc_link *l); +bool tipc_link_is_synching(struct tipc_link *l); +bool tipc_link_is_failingover(struct tipc_link *l); +bool tipc_link_is_blocked(struct tipc_link *l); +void tipc_link_set_active(struct tipc_link *l, bool active); +void tipc_link_reset(struct tipc_link *l); +void tipc_link_reset_stats(struct tipc_link *l); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, + struct sk_buff_head *xmitq); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l); +u16 tipc_link_rcv_nxt(struct tipc_link *l); +u16 tipc_link_acked(struct tipc_link *l); +u32 tipc_link_id(struct tipc_link *l); +char *tipc_link_name(struct tipc_link *l); +u32 tipc_link_state(struct tipc_link *l); +char tipc_link_plane(struct tipc_link *l); +int tipc_link_prio(struct tipc_link *l); +int tipc_link_min_win(struct tipc_link *l); +int tipc_link_max_win(struct tipc_link *l); +void tipc_link_update_caps(struct tipc_link *l, u16 capabilities); +bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr); +unsigned long tipc_link_tolerance(struct tipc_link *l); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct sk_buff_head *xmitq); +void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq); +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win); +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags); +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); +int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +void tipc_link_add_bc_peer(struct 
tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq); +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq); +int tipc_link_bc_peers(struct tipc_link *l); +void tipc_link_set_mtu(struct tipc_link *l, int mtu); +int tipc_link_mtu(struct tipc_link *l); +int tipc_link_mss(struct tipc_link *l); +u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, + struct tipc_msg *hdr, bool uc); +int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq); +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); +bool tipc_link_too_silent(struct tipc_link *l); +struct net *tipc_link_net(struct tipc_link *l); #endif diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c new file mode 100644 index 000000000000..572b79bf76ce --- /dev/null +++ b/net/tipc/monitor.c @@ -0,0 +1,875 @@ +/* + * net/tipc/monitor.c + * + * Copyright (c) 2016, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/genetlink.h> +#include "core.h" +#include "addr.h" +#include "monitor.h" +#include "bearer.h" + +#define MAX_MON_DOMAIN 64 +#define MON_TIMEOUT 120000 +#define MAX_PEER_DOWN_EVENTS 4 + +/* struct tipc_mon_domain: domain record to be transferred between peers + * @len: actual size of domain record + * @gen: current generation of sender's domain + * @ack_gen: most recent generation of self's domain acked by peer + * @member_cnt: number of domain member nodes described in this record + * @up_map: bit map indicating which of the members the sender considers up + * @members: identity of the domain members + */ +struct tipc_mon_domain { + u16 len; + u16 gen; + u16 ack_gen; + u16 member_cnt; + u64 up_map; + u32 members[MAX_MON_DOMAIN]; +}; + +/* struct tipc_peer: state of a peer node and its domain + * @addr: tipc node identity of peer + * @head_map: shows which other nodes currently consider peer 'up' + * @domain: most recent domain record from peer + * @hash: position in hashed lookup 
list + * @list: position in linked list, in circular ascending order by 'addr' + * @applied: number of reported domain members applied on this monitor list + * @is_up: peer is up as seen from this node + * @is_head: peer is assigned domain head as seen from this node + * @is_local: peer is in local domain and should be continuously monitored + * @down_cnt: number of other peers which have reported this peer as lost + */ +struct tipc_peer { + u32 addr; + struct tipc_mon_domain *domain; + struct hlist_node hash; + struct list_head list; + u8 applied; + u8 down_cnt; + bool is_up; + bool is_head; + bool is_local; +}; + +struct tipc_monitor { + struct hlist_head peers[NODE_HTABLE_SIZE]; + int peer_cnt; + struct tipc_peer *self; + rwlock_t lock; + struct tipc_mon_domain cache; + u16 list_gen; + u16 dom_gen; + struct net *net; + struct timer_list timer; + unsigned long timer_intv; +}; + +static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) +{ + return tipc_net(net)->monitors[bearer_id]; +} + +const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); + +static inline u16 mon_cpu_to_le16(u16 val) +{ + return (__force __u16)htons(val); +} + +static inline u32 mon_cpu_to_le32(u32 val) +{ + return (__force __u32)htonl(val); +} + +static inline u64 mon_cpu_to_le64(u64 val) +{ + return (__force __u64)cpu_to_be64(val); +} + +static inline u16 mon_le16_to_cpu(u16 val) +{ + return ntohs((__force __be16)val); +} + +static inline u32 mon_le32_to_cpu(u32 val) +{ + return ntohl((__force __be32)val); +} + +static inline u64 mon_le64_to_cpu(u64 val) +{ + return be64_to_cpu((__force __be64)val); +} + +/* dom_rec_len(): actual length of domain record for transport + */ +static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) +{ + return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32)); +} + +/* dom_size() : calculate size of own domain based on number of peers + */ +static int dom_size(int peers) +{ + int i = 0; + + while ((i * i) < peers) + 
i++; + return min(i, MAX_MON_DOMAIN); +} + +static void map_set(u64 *up_map, int i, unsigned int v) +{ + *up_map &= ~(1ULL << i); + *up_map |= ((u64)v << i); +} + +static int map_get(u64 up_map, int i) +{ + return (up_map & (1ULL << i)) >> i; +} + +static struct tipc_peer *peer_prev(struct tipc_peer *peer) +{ + return list_last_entry(&peer->list, struct tipc_peer, list); +} + +static struct tipc_peer *peer_nxt(struct tipc_peer *peer) +{ + return list_first_entry(&peer->list, struct tipc_peer, list); +} + +static struct tipc_peer *peer_head(struct tipc_peer *peer) +{ + while (!peer->is_head) + peer = peer_prev(peer); + return peer; +} + +static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) +{ + struct tipc_peer *peer; + unsigned int thash = tipc_hashfn(addr); + + hlist_for_each_entry(peer, &mon->peers[thash], hash) { + if (peer->addr == addr) + return peer; + } + return NULL; +} + +static struct tipc_peer *get_self(struct net *net, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + + return mon->self; +} + +static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) +{ + struct tipc_net *tn = tipc_net(net); + + return mon->peer_cnt > tn->mon_threshold; +} + +/* mon_identify_lost_members() : - identify and mark potentially lost members + */ +static void mon_identify_lost_members(struct tipc_peer *peer, + struct tipc_mon_domain *dom_bef, + int applied_bef) +{ + struct tipc_peer *member = peer; + struct tipc_mon_domain *dom_aft = peer->domain; + int applied_aft = peer->applied; + int i; + + for (i = 0; i < applied_bef; i++) { + member = peer_nxt(member); + + /* Do nothing if self or peer already see member as down */ + if (!member->is_up || !map_get(dom_bef->up_map, i)) + continue; + + /* Loss of local node must be detected by active probing */ + if (member->is_local) + continue; + + /* Start probing if member was removed from applied domain */ + if (!applied_aft || (applied_aft < i)) { + 
member->down_cnt = 1; + continue; + } + + /* Member loss is confirmed if it is still in applied domain */ + if (!map_get(dom_aft->up_map, i)) + member->down_cnt++; + } +} + +/* mon_apply_domain() : match a peer's domain record against monitor list + */ +static void mon_apply_domain(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + struct tipc_mon_domain *dom = peer->domain; + struct tipc_peer *member; + u32 addr; + int i; + + if (!dom || !peer->is_up) + return; + + /* Scan across domain members and match against monitor list */ + peer->applied = 0; + member = peer_nxt(peer); + for (i = 0; i < dom->member_cnt; i++) { + addr = dom->members[i]; + if (addr != member->addr) + return; + peer->applied++; + member = peer_nxt(member); + } +} + +/* mon_update_local_domain() : update after peer addition/removal/up/down + */ +static void mon_update_local_domain(struct tipc_monitor *mon) +{ + struct tipc_peer *self = mon->self; + struct tipc_mon_domain *cache = &mon->cache; + struct tipc_mon_domain *dom = self->domain; + struct tipc_peer *peer = self; + u64 prev_up_map = dom->up_map; + u16 member_cnt, i; + bool diff; + + /* Update local domain size based on current size of cluster */ + member_cnt = dom_size(mon->peer_cnt) - 1; + self->applied = member_cnt; + + /* Update native and cached outgoing local domain records */ + dom->len = dom_rec_len(dom, member_cnt); + diff = dom->member_cnt != member_cnt; + dom->member_cnt = member_cnt; + for (i = 0; i < member_cnt; i++) { + peer = peer_nxt(peer); + diff |= dom->members[i] != peer->addr; + dom->members[i] = peer->addr; + map_set(&dom->up_map, i, peer->is_up); + cache->members[i] = mon_cpu_to_le32(peer->addr); + } + diff |= dom->up_map != prev_up_map; + if (!diff) + return; + dom->gen = ++mon->dom_gen; + cache->len = mon_cpu_to_le16(dom->len); + cache->gen = mon_cpu_to_le16(dom->gen); + cache->member_cnt = mon_cpu_to_le16(member_cnt); + cache->up_map = mon_cpu_to_le64(dom->up_map); + mon_apply_domain(mon, self); +} + +/* 
mon_update_neighbors() : update preceding neighbors of added/removed peer + */ +static void mon_update_neighbors(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + int dz, i; + + dz = dom_size(mon->peer_cnt); + for (i = 0; i < dz; i++) { + mon_apply_domain(mon, peer); + peer = peer_prev(peer); + } +} + +/* mon_assign_roles() : reassign peer roles after a network change + * The monitor list is consistent at this stage; i.e., each peer is monitoring + * a set of domain members as matched between domain record and the monitor list + */ +static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) +{ + struct tipc_peer *peer = peer_nxt(head); + struct tipc_peer *self = mon->self; + int i = 0; + + for (; peer != self; peer = peer_nxt(peer)) { + peer->is_local = false; + + /* Update domain member */ + if (i++ < head->applied) { + peer->is_head = false; + if (head == self) + peer->is_local = true; + continue; + } + /* Assign next domain head */ + if (!peer->is_up) + continue; + if (peer->is_head) + break; + head = peer; + head->is_head = true; + i = 0; + } + mon->list_gen++; +} + +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self; + struct tipc_peer *peer, *prev, *head; + + if (!mon) + return; + + self = get_self(net, bearer_id); + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) + goto exit; + prev = peer_prev(peer); + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + mon->peer_cnt--; + head = peer_head(prev); + if (head == self) + mon_update_local_domain(mon); + mon_update_neighbors(mon, prev); + + /* Revert to full-mesh monitoring if we reach threshold */ + if (!tipc_mon_is_active(net, mon)) { + list_for_each_entry(peer, &self->list, list) { + kfree(peer->domain); + peer->domain = NULL; + peer->applied = 0; + } + } + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); 
+} + +static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, + struct tipc_peer **peer) +{ + struct tipc_peer *self = mon->self; + struct tipc_peer *cur, *prev, *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + *peer = p; + if (!p) + return false; + p->addr = addr; + + /* Add new peer to lookup list */ + INIT_LIST_HEAD(&p->list); + hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); + + /* Sort new peer into iterator list, in ascending circular order */ + prev = self; + list_for_each_entry(cur, &self->list, list) { + if ((addr > prev->addr) && (addr < cur->addr)) + break; + if (((addr < cur->addr) || (addr > prev->addr)) && + (prev->addr > cur->addr)) + break; + prev = cur; + } + list_add_tail(&p->list, &cur->list); + mon->peer_cnt++; + mon_update_neighbors(mon, p); + return true; +} + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) + goto exit; + peer->is_up = true; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self; + struct tipc_peer *peer, *head; + struct tipc_mon_domain *dom; + int applied; + + if (!mon) + return; + + self = get_self(net, bearer_id); + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) { + pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); + goto exit; + } + applied = peer->applied; + peer->applied = 0; + dom = peer->domain; + peer->domain = NULL; + if (peer->is_head) + mon_identify_lost_members(peer, dom, applied); + kfree(dom); + peer->is_up = false; + peer->is_head = false; + 
peer->is_local = false; + peer->down_cnt = 0; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +/* tipc_mon_rcv - process monitor domain event message + */ +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *arrv_dom = data; + struct tipc_mon_domain dom_bef; + struct tipc_mon_domain *dom; + struct tipc_peer *peer; + u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt); + int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); + u16 new_gen = mon_le16_to_cpu(arrv_dom->gen); + u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen); + u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len); + bool probing = state->probing; + int i, applied_bef; + + state->probing = false; + + /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; + if (dlen < dom_rec_len(arrv_dom, 0)) + return; + if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) + return; + if (dlen < new_dlen || arrv_dlen != new_dlen) + return; + + /* Synch generation numbers with peer if link just came up */ + if (!state->synched) { + state->peer_gen = new_gen - 1; + state->acked_gen = acked_gen; + state->synched = true; + } + + if (more(acked_gen, state->acked_gen)) + state->acked_gen = acked_gen; + + /* Drop duplicate unless we are waiting for a probe response */ + if (!more(new_gen, state->peer_gen) && !probing) + return; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer || !peer->is_up) + goto exit; + + /* Peer is confirmed, stop any ongoing probing */ + peer->down_cnt = 0; + + /* Task is done for duplicate record */ + if (!more(new_gen, state->peer_gen)) + goto exit; + + state->peer_gen = new_gen; + + /* Cache current domain record for later use */ + dom_bef.member_cnt = 0; + dom = peer->domain; + if (dom) + 
memcpy(&dom_bef, dom, dom->len); + + /* Transform and store received domain record */ + if (!dom || (dom->len < new_dlen)) { + kfree(dom); + dom = kmalloc(new_dlen, GFP_ATOMIC); + peer->domain = dom; + if (!dom) + goto exit; + } + dom->len = new_dlen; + dom->gen = new_gen; + dom->member_cnt = new_member_cnt; + dom->up_map = mon_le64_to_cpu(arrv_dom->up_map); + for (i = 0; i < new_member_cnt; i++) + dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]); + + /* Update peers affected by this domain record */ + applied_bef = peer->applied; + mon_apply_domain(mon, peer); + mon_identify_lost_members(peer, &dom_bef, applied_bef); + mon_assign_roles(mon, peer_head(peer)); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *dom = data; + u16 gen = mon->dom_gen; + u16 len; + + /* Send invalid record if not active */ + if (!tipc_mon_is_active(net, mon)) { + dom->len = 0; + return; + } + + /* Send only a dummy record with ack if peer has acked our last sent */ + if (likely(state->acked_gen == gen)) { + len = dom_rec_len(dom, 0); + *dlen = len; + dom->len = mon_cpu_to_le16(len); + dom->gen = mon_cpu_to_le16(gen); + dom->ack_gen = mon_cpu_to_le16(state->peer_gen); + dom->member_cnt = 0; + return; + } + /* Send the full record */ + read_lock_bh(&mon->lock); + len = mon_le16_to_cpu(mon->cache.len); + *dlen = len; + memcpy(data, &mon->cache, len); + read_unlock_bh(&mon->lock); + dom->ack_gen = mon_cpu_to_le16(state->peer_gen); +} + +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer; + + if (!tipc_mon_is_active(net, mon)) { + state->probing = false; + state->monitoring = true; + return; + } + + /* Use cached state if table has not changed */ + if (!state->probing 
&& + (state->list_gen == mon->list_gen) && + (state->acked_gen == mon->dom_gen)) + return; + + read_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (peer) { + state->probing = state->acked_gen != mon->dom_gen; + state->probing |= peer->down_cnt; + state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; + state->monitoring = peer->is_local; + state->monitoring |= peer->is_head; + state->list_gen = mon->list_gen; + } + read_unlock_bh(&mon->lock); +} + +static void mon_timeout(struct timer_list *t) +{ + struct tipc_monitor *mon = timer_container_of(mon, t, timer); + struct tipc_peer *self; + int best_member_cnt = dom_size(mon->peer_cnt) - 1; + + write_lock_bh(&mon->lock); + self = mon->self; + if (self && (best_member_cnt != self->applied)) { + mon_update_local_domain(mon); + mon_assign_roles(mon, self); + } + write_unlock_bh(&mon->lock); + mod_timer(&mon->timer, jiffies + mon->timer_intv); +} + +int tipc_mon_create(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon; + struct tipc_peer *self; + struct tipc_mon_domain *dom; + + if (tn->monitors[bearer_id]) + return 0; + + mon = kzalloc(sizeof(*mon), GFP_ATOMIC); + self = kzalloc(sizeof(*self), GFP_ATOMIC); + dom = kzalloc(sizeof(*dom), GFP_ATOMIC); + if (!mon || !self || !dom) { + kfree(mon); + kfree(self); + kfree(dom); + return -ENOMEM; + } + tn->monitors[bearer_id] = mon; + rwlock_init(&mon->lock); + mon->net = net; + mon->peer_cnt = 1; + mon->self = self; + self->domain = dom; + self->addr = tipc_own_addr(net); + self->is_up = true; + self->is_head = true; + INIT_LIST_HEAD(&self->list); + timer_setup(&mon->timer, mon_timeout, 0); + mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); + mod_timer(&mon->timer, jiffies + mon->timer_intv); + return 0; +} + +void tipc_mon_delete(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self; + struct 
tipc_peer *peer, *tmp; + + if (!mon) + return; + + self = get_self(net, bearer_id); + write_lock_bh(&mon->lock); + tn->monitors[bearer_id] = NULL; + list_for_each_entry_safe(peer, tmp, &self->list, list) { + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + } + mon->self = NULL; + write_unlock_bh(&mon->lock); + timer_shutdown_sync(&mon->timer); + kfree(self->domain); + kfree(self); + kfree(mon); +} + +void tipc_mon_reinit_self(struct net *net) +{ + struct tipc_monitor *mon; + int bearer_id; + + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + mon = tipc_monitor(net, bearer_id); + if (!mon) + continue; + write_lock_bh(&mon->lock); + if (mon->self) + mon->self->addr = tipc_own_addr(net); + write_unlock_bh(&mon->lock); + } +} + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) +{ + struct tipc_net *tn = tipc_net(net); + + if (cluster_size > TIPC_CLUSTER_SIZE) + return -EINVAL; + + tn->mon_threshold = cluster_size; + + return 0; +} + +int tipc_nl_monitor_get_threshold(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + return tn->mon_threshold; +} + +static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, + struct tipc_nl_msg *msg) +{ + struct tipc_mon_domain *dom = peer->domain; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_PEER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) + goto attr_msg_full; + + if (peer->is_up) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) + goto attr_msg_full; + if (peer->is_local) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) + goto attr_msg_full; + if (peer->is_head) + if (nla_put_flag(msg->skb, 
TIPC_NLA_MON_PEER_HEAD)) + goto attr_msg_full; + + if (dom) { + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, + dom->up_map, TIPC_NLA_MON_PEER_PAD)) + goto attr_msg_full; + if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, + dom->member_cnt * sizeof(u32), &dom->members)) + goto attr_msg_full; + } + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer; + + if (!mon) + return -EINVAL; + + read_lock_bh(&mon->lock); + peer = mon->self; + do { + if (*prev_node) { + if (peer->addr == *prev_node) + *prev_node = 0; + else + continue; + } + if (__tipc_nl_add_monitor_peer(peer, msg)) { + *prev_node = peer->addr; + read_unlock_bh(&mon->lock); + return -EMSGSIZE; + } + } while ((peer = peer_nxt(peer)) != mon->self); + read_unlock_bh(&mon->lock); + + return 0; +} + +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + char bearer_name[TIPC_MAX_BEARER_NAME]; + struct nlattr *attrs; + void *hdr; + int ret; + + ret = tipc_bearer_get_name(net, bearer_name, bearer_id); + if (ret || !mon) + return 0; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + read_lock_bh(&mon->lock); + if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id)) + goto attr_msg_full; + if (tipc_mon_is_active(net, mon)) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE)) + goto attr_msg_full; + if 
(nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen)) + goto attr_msg_full; + + read_unlock_bh(&mon->lock); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + read_unlock_bh(&mon->lock); + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h new file mode 100644 index 000000000000..ed63d2e650b0 --- /dev/null +++ b/net/tipc/monitor.h @@ -0,0 +1,83 @@ +/* + * net/tipc/monitor.h + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_MONITOR_H +#define _TIPC_MONITOR_H + +#include "netlink.h" + +/* struct tipc_mon_state: link instance's cache of monitor list and domain state + * @list_gen: current generation of this node's monitor list + * @gen: current generation of this node's local domain + * @peer_gen: most recent domain generation received from peer + * @acked_gen: most recent generation of self's domain acked by peer + * @monitoring: this peer endpoint should continuously monitored + * @probing: peer endpoint should be temporarily probed for potential loss + * @synched: domain record's generation has been synched with peer after reset + */ +struct tipc_mon_state { + u16 list_gen; + u16 peer_gen; + u16 acked_gen; + bool monitoring :1; + bool probing :1; + bool reset :1; + bool synched :1; +}; + +int tipc_mon_create(struct net *net, int bearer_id); +void tipc_mon_delete(struct net *net, int bearer_id); + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id); +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id); +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id); +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); + +int 
tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); +int tipc_nl_monitor_get_threshold(struct net *net); +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id); +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node); +void tipc_mon_reinit_self(struct net *net); + +extern const int tipc_max_domain_size; +#endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ced60e2fc4f7..76284fc538eb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -34,73 +34,822 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" #include "msg.h" +#include "addr.h" +#include "name_table.h" +#include "crypto.h" -u32 tipc_msg_tot_importance(struct tipc_msg *m) +#define BUF_ALIGN(x) ALIGN(x, 4) +#define MAX_FORWARD_SIZE 1024 +#ifdef CONFIG_TIPC_CRYPTO +#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16) +#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE) +#else +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_OVERHEAD BUF_HEADROOM +#endif + +const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * @gfp: memory allocation flags + * + * Return: a new buffer with data pointers set to the specified size. + * + * NOTE: + * Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. 
+ */ +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; + struct sk_buff *skb; + + skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); + return skb; } - -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode) +void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 dnode) { memset(m, 0, hsize); msg_set_version(m); msg_set_user(m, user); msg_set_hdr_sz(m, hsize); msg_set_size(m, hsize); - msg_set_prevnode(m, tipc_own_addr); + msg_set_prevnode(m, own_node); msg_set_type(m, type); - msg_set_orignode(m, tipc_own_addr); - msg_set_destnode(m, destnode); + if (hsize > SHORT_H_SIZE) { + msg_set_orignode(m, own_node); + msg_set_destnode(m, dnode); + } +} + +struct sk_buff *tipc_msg_create(uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) +{ + struct tipc_msg *msg; + struct sk_buff *buf; + + buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); + if (unlikely(!buf)) + return NULL; + + msg = buf_msg(buf); + tipc_msg_init(onode, msg, user, type, hdr_sz, dnode); + msg_set_size(msg, hdr_sz + data_sz); + msg_set_origport(msg, oport); + msg_set_destport(msg, dport); + msg_set_errcode(msg, errcode); + return buf; +} + +/* tipc_buf_append(): Append a buffer to the fragment list of another buffer + * @*headbuf: in: NULL for first frag, otherwise value returned from prev call + * out: set when successful non-complete reassembly, otherwise NULL + * @*buf: in: the buffer to append. 
Always defined + * out: head buf after successful complete reassembly, otherwise NULL + * Returns 1 when reassembly complete, otherwise 0 + */ +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) +{ + struct sk_buff *head = *headbuf; + struct sk_buff *frag = *buf; + struct sk_buff *tail = NULL; + struct tipc_msg *msg; + u32 fragid; + int delta; + bool headstolen; + + if (!frag) + goto err; + + msg = buf_msg(frag); + fragid = msg_type(msg); + frag->next = NULL; + skb_pull(frag, msg_hdr_sz(msg)); + + if (fragid == FIRST_FRAGMENT) { + if (unlikely(head)) + goto err; + if (skb_has_frag_list(frag) && __skb_linearize(frag)) + goto err; + *buf = NULL; + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) + goto err; + head = *headbuf = frag; + TIPC_SKB_CB(head)->tail = NULL; + return 0; + } + + if (!head) + goto err; + + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { + kfree_skb_partial(frag, headstolen); + } else { + tail = TIPC_SKB_CB(head)->tail; + if (!skb_has_frag_list(head)) + skb_shinfo(head)->frag_list = frag; + else + tail->next = frag; + head->truesize += frag->truesize; + head->data_len += frag->len; + head->len += frag->len; + TIPC_SKB_CB(head)->tail = frag; + } + + if (fragid == LAST_FRAGMENT) { + TIPC_SKB_CB(head)->validated = 0; + if (unlikely(!tipc_msg_validate(&head))) + goto err; + *buf = head; + TIPC_SKB_CB(head)->tail = NULL; + *headbuf = NULL; + return 1; + } + return 0; +err: + kfree_skb(*buf); + kfree_skb(*headbuf); + *buf = *headbuf = NULL; + return 0; } /** - * tipc_msg_build - create message using specified header and data + * tipc_msg_append(): Append data to tail of an existing buffer queue + * @_hdr: header to be used + * @m: the data to be appended + * @mss: max allowable size of buffer + * @dlen: size of data to be appended + * @txq: 
queue to append to + * + * Return: the number of 1k blocks appended or errno value + */ +int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, + int mss, struct sk_buff_head *txq) +{ + struct sk_buff *skb; + int accounted, total, curr; + int mlen, cpy, rem = dlen; + struct tipc_msg *hdr; + + skb = skb_peek_tail(txq); + accounted = skb ? msg_blocks(buf_msg(skb)) : 0; + total = accounted; + + do { + if (!skb || skb->len >= mss) { + skb = tipc_buf_acquire(mss, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + skb_orphan(skb); + skb_trim(skb, MIN_H_SIZE); + hdr = buf_msg(skb); + skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE); + msg_set_hdr_sz(hdr, MIN_H_SIZE); + msg_set_size(hdr, MIN_H_SIZE); + __skb_queue_tail(txq, skb); + total += 1; + } + hdr = buf_msg(skb); + curr = msg_blocks(hdr); + mlen = msg_size(hdr); + cpy = min_t(size_t, rem, mss - mlen); + if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter)) + return -EFAULT; + msg_set_size(hdr, mlen + cpy); + skb_put(skb, cpy); + rem -= cpy; + total += msg_blocks(hdr) - curr; + } while (rem > 0); + return total - accounted; +} + +/* tipc_msg_validate - validate basic format of received message * - * Note: Caller must not hold any locks in case copy_from_user() is interrupted! + * This routine ensures a TIPC message has an acceptable header, and at least + * as much data as the header indicates it should. The routine also ensures + * that the entire message header is stored in the main fragment of the message + * buffer, to simplify future access to message header fields. * - * Returns message data size or errno + * Note: Having extra info present in the message header or data areas is OK. + * TIPC will ignore the excess, under the assumption that it is optional info + * introduced by a later release of the protocol. 
*/ -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, int max_size, - struct sk_buff **buf) +bool tipc_msg_validate(struct sk_buff **_skb) { - int dsz, sz, hsz, pos, res, cnt; + struct sk_buff *skb = *_skb; + struct tipc_msg *hdr; + int msz, hsz; - dsz = total_len; - pos = hsz = msg_hdr_sz(hdr); - sz = hsz + dsz; - msg_set_size(hdr, sz); - if (unlikely(sz > max_size)) { - *buf = NULL; - return dsz; + /* Ensure that flow control ratio condition is satisfied */ + if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) { + skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC); + if (!skb) + return false; + kfree_skb(*_skb); + *_skb = skb; } - *buf = tipc_buf_acquire(sz); - if (!(*buf)) + if (unlikely(TIPC_SKB_CB(skb)->validated)) + return true; + + if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) + return false; + + hsz = msg_hdr_sz(buf_msg(skb)); + if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE)) + return false; + if (unlikely(!pskb_may_pull(skb, hsz))) + return false; + + hdr = buf_msg(skb); + if (unlikely(msg_version(hdr) != TIPC_VERSION)) + return false; + + msz = msg_size(hdr); + if (unlikely(msz < hsz)) + return false; + if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) + return false; + if (unlikely(skb->len < msz)) + return false; + + TIPC_SKB_CB(skb)->validated = 1; + return true; +} + +/** + * tipc_msg_fragment - build a fragment skb list for TIPC message + * + * @skb: TIPC message skb + * @hdr: internal msg header to be put on the top of the fragments + * @pktmax: max size of a fragment incl. the header + * @frags: returned fragment skb list + * + * Return: 0 if the fragmentation is successful, otherwise: -EINVAL + * or -ENOMEM + */ +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags) +{ + int pktno, nof_fragms, dsz, dmax, eat; + struct tipc_msg *_hdr; + struct sk_buff *_skb; + u8 *data; + + /* Non-linear buffer? 
*/ + if (skb_linearize(skb)) return -ENOMEM; - skb_copy_to_linear_data(*buf, hdr, hsz); - for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - pos += msg_sect[cnt].iov_len; + + data = (u8 *)skb->data; + dsz = msg_size(buf_msg(skb)); + dmax = pktmax - INT_H_SIZE; + if (dsz <= dmax || !dmax) + return -EINVAL; + + nof_fragms = dsz / dmax + 1; + for (pktno = 1; pktno <= nof_fragms; pktno++) { + if (pktno < nof_fragms) + eat = dmax; + else + eat = dsz % dmax; + /* Allocate a new fragment */ + _skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC); + if (!_skb) + goto error; + skb_orphan(_skb); + __skb_queue_tail(frags, _skb); + /* Copy header & data to the fragment */ + skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat); + data += eat; + /* Update the fragment's header */ + _hdr = buf_msg(_skb); + msg_set_fragm_no(_hdr, pktno); + msg_set_nof_fragms(_hdr, nof_fragms); + msg_set_size(_hdr, INT_H_SIZE + eat); } - if (likely(res)) - return dsz; + return 0; - kfree_skb(*buf); - *buf = NULL; - return -EFAULT; +error: + __skb_queue_purge(frags); + __skb_queue_head_init(frags); + return -ENOMEM; +} + +/** + * tipc_msg_build - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @m: User message + * @offset: buffer offset for fragmented messages (FIXME) + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @list: Buffer or chain of buffers to be returned to caller + * + * Note that the recursive call we are making here is safe, since it can + * logically go only one further level down. 
+ * + * Return: message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, + int dsz, int pktmax, struct sk_buff_head *list) +{ + int mhsz = msg_hdr_sz(mhdr); + struct tipc_msg pkthdr; + int msz = mhsz + dsz; + int pktrem = pktmax; + struct sk_buff *skb; + int drem = dsz; + int pktno = 1; + char *pktpos; + int pktsz; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { + skb = tipc_buf_acquire(msz, GFP_KERNEL); + + /* Fall back to smaller MTU if node local message */ + if (unlikely(!skb)) { + if (pktmax != MAX_MSG_SIZE) + return -ENOMEM; + rc = tipc_msg_build(mhdr, m, offset, dsz, + one_page_mtu, list); + if (rc != dsz) + return rc; + if (tipc_msg_assemble(list)) + return dsz; + return -ENOMEM; + } + skb_orphan(skb); + __skb_queue_tail(list, skb); + skb_copy_to_linear_data(skb, mhdr, mhsz); + pktpos = skb->data + mhsz; + if (copy_from_iter_full(pktpos, dsz, &m->msg_iter)) + return dsz; + rc = -EFAULT; + goto error; + } + + /* Prepare reusable fragment header */ + tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER, + FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + msg_set_importance(&pkthdr, msg_importance(mhdr)); + + /* Prepare first fragment */ + skb = tipc_buf_acquire(pktmax, GFP_KERNEL); + if (!skb) + return -ENOMEM; + skb_orphan(skb); + __skb_queue_tail(list, skb); + pktpos = skb->data; + skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + 
pktsz = pktmax; + skb = tipc_buf_acquire(pktsz, GFP_KERNEL); + if (!skb) { + rc = -ENOMEM; + goto error; + } + skb_orphan(skb); + __skb_queue_tail(list, skb); + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); + pktpos = skb->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + msg_set_type(buf_msg(skb), LAST_FRAGMENT); + return dsz; +error: + __skb_queue_purge(list); + __skb_queue_head_init(list); + return rc; +} + +/** + * tipc_msg_bundle - Append contents of a buffer to tail of an existing one + * @bskb: the bundle buffer to append to + * @msg: message to be appended + * @max: max allowable size for the bundle buffer + * + * Return: "true" if bundling has been performed, otherwise "false" + */ +static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg, + u32 max) +{ + struct tipc_msg *bmsg = buf_msg(bskb); + u32 msz, bsz, offset, pad; + + msz = msg_size(msg); + bsz = msg_size(bmsg); + offset = BUF_ALIGN(bsz); + pad = offset - bsz; + + if (unlikely(skb_tailroom(bskb) < (pad + msz))) + return false; + if (unlikely(max < (offset + msz))) + return false; + + skb_put(bskb, pad + msz); + skb_copy_to_linear_data_offset(bskb, offset, msg, msz); + msg_set_size(bmsg, offset + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + return true; +} + +/** + * tipc_msg_try_bundle - Try to bundle a new message to the last one + * @tskb: the last/target message to which the new one will be appended + * @skb: the new message skb pointer + * @mss: max message size (header inclusive) + * @dnode: destination node for the message + * @new_bundle: if this call made a new bundle or not + * + * Return: "true" if the new message skb is potential for bundling this time or + * later, in the case a bundling has been done this time, the skb is consumed + * (the skb pointer = NULL). + * Otherwise, "false" if the skb cannot be bundled at all. 
+ */ +bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss, + u32 dnode, bool *new_bundle) +{ + struct tipc_msg *msg, *inner, *outer; + u32 tsz; + + /* First, check if the new buffer is suitable for bundling */ + msg = buf_msg(*skb); + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == TUNNEL_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (mss <= INT_H_SIZE + msg_size(msg)) + return false; + + /* Ok, but the last/target buffer can be empty? */ + if (unlikely(!tskb)) + return true; + + /* Is it a bundle already? Try to bundle the new message to it */ + if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) { + *new_bundle = false; + goto bundle; + } + + /* Make a new bundle of the two messages if possible */ + tsz = msg_size(buf_msg(tskb)); + if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg))) + return true; + if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE, + GFP_ATOMIC))) + return true; + inner = buf_msg(tskb); + skb_push(tskb, INT_H_SIZE); + outer = buf_msg(tskb); + tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE, + dnode); + msg_set_importance(outer, msg_importance(inner)); + msg_set_size(outer, INT_H_SIZE + tsz); + msg_set_msgcnt(outer, 1); + *new_bundle = true; + +bundle: + if (likely(tipc_msg_bundle(tskb, msg, mss))) { + consume_skb(*skb); + *skb = NULL; + } + return true; +} + +/** + * tipc_msg_extract(): extract bundled inner packet from buffer + * @skb: buffer to be extracted from. + * @iskb: extracted inner buffer, to be returned + * @pos: position in outer message of msg to be extracted. + * Returns position of next msg. 
+ * Consumes outer buffer when last packet extracted + * Return: true when there is an extracted buffer, otherwise false + */ +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) +{ + struct tipc_msg *hdr, *ihdr; + int imsz; + + *iskb = NULL; + if (unlikely(skb_linearize(skb))) + goto none; + + hdr = buf_msg(skb); + if (unlikely(*pos > (msg_data_sz(hdr) - MIN_H_SIZE))) + goto none; + + ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos); + imsz = msg_size(ihdr); + + if ((*pos + imsz) > msg_data_sz(hdr)) + goto none; + + *iskb = tipc_buf_acquire(imsz, GFP_ATOMIC); + if (!*iskb) + goto none; + + skb_copy_to_linear_data(*iskb, ihdr, imsz); + if (unlikely(!tipc_msg_validate(iskb))) + goto none; + + *pos += BUF_ALIGN(imsz); + return true; +none: + kfree_skb(skb); + kfree_skb(*iskb); + *iskb = NULL; + return false; +} + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @own_node: originating node id for reversed message + * @skb: buffer containing message to be reversed; will be consumed + * @err: error code to be set in message, if any + * Replaces consumed buffer with new one when successful + * Return: true if success, otherwise false + */ +bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) +{ + struct sk_buff *_skb = *skb; + struct tipc_msg *_hdr, *hdr; + int hlen, dlen; + + if (skb_linearize(_skb)) + goto exit; + _hdr = buf_msg(_skb); + dlen = min_t(uint, msg_data_sz(_hdr), MAX_FORWARD_SIZE); + hlen = msg_hdr_sz(_hdr); + + if (msg_dest_droppable(_hdr)) + goto exit; + if (msg_errcode(_hdr)) + goto exit; + + /* Never return SHORT header */ + if (hlen == SHORT_H_SIZE) + hlen = BASIC_H_SIZE; + + /* Don't return data along with SYN+, - sender has a clone */ + if (msg_is_syn(_hdr) && err == TIPC_ERR_OVERLOAD) + dlen = 0; + + /* Allocate new buffer to return */ + *skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC); + if (!*skb) + goto exit; + memcpy((*skb)->data, _skb->data, msg_hdr_sz(_hdr)); + 
memcpy((*skb)->data + hlen, msg_data(_hdr), dlen); + + /* Build reverse header in new buffer */ + hdr = buf_msg(*skb); + msg_set_hdr_sz(hdr, hlen); + msg_set_errcode(hdr, err); + msg_set_non_seq(hdr, 0); + msg_set_origport(hdr, msg_destport(_hdr)); + msg_set_destport(hdr, msg_origport(_hdr)); + msg_set_destnode(hdr, msg_prevnode(_hdr)); + msg_set_prevnode(hdr, own_node); + msg_set_orignode(hdr, own_node); + msg_set_size(hdr, hlen + dlen); + skb_orphan(_skb); + kfree_skb(_skb); + return true; +exit: + kfree_skb(_skb); + *skb = NULL; + return false; +} + +bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy) +{ + struct sk_buff *skb, *_skb; + + skb_queue_walk(msg, skb) { + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) { + __skb_queue_purge(cpy); + pr_err_ratelimited("Failed to clone buffer chain\n"); + return false; + } + __skb_queue_tail(cpy, _skb); + } + return true; +} + +/** + * tipc_msg_lookup_dest(): try to find new destination for named message + * @net: pointer to associated network namespace + * @skb: the buffer containing the message. 
+ * @err: error code to be used by caller if lookup fails + * Does not consume buffer + * Return: true if a destination is found, false otherwise + */ +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) +{ + struct tipc_msg *msg = buf_msg(skb); + u32 scope = msg_lookup_scope(msg); + u32 self = tipc_own_addr(net); + u32 inst = msg_nameinst(msg); + struct tipc_socket_addr sk; + struct tipc_uaddr ua; + + if (!msg_isdata(msg)) + return false; + if (!msg_named(msg)) + return false; + if (msg_errcode(msg)) + return false; + *err = TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; + msg = buf_msg(skb); + if (msg_reroute_cnt(msg)) + return false; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope, + msg_nametype(msg), inst, inst); + sk.node = tipc_scope2node(net, scope); + if (!tipc_nametbl_lookup_anycast(net, &ua, &sk)) + return false; + msg_incr_reroute_cnt(msg); + if (sk.node != self) + msg_set_prevnode(msg, self); + msg_set_destnode(msg, sk.node); + msg_set_destport(msg, sk.ref); + *err = TIPC_OK; + + return true; +} + +/* tipc_msg_assemble() - assemble chain of fragments into one message + */ +bool tipc_msg_assemble(struct sk_buff_head *list) +{ + struct sk_buff *skb, *tmp = NULL; + + if (skb_queue_len(list) == 1) + return true; + + while ((skb = __skb_dequeue(list))) { + skb->next = NULL; + if (tipc_buf_append(&tmp, &skb)) { + __skb_queue_tail(list, skb); + return true; + } + if (!tmp) + break; + } + __skb_queue_purge(list); + __skb_queue_head_init(list); + pr_warn("Failed do assemble buffer\n"); + return false; +} + +/* tipc_msg_reassemble() - clone a buffer chain of fragments and + * reassemble the clones into one message + */ +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq) +{ + struct sk_buff *skb, *_skb; + struct sk_buff *frag = NULL; + struct sk_buff *head = NULL; + int hdr_len; + + /* Copy header if single buffer */ + if (skb_queue_len(list) == 1) { + skb = skb_peek(list); + hdr_len = skb_headroom(skb) + 
msg_hdr_sz(buf_msg(skb)); + _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC); + if (!_skb) + return false; + __skb_queue_tail(rcvq, _skb); + return true; + } + + /* Clone all fragments and reassemble */ + skb_queue_walk(list, skb) { + frag = skb_clone(skb, GFP_ATOMIC); + if (!frag) + goto error; + frag->next = NULL; + if (tipc_buf_append(&head, &frag)) + break; + if (!head) + goto error; + } + __skb_queue_tail(rcvq, frag); + return true; +error: + pr_warn("Failed do clone local mcast rcv buffer\n"); + kfree_skb(head); + return false; +} + +bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, + struct sk_buff_head *cpy) +{ + struct sk_buff *skb, *_skb; + + skb_queue_walk(msg, skb) { + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + __skb_queue_purge(cpy); + return false; + } + msg_set_destnode(buf_msg(_skb), dst); + __skb_queue_tail(cpy, _skb); + } + return true; +} + +/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number + * @list: list to be appended to + * @seqno: sequence number of buffer to add + * @skb: buffer to add + */ +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb) +{ + struct sk_buff *_skb, *tmp; + + if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) { + __skb_queue_head(list, skb); + return true; + } + + if (more(seqno, buf_seqno(skb_peek_tail(list)))) { + __skb_queue_tail(list, skb); + return true; + } + + skb_queue_walk_safe(list, _skb, tmp) { + if (more(seqno, buf_seqno(_skb))) + continue; + if (seqno == buf_seqno(_skb)) + break; + __skb_queue_before(list, _skb, skb); + return true; + } + kfree_skb(skb); + return false; +} + +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + __skb_queue_tail(xmitq, skb); } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 5e4ccf5c27df..c5eec16213d7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * 
net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, Ericsson AB + * Copyright (c) 2000-2007, 2014-2017 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,7 +37,8 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "bearer.h" +#include <linux/tipc.h> +#include "core.h" /* * Constants and routines used to read and write TIPC payload message headers @@ -45,6 +46,7 @@ * Note: Some items are also used with TIPC internal message headers */ #define TIPC_VERSION 2 +struct plist; /* * Payload message users are defined in TIPC's public API: @@ -53,14 +55,36 @@ * - TIPC_HIGH_IMPORTANCE * - TIPC_CRITICAL_IMPORTANCE */ +#define TIPC_SYSTEM_IMPORTANCE 4 + /* * Payload message types */ -#define TIPC_CONN_MSG 0 -#define TIPC_MCAST_MSG 1 -#define TIPC_NAMED_MSG 2 -#define TIPC_DIRECT_MSG 3 +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 +#define TIPC_GRP_MEMBER_EVT 4 +#define TIPC_GRP_BCAST_MSG 5 +#define TIPC_GRP_MCAST_MSG 6 +#define TIPC_GRP_UCAST_MSG 7 + +/* + * Internal message users + */ +#define BCAST_PROTOCOL 5 +#define MSG_BUNDLER 6 +#define LINK_PROTOCOL 7 +#define CONN_MANAGER 8 +#define GROUP_PROTOCOL 9 +#define TUNNEL_PROTOCOL 10 +#define NAME_DISTRIBUTOR 11 +#define MSG_FRAGMENTER 12 +#define LINK_CONFIG 13 +#define MSG_CRYPTO 14 +#define SOCK_WAKEUP 14 /* pseudo user */ +#define TOP_SRV 15 /* pseudo user */ /* * Message header sizes @@ -69,19 +93,114 @@ #define BASIC_H_SIZE 32 /* Basic payload message */ #define NAMED_H_SIZE 40 /* Named payload message */ #define MCAST_H_SIZE 44 /* Multicast payload message */ +#define GROUP_H_SIZE 44 /* Group payload message */ #define INT_H_SIZE 40 /* Internal messages */ #define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) +#define TIPC_MEDIA_INFO_OFFSET 5 + 
+extern const int one_page_mtu; + +struct tipc_skb_cb { + union { + struct { + struct sk_buff *tail; + unsigned long nxt_retr; + unsigned long retr_stamp; + u32 bytes_read; + u32 orig_member; + u16 chain_imp; + u16 ackers; + u16 retr_cnt; + } __packed; +#ifdef CONFIG_TIPC_CRYPTO + struct { + struct tipc_crypto *rx; + struct tipc_aead *last; + u8 recurs; + } tx_clone_ctx __packed; +#endif + } __packed; + union { + struct { + u8 validated:1; +#ifdef CONFIG_TIPC_CRYPTO + u8 encrypted:1; + u8 decrypted:1; +#define SKB_PROBING 1 +#define SKB_GRACING 2 + u8 xmit_type:2; + u8 tx_clone_deferred:1; +#endif + }; + u8 flags; + }; + u8 reserved; +#ifdef CONFIG_TIPC_CRYPTO + void *crypto_ctx; +#endif +} __packed; -#define TIPC_MEDIA_ADDR_OFFSET 5 - +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) struct tipc_msg { __be32 hdr[15]; }; +/* struct tipc_gap_ack - TIPC Gap ACK block + * @ack: seqno of the last consecutive packet in link deferdq + * @gap: number of gap packets since the last ack + * + * E.g: + * link deferdq: 1 2 3 4 10 11 13 14 15 20 + * --> Gap ACK blocks: <4, 5>, <11, 1>, <15, 4>, <20, 0> + */ +struct tipc_gap_ack { + __be16 ack; + __be16 gap; +}; + +/* struct tipc_gap_ack_blks + * @len: actual length of the record + * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast + * ones) + * @start_index: starting index for "valid" broadcast Gap ACK blocks + * @bgack_cnt: number of Gap ACK blocks for broadcast in the record + * @gacks: array of Gap ACK blocks + * + * 31 16 15 0 + * +-------------+-------------+-------------+-------------+ + * | bgack_cnt | ugack_cnt | len | + * +-------------+-------------+-------------+-------------+ - + * | gap | ack | | + * +-------------+-------------+-------------+-------------+ > bc gacks + * : : : | + * +-------------+-------------+-------------+-------------+ - + * | gap | ack | | + * +-------------+-------------+-------------+-------------+ > uc gacks + * : : : | + * 
+-------------+-------------+-------------+-------------+ - + */ +struct tipc_gap_ack_blks { + __be16 len; + union { + u8 ugack_cnt; + u8 start_index; + }; + u8 bgack_cnt; + struct tipc_gap_ack gacks[]; +}; + +#define MAX_GAP_ACK_BLKS 128 +#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \ + sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS) + +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} static inline u32 msg_word(struct tipc_msg *m, u32 pos) { @@ -107,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, m->hdr[w] |= htonl(val); } -static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) -{ - u32 temp = msg->hdr[a]; - - msg->hdr[a] = msg->hdr[b]; - msg->hdr[b] = temp; -} - /* * Word 0 */ @@ -143,16 +254,6 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } -static inline u32 msg_importance(struct tipc_msg *m) -{ - return msg_bits(m, 0, 25, 0xf); -} - -static inline void msg_set_importance(struct tipc_msg *m, u32 i) -{ - msg_set_user(m, i); -} - static inline u32 msg_hdr_sz(struct tipc_msg *m) { return msg_bits(m, 0, 21, 0xf) << 2; @@ -168,6 +269,11 @@ static inline u32 msg_size(struct tipc_msg *m) return msg_bits(m, 0, 0, 0x1ffff); } +static inline u32 msg_blocks(struct tipc_msg *m) +{ + return (msg_size(m) / 1024) + 1; +} + static inline u32 msg_data_sz(struct tipc_msg *m) { return msg_size(m) - msg_hdr_sz(m); @@ -183,6 +289,16 @@ static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 20, 1, n); } +static inline int msg_is_syn(struct tipc_msg *m) +{ + return msg_bits(m, 0, 17, 1); +} + +static inline void msg_set_syn(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 17, 1, d); +} + static inline int msg_dest_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 19, 1); @@ -193,6 +309,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 19, 1, d); } 
+static inline int msg_is_keepalive(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + static inline int msg_src_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); @@ -203,11 +329,50 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 18, 1, d); } +static inline int msg_ack_required(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_ack_required(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 18, 1, 1); +} + +static inline int msg_nagle_ack(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_nagle_ack(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 18, 1, 1); +} + +static inline bool msg_is_rcast(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 0x1); +} + +static inline void msg_set_is_rcast(struct tipc_msg *m, bool d) +{ + msg_set_bits(m, 0, 18, 0x1, d); +} + static inline void msg_set_size(struct tipc_msg *m, u32 sz) { m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_inner_hdr(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} /* * Word 1 @@ -222,6 +387,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 29, 0x7, n); } +static inline int msg_in_group(struct tipc_msg *m) +{ + int mtyp = msg_type(m); + + return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG; +} + +static inline bool msg_is_grp_evt(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_GRP_MEMBER_EVT; +} + static inline u32 msg_named(struct tipc_msg *m) { return msg_type(m) == TIPC_NAMED_MSG; @@ -229,7 +406,10 @@ static inline u32 msg_named(struct tipc_msg *m) static inline u32 msg_mcast(struct tipc_msg *m) { - return msg_type(m) == TIPC_MCAST_MSG; + int mtyp = msg_type(m); + 
+ return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) || + (mtyp == TIPC_GRP_MCAST_MSG)); } static inline u32 msg_connected(struct tipc_msg *m) @@ -237,6 +417,11 @@ static inline u32 msg_connected(struct tipc_msg *m) return msg_type(m) == TIPC_CONN_MSG; } +static inline u32 msg_direct(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_DIRECT_MSG; +} + static inline u32 msg_errcode(struct tipc_msg *m) { return msg_bits(m, 1, 25, 0xf); @@ -247,6 +432,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err) msg_set_bits(m, 1, 25, 0xf, err); } +static inline void msg_set_bulk(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 28, 0x1, 1); +} + +static inline u32 msg_is_bulk(struct tipc_msg *m) +{ + return msg_bits(m, 1, 28, 0x1); +} + +static inline void msg_set_last_bulk(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 27, 0x1, 1); +} + +static inline u32 msg_is_last_bulk(struct tipc_msg *m) +{ + return msg_bits(m, 1, 27, 0x1); +} + +static inline void msg_set_non_legacy(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 26, 0x1, 1); +} + +static inline u32 msg_is_legacy(struct tipc_msg *m) +{ + return !msg_bits(m, 1, 26, 0x1); +} + static inline u32 msg_reroute_cnt(struct tipc_msg *m) { return msg_bits(m, 1, 21, 0xf); @@ -257,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m) msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1); } -static inline void msg_reset_reroute_cnt(struct tipc_msg *m) -{ - msg_set_bits(m, 1, 21, 0xf, 0); -} - static inline u32 msg_lookup_scope(struct tipc_msg *m) { return msg_bits(m, 1, 19, 0x3); @@ -272,36 +482,58 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 19, 0x3, n); } -static inline u32 msg_bcast_ack(struct tipc_msg *m) +static inline u16 msg_bcast_ack(struct tipc_msg *m) { return msg_bits(m, 1, 0, 0xffff); } -static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n) +static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) { msg_set_bits(m, 1, 0, 
0xffff, n); } +/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch + * link peer session number + */ +static inline bool msg_dest_session_valid(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1); +} + +static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) +{ + msg_set_bits(m, 1, 16, 0x1, valid); +} + +static inline u16 msg_dest_session(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} /* * Word 2 */ -static inline u32 msg_ack(struct tipc_msg *m) +static inline u16 msg_ack(struct tipc_msg *m) { return msg_bits(m, 2, 16, 0xffff); } -static inline void msg_set_ack(struct tipc_msg *m, u32 n) +static inline void msg_set_ack(struct tipc_msg *m, u16 n) { msg_set_bits(m, 2, 16, 0xffff, n); } -static inline u32 msg_seqno(struct tipc_msg *m) +static inline u16 msg_seqno(struct tipc_msg *m) { return msg_bits(m, 2, 0, 0xffff); } -static inline void msg_set_seqno(struct tipc_msg *m, u32 n) +static inline void msg_set_seqno(struct tipc_msg *m, u16 n) { msg_set_bits(m, 2, 0, 0xffff, n); } @@ -309,6 +541,29 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) /* * Words 3-10 */ +static inline u32 msg_importance(struct tipc_msg *m) +{ + int usr = msg_user(m); + + if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) + return usr; + if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) + return msg_bits(m, 9, 0, 0x7); + return TIPC_SYSTEM_IMPORTANCE; +} + +static inline void msg_set_importance(struct tipc_msg *m, u32 i) +{ + int usr = msg_user(m); + + if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) + msg_set_bits(m, 9, 0, 0x7, i); + else if (i < TIPC_SYSTEM_IMPORTANCE) + msg_set_user(m, i); + else + pr_warn("Trying to set illegal importance in message\n"); +} + static inline u32 msg_prevnode(struct tipc_msg *m) { return msg_word(m, 3); @@ -321,6 +576,8 @@ static inline void 
msg_set_prevnode(struct tipc_msg *m, u32 a) static inline u32 msg_origport(struct tipc_msg *m) { + if (msg_user(m) == MSG_FRAGMENTER) + m = msg_inner_hdr(m); return msg_word(m, 4); } @@ -329,6 +586,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p) msg_set_word(m, 4, p); } +static inline u16 msg_named_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + static inline u32 msg_destport(struct tipc_msg *m) { return msg_word(m, 5); @@ -416,34 +683,11 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) msg_set_word(m, 10, n); } -static inline unchar *msg_data(struct tipc_msg *m) -{ - return ((unchar *)m) + msg_hdr_sz(m); -} - -static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) -{ - return (struct tipc_msg *)msg_data(m); -} - /* * Constants and routines used to read and write TIPC internal message headers */ /* - * Internal message users - */ -#define BCAST_PROTOCOL 5 -#define MSG_BUNDLER 6 -#define LINK_PROTOCOL 7 -#define CONN_MANAGER 8 -#define ROUTE_DISTRIBUTOR 9 /* obsoleted */ -#define CHANGEOVER_PROTOCOL 10 -#define NAME_DISTRIBUTOR 11 -#define MSG_FRAGMENTER 12 -#define LINK_CONFIG 13 - -/* * Connection management protocol message types */ #define CONN_PROBE 0 @@ -473,15 +717,29 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) /* * Changeover tunnel message types */ -#define DUPLICATE_MSG 0 -#define ORIGINAL_MSG 1 +#define SYNCH_MSG 0 +#define FAILOVER_MSG 1 /* * Config protocol message types */ #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 +#define DSC_TRIAL_MSG 2 +#define DSC_TRIAL_FAIL_MSG 3 + +/* + * Group protocol message types + */ +#define GRP_JOIN_MSG 0 +#define GRP_LEAVE_MSG 1 +#define GRP_ADV_MSG 2 +#define GRP_ACK_MSG 3 +#define GRP_RECLAIM_MSG 4 +#define GRP_REMIT_MSG 5 +/* Crypto message types */ +#define KEY_DISTR_MSG 0 /* * Word 1 @@ -506,6 +764,15 @@ 
static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 0, 0xffff, n); } +static inline u32 msg_node_capabilities(struct tipc_msg *m) +{ + return msg_bits(m, 1, 15, 0x1fff); +} + +static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 15, 0x1fff, n); +} /* * Word 2 @@ -520,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) msg_set_word(m, 2, n); } -static inline u32 msg_bcgap_after(struct tipc_msg *m) -{ - return msg_bits(m, 2, 16, 0xffff); -} - static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n) { msg_set_bits(m, 2, 16, 0xffff, n); @@ -540,7 +802,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n) msg_set_bits(m, 2, 0, 0xffff, n); } - /* * Word 4 */ @@ -549,40 +810,42 @@ static inline u32 msg_last_bcast(struct tipc_msg *m) return msg_bits(m, 4, 16, 0xffff); } +static inline u32 msg_bc_snd_nxt(struct tipc_msg *m) +{ + return msg_last_bcast(m) + 1; +} + static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } - -static inline u32 msg_fragm_no(struct tipc_msg *m) +static inline u32 msg_nof_fragms(struct tipc_msg *m) { - return msg_bits(m, 4, 16, 0xffff); + return msg_bits(m, 4, 0, 0xffff); } -static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 4, 16, 0xffff, n); + msg_set_bits(m, 4, 0, 0xffff, n); } - -static inline u32 msg_next_sent(struct tipc_msg *m) +static inline u32 msg_fragm_no(struct tipc_msg *m) { - return msg_bits(m, 4, 0, 0xffff); + return msg_bits(m, 4, 16, 0xffff); } -static inline void msg_set_next_sent(struct tipc_msg *m, u32 n) +static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 4, 0, 0xffff, n); + msg_set_bits(m, 4, 16, 0xffff, n); } - -static inline u32 msg_long_msgno(struct tipc_msg *m) +static inline u16 msg_next_sent(struct tipc_msg *m) { return 
msg_bits(m, 4, 0, 0xffff); } -static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n) +static inline void msg_set_next_sent(struct tipc_msg *m, u16 n) { msg_set_bits(m, 4, 0, 0xffff, n); } @@ -599,23 +862,20 @@ static inline void msg_set_bc_netid(struct tipc_msg *m, u32 id) static inline u32 msg_link_selector(struct tipc_msg *m) { + if (msg_user(m) == MSG_FRAGMENTER) + m = (void *)msg_data(m); return msg_bits(m, 4, 0, 1); } -static inline void msg_set_link_selector(struct tipc_msg *m, u32 n) -{ - msg_set_bits(m, 4, 0, 1, n); -} - /* * Word 5 */ -static inline u32 msg_session(struct tipc_msg *m) +static inline u16 msg_session(struct tipc_msg *m) { return msg_bits(m, 5, 16, 0xffff); } -static inline void msg_set_session(struct tipc_msg *m, u32 n) +static inline void msg_set_session(struct tipc_msg *m, u16 n) { msg_set_bits(m, 5, 16, 0xffff, n); } @@ -670,34 +930,91 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + +static inline bool msg_bc_ack_invalid(struct tipc_msg *m) +{ + switch (msg_user(m)) { + case BCAST_PROTOCOL: + case NAME_DISTRIBUTOR: + case LINK_PROTOCOL: + return msg_bits(m, 5, 14, 0x1); + default: + return false; + } +} + +static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid) +{ + msg_set_bits(m, 5, 14, 0x1, invalid); +} + static inline char *msg_media_addr(struct tipc_msg *m) { - return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET]; + return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; +} + +static inline u32 msg_bc_gap(struct tipc_msg *m) +{ + return msg_bits(m, 8, 0, 0x3ff); +} + +static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 8, 0, 0x3ff, n); } /* * Word 9 */ -static inline u32 msg_msgcnt(struct tipc_msg *m) +static inline u16 
msg_msgcnt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } -static inline void msg_set_msgcnt(struct tipc_msg *m, u32 n) +static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } -static inline u32 msg_bcast_tag(struct tipc_msg *m) +static inline u16 msg_syncpt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } -static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n) +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u32 msg_conn_ack(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_adv_win(struct tipc_msg *m) +{ + return msg_bits(m, 9, 0, 0xffff); +} + +static inline void msg_set_adv_win(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 0, 0xffff, n); +} + static inline u32 msg_max_pkt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff) * 4; @@ -718,10 +1035,276 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -u32 tipc_msg_tot_importance(struct tipc_msg *m); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode); -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, int max_size, - struct sk_buff **buf); +static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_bc_acked(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_remitted(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static 
inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +/* Word 10 + */ +static inline u16 msg_grp_evt(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x3); +} + +static inline void msg_set_grp_evt(struct tipc_msg *m, int n) +{ + msg_set_bits(m, 10, 0, 0x3, n); +} + +static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x1); +} + +static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n) +{ + msg_set_bits(m, 10, 0, 0x1, n); +} + +static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 10, 16, 0xffff); +} + +static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 10, 16, 0xffff, n); +} + +static inline bool msg_peer_link_is_up(struct tipc_msg *m) +{ + if (likely(msg_user(m) != LINK_PROTOCOL)) + return true; + if (msg_type(m) == STATE_MSG) + return true; + return false; +} + +static inline bool msg_peer_node_is_up(struct tipc_msg *m) +{ + if (msg_peer_link_is_up(m)) + return true; + return msg_redundant_link(m); +} + +static inline bool msg_is_reset(struct tipc_msg *hdr) +{ + return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); +} + +/* Word 13 + */ +static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 13, n); +} + +static inline u32 msg_peer_net_hash(struct tipc_msg *m) +{ + return msg_word(m, 13); +} + +/* Word 14 + */ +static inline u32 msg_sugg_node_addr(struct tipc_msg *m) +{ + return msg_word(m, 14); +} + +static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 14, n); +} + +static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id) +{ + memcpy(msg_data(hdr), id, 16); +} + +static inline u8 *msg_node_id(struct tipc_msg *hdr) +{ + return (u8 *)msg_data(hdr); +} + +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); +bool tipc_msg_validate(struct sk_buff **_skb); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff 
**skb, int err); +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq); +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode); +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); +bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss, + u32 dnode, bool *new_bundle); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags); +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct sk_buff_head *list); +int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen, + int mss, struct sk_buff_head *txq); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); +bool tipc_msg_assemble(struct sk_buff_head *list); +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); +bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, + struct sk_buff_head *cpy); +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb); +bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy); + +static inline u16 buf_seqno(struct sk_buff *skb) +{ + return msg_seqno(buf_msg(skb)); +} + +static inline int buf_roundup_len(struct sk_buff *skb) +{ + return (skb->len / 1024 + 1) * 1024; +} + +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in + * Returns pointer to first buffer in list, if any + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + 
return skb; +} + +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search + * Returns a destination port number, of applicable. + */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_splice_tail - append an skb list to lock protected list + * @list: the new list to append. Not lock protected + * @head: target list. Lock protected. + */ +static inline void tipc_skb_queue_splice_tail(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + spin_lock_bh(&head->lock); + skb_queue_splice_tail(list, head); + spin_unlock_bh(&head->lock); +} + +/* tipc_skb_queue_splice_tail_init - merge two lock protected skb lists + * @list: the new list to add. Lock protected. Will be reinitialized + * @head: target list. Lock protected. 
+ */ +static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + struct sk_buff_head tmp; + + __skb_queue_head_init(&tmp); + + spin_lock_bh(&list->lock); + skb_queue_splice_tail_init(list, &tmp); + spin_unlock_bh(&list->lock); + tipc_skb_queue_splice_tail(&tmp, head); +} + +/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno + * @list: list to be dequeued from + * @seqno: seqno of the expected msg + * + * returns skb dequeued from the list if its seqno is less than or equal to + * the expected one, otherwise the skb is still hold + * + * Note: must be used with appropriate locks held only + */ +static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list, + u16 seqno) +{ + struct sk_buff *skb = skb_peek(list); + + if (skb && less_eq(buf_seqno(skb), seqno)) { + __skb_unlink(skb, list); + return skb; + } + return NULL; +} + #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index e0d08055754e..190b49c5cbc3 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -1,8 +1,9 @@ /* * net/tipc/name_distr.c: TIPC name distribution code * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014-2019, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,330 +39,373 @@ #include "link.h" #include "name_distr.h" -#define ITEM_SIZE sizeof(struct distr_item) - -/** - * struct distr_item - publication info distributed to other nodes - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @ref: publishing port reference - * @key: publication key - * - * ===> All fields are stored in network byte order. <=== - * - * First 3 fields identify (name or) name sequence being published. 
- * Reference field uniquely identifies port that published name sequence. - * Key field uniquely identifies publication, in the event a port has - * multiple publications of the same name sequence. - * - * Note: There is no field that identifies the publishing node because it is - * the same for all items contained within a publication message. - */ -struct distr_item { - __be32 type; - __be32 lower; - __be32 upper; - __be32 ref; - __be32 key; -}; - -/** - * struct publ_list - list of publications made by this node - * @list: circular list of publications - * @list_size: number of entries in list - */ -struct publ_list { - struct list_head list; - u32 size; -}; - -static struct publ_list publ_zone = { - .list = LIST_HEAD_INIT(publ_zone.list), - .size = 0, -}; - -static struct publ_list publ_cluster = { - .list = LIST_HEAD_INIT(publ_cluster.list), - .size = 0, -}; - -static struct publ_list publ_node = { - .list = LIST_HEAD_INIT(publ_node.list), - .size = 0, -}; - -static struct publ_list *publ_lists[] = { - NULL, - &publ_zone, /* publ_lists[TIPC_ZONE_SCOPE] */ - &publ_cluster, /* publ_lists[TIPC_CLUSTER_SCOPE] */ - &publ_node /* publ_lists[TIPC_NODE_SCOPE] */ -}; - +int sysctl_tipc_named_timeout __read_mostly = 2000; /** * publ_to_item - add publication info to a publication message + * @p: publication info + * @i: location of item in the message */ static void publ_to_item(struct distr_item *i, struct publication *p) { - i->type = htonl(p->type); - i->lower = htonl(p->lower); - i->upper = htonl(p->upper); - i->ref = htonl(p->ref); + i->type = htonl(p->sr.type); + i->lower = htonl(p->sr.lower); + i->upper = htonl(p->sr.upper); + i->port = htonl(p->sk.ref); i->key = htonl(p->key); } /** * named_prepare_buf - allocate & initialize a publication message + * @net: the associated network namespace + * @type: message type + * @size: payload size + * @dest: destination node + * + * The buffer returned is of size INT_H_SIZE + payload size */ -static struct sk_buff 
*named_prepare_buf(u32 type, u32 size, u32 dest) +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) { - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); + u32 self = tipc_own_addr(net); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); + tipc_msg_init(self, msg, NAME_DISTRIBUTOR, + type, INT_H_SIZE, dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; } -static void named_cluster_distribute(struct sk_buff *buf) -{ - struct sk_buff *buf_copy; - struct tipc_node *n_ptr; - - list_for_each_entry(n_ptr, &tipc_node_list, list) { - if (tipc_node_active_links(n_ptr)) { - buf_copy = skb_copy(buf, GFP_ATOMIC); - if (!buf_copy) - break; - msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); - tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr); - } - } - - kfree_skb(buf); -} - /** * tipc_named_publish - tell other nodes about a new publication by this node + * @net: the associated network namespace + * @p: the new publication */ -void tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *p) { - struct sk_buff *buf; + struct name_table *nt = tipc_name_table(net); struct distr_item *item; + struct sk_buff *skb; - list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list); - publ_lists[publ->scope]->size++; - - if (publ->scope == TIPC_NODE_SCOPE) - return; - - buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); - if (!buf) { + if (p->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&p->binding_node, &nt->node_scope); + return NULL; + } + write_lock_bh(&nt->cluster_scope_lock); + list_add_tail(&p->binding_node, &nt->cluster_scope); + write_unlock_bh(&nt->cluster_scope_lock); + skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); + if (!skb) { pr_warn("Publication distribution failure\n"); - return; + return 
NULL; } - - item = (struct distr_item *)msg_data(buf_msg(buf)); - publ_to_item(item, publ); - named_cluster_distribute(buf); + msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); + msg_set_non_legacy(buf_msg(skb)); + item = (struct distr_item *)msg_data(buf_msg(skb)); + publ_to_item(item, p); + return skb; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node + * @net: the associated network namespace + * @p: the withdrawn publication */ -void tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p) { - struct sk_buff *buf; + struct name_table *nt = tipc_name_table(net); struct distr_item *item; + struct sk_buff *skb; - list_del(&publ->local_list); - publ_lists[publ->scope]->size--; - - if (publ->scope == TIPC_NODE_SCOPE) - return; + write_lock_bh(&nt->cluster_scope_lock); + list_del(&p->binding_node); + write_unlock_bh(&nt->cluster_scope_lock); + if (p->scope == TIPC_NODE_SCOPE) + return NULL; - buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); - if (!buf) { + skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); + if (!skb) { pr_warn("Withdrawal distribution failure\n"); - return; + return NULL; } - - item = (struct distr_item *)msg_data(buf_msg(buf)); - publ_to_item(item, publ); - named_cluster_distribute(buf); + msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); + msg_set_non_legacy(buf_msg(skb)); + item = (struct distr_item *)msg_data(buf_msg(skb)); + publ_to_item(item, p); + return skb; } -/* +/** * named_distribute - prepare name info for bulk distribution to another node + * @net: the associated network namespace + * @list: list of messages (buffers) to be returned from this function + * @dnode: node to be updated + * @pls: linked list of publication items to be packed into buffer chain + * @seqno: sequence number for this message */ -static void named_distribute(struct list_head *message_list, u32 node, - struct publ_list *pls, u32 max_item_buf) +static 
void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head *pls, u16 seqno) { struct publication *publ; - struct sk_buff *buf = NULL; + struct sk_buff *skb = NULL; struct distr_item *item = NULL; - u32 left = 0; - u32 rest = pls->size * ITEM_SIZE; - - list_for_each_entry(publ, &pls->list, local_list) { - if (!buf) { - left = (rest <= max_item_buf) ? rest : max_item_buf; - rest -= left; - buf = named_prepare_buf(PUBLICATION, left, node); - if (!buf) { + u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) / + ITEM_SIZE) * ITEM_SIZE; + u32 msg_rem = msg_dsz; + struct tipc_msg *hdr; + + list_for_each_entry(publ, pls, binding_node) { + /* Prepare next buffer: */ + if (!skb) { + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); + if (!skb) { pr_warn("Bulk publication failure\n"); return; } - item = (struct distr_item *)msg_data(buf_msg(buf)); + hdr = buf_msg(skb); + msg_set_bc_ack_invalid(hdr, true); + msg_set_bulk(hdr); + msg_set_non_legacy(hdr); + item = (struct distr_item *)msg_data(hdr); } + + /* Pack publication into message: */ publ_to_item(item, publ); item++; - left -= ITEM_SIZE; - if (!left) { - list_add_tail((struct list_head *)buf, message_list); - buf = NULL; + msg_rem -= ITEM_SIZE; + + /* Append full buffer to list: */ + if (!msg_rem) { + __skb_queue_tail(list, skb); + skb = NULL; + msg_rem = msg_dsz; } } + if (skb) { + hdr = buf_msg(skb); + msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem)); + skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); + __skb_queue_tail(list, skb); + } + hdr = buf_msg(skb_peek_tail(list)); + msg_set_last_bulk(hdr); + msg_set_named_seqno(hdr, seqno); } /** * tipc_named_node_up - tell specified node about all publications by this node + * @net: the associated network namespace + * @dnode: destination node + * @capabilities: peer node's capabilities */ -void tipc_named_node_up(unsigned long nodearg) +void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) { 
- struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct list_head message_list; - u32 node = (u32)nodearg; - u32 max_item_buf = 0; - - /* compute maximum amount of publication data to send per message */ - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(node); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) - max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) / - ITEM_SIZE) * ITEM_SIZE; - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - if (!max_item_buf) - return; - - /* create list of publication messages, then send them as a unit */ - INIT_LIST_HEAD(&message_list); - - read_lock_bh(&tipc_nametbl_lock); - named_distribute(&message_list, node, &publ_cluster, max_item_buf); - named_distribute(&message_list, node, &publ_zone, max_item_buf); - read_unlock_bh(&tipc_nametbl_lock); - - tipc_link_send_names(&message_list, node); + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct sk_buff_head head; + u16 seqno; + + __skb_queue_head_init(&head); + spin_lock_bh(&tn->nametbl_lock); + if (!(capabilities & TIPC_NAMED_BCAST)) + nt->rc_dests++; + seqno = nt->snd_nxt; + spin_unlock_bh(&tn->nametbl_lock); + + read_lock_bh(&nt->cluster_scope_lock); + named_distribute(net, &head, dnode, &nt->cluster_scope, seqno); + tipc_node_xmit(net, &head, dnode, 0); + read_unlock_bh(&nt->cluster_scope_lock); } /** - * named_purge_publ - remove publication associated with a failed node + * tipc_publ_purge - remove publication associated with a failed node + * @net: the associated network namespace + * @p: the publication to remove + * @addr: failed node's address * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. 
*/ -static void named_purge_publ(struct publication *publ) +static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr) { - struct publication *p; + struct tipc_net *tn = tipc_net(net); + struct publication *_p; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type, + p->sr.lower, p->sr.upper); + spin_lock_bh(&tn->nametbl_lock); + _p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key); + if (_p) + tipc_node_unsubscribe(net, &_p->binding_node, addr); + spin_unlock_bh(&tn->nametbl_lock); + if (_p) + kfree_rcu(_p, rcu); +} + +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, + u32 addr, u16 capabilities) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); - write_lock_bh(&tipc_nametbl_lock); - p = tipc_nametbl_remove_publ(publ->type, publ->lower, - publ->node, publ->ref, publ->key); - if (p) - tipc_nodesub_unsubscribe(&p->subscr); - write_unlock_bh(&tipc_nametbl_lock); - - if (p != publ) { - pr_err("Unable to remove publication from failed node\n" - " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", - publ->type, publ->lower, publ->node, publ->ref, - publ->key); + struct publication *publ, *tmp; + + list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) + tipc_publ_purge(net, publ, addr); + spin_lock_bh(&tn->nametbl_lock); + if (!(capabilities & TIPC_NAMED_BCAST)) + nt->rc_dests--; + spin_unlock_bh(&tn->nametbl_lock); +} + +/** + * tipc_update_nametbl - try to process a nametable update and notify + * subscribers + * @net: the associated network namespace + * @i: location of item in the message + * @node: node address + * @dtype: name distributor message type + * + * tipc_nametbl_lock must be held. + * Return: the publication item if successful, otherwise NULL. 
+ */ +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) +{ + struct publication *p = NULL; + struct tipc_socket_addr sk; + struct tipc_uaddr ua; + u32 key = ntohl(i->key); + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + ntohl(i->type), ntohl(i->lower), ntohl(i->upper)); + sk.ref = ntohl(i->port); + sk.node = node; + + if (dtype == PUBLICATION) { + p = tipc_nametbl_insert_publ(net, &ua, &sk, key); + if (p) { + tipc_node_subscribe(net, &p->binding_node, node); + return true; + } + } else if (dtype == WITHDRAWAL) { + p = tipc_nametbl_remove_publ(net, &ua, &sk, key); + if (p) { + tipc_node_unsubscribe(net, &p->binding_node, node); + kfree_rcu(p, rcu); + return true; + } + pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n", + ua.sr.type, ua.sr.lower, node); + } else { + pr_warn_ratelimited("Unknown name table message received\n"); } + return false; +} + +static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open) +{ + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + u16 seqno; + + spin_lock_bh(&namedq->lock); + skb_queue_walk_safe(namedq, skb, tmp) { + if (unlikely(skb_linearize(skb))) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } + hdr = buf_msg(skb); + seqno = msg_named_seqno(hdr); + if (msg_is_last_bulk(hdr)) { + *rcv_nxt = seqno; + *open = true; + } + + if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { + __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); + return skb; + } - kfree(p); + if (*open && (*rcv_nxt == seqno)) { + (*rcv_nxt)++; + __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); + return skb; + } + + if (less(seqno, *rcv_nxt)) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } + } + spin_unlock_bh(&namedq->lock); + return NULL; } /** - * tipc_named_recv - process name table update message sent by another node + * tipc_named_rcv - process name table update messages sent by another node + * @net: the 
associated network namespace + * @namedq: queue to receive from + * @rcv_nxt: store last received seqno here + * @open: last bulk msg was received (FIXME) */ -void tipc_named_recv(struct sk_buff *buf) +void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open) { - struct publication *publ; - struct tipc_msg *msg = buf_msg(buf); - struct distr_item *item = (struct distr_item *)msg_data(msg); - u32 count = msg_data_sz(msg) / ITEM_SIZE; - - write_lock_bh(&tipc_nametbl_lock); - while (count--) { - if (msg_type(msg) == PUBLICATION) { - publ = tipc_nametbl_insert_publ(ntohl(item->type), - ntohl(item->lower), - ntohl(item->upper), - TIPC_CLUSTER_SCOPE, - msg_orignode(msg), - ntohl(item->ref), - ntohl(item->key)); - if (publ) { - tipc_nodesub_subscribe(&publ->subscr, - msg_orignode(msg), - publ, - (net_ev_handler) - named_purge_publ); - } - } else if (msg_type(msg) == WITHDRAWAL) { - publ = tipc_nametbl_remove_publ(ntohl(item->type), - ntohl(item->lower), - msg_orignode(msg), - ntohl(item->ref), - ntohl(item->key)); - - if (publ) { - tipc_nodesub_unsubscribe(&publ->subscr); - kfree(publ); - } else { - pr_err("Unable to remove publication by node 0x%x\n" - " (type=%u, lower=%u, ref=%u, key=%u)\n", - msg_orignode(msg), ntohl(item->type), - ntohl(item->lower), ntohl(item->ref), - ntohl(item->key)); - } - } else { - pr_warn("Unrecognized name table message received\n"); + struct tipc_net *tn = tipc_net(net); + struct distr_item *item; + struct tipc_msg *hdr; + struct sk_buff *skb; + u32 count, node; + + spin_lock_bh(&tn->nametbl_lock); + while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) { + hdr = buf_msg(skb); + node = msg_orignode(hdr); + item = (struct distr_item *)msg_data(hdr); + count = msg_data_sz(hdr) / ITEM_SIZE; + while (count--) { + tipc_update_nametbl(net, item, node, msg_type(hdr)); + item++; } - item++; + kfree_skb(skb); } - write_unlock_bh(&tipc_nametbl_lock); - kfree_skb(buf); + spin_unlock_bh(&tn->nametbl_lock); } /** 
* tipc_named_reinit - re-initialize local publications + * @net: the associated network namespace * * This routine is called whenever TIPC networking is enabled. * All name table entries published by this node are updated to reflect * the node's new network address. */ -void tipc_named_reinit(void) +void tipc_named_reinit(struct net *net) { - struct publication *publ; - int scope; - - write_lock_bh(&tipc_nametbl_lock); + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct publication *p; + u32 self = tipc_own_addr(net); - for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry(publ, &publ_lists[scope]->list, local_list) - publ->node = tipc_own_addr; + spin_lock_bh(&tn->nametbl_lock); - write_unlock_bh(&tipc_nametbl_lock); + list_for_each_entry_rcu(p, &nt->node_scope, binding_node) + p->sk.node = self; + list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node) + p->sk.node = self; + nt->rc_dests = 0; + spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 1e41bdd4f255..c677f6f082df 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -39,10 +39,41 @@ #include "name_table.h" -void tipc_named_publish(struct publication *publ); -void tipc_named_withdraw(struct publication *publ); -void tipc_named_node_up(unsigned long node); -void tipc_named_recv(struct sk_buff *buf); -void tipc_named_reinit(void); +#define ITEM_SIZE sizeof(struct distr_item) + +/** + * struct distr_item - publication info distributed to other nodes + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @port: publishing port reference + * @key: publication key + * + * ===> All fields are stored in network byte order. <=== + * + * First 3 fields identify (name or) name sequence being published. + * Reference field uniquely identifies port that published name sequence. 
+ * Key field uniquely identifies publication, in the event a port has + * multiple publications of the same name sequence. + * + * Note: There is no field that identifies the publishing node because it is + * the same for all items contained within a publication message. + */ +struct distr_item { + __be32 type; + __be32 lower; + __be32 upper; + __be32 port; + __be32 key; +}; + +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); +void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities); +void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open); +void tipc_named_reinit(struct net *net); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, + u32 addr, u16 capabilities); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 09dcd54b04e1..e74940eab3a4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,8 +1,9 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2008, 2010-2011, Wind River Systems + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB + * Copyright (c) 2004-2008, 2010-2014, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,930 +35,1170 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <net/sock.h> +#include <linux/list_sort.h> +#include <linux/rbtree_augmented.h> #include "core.h" -#include "config.h" +#include "netlink.h" #include "name_table.h" #include "name_distr.h" #include "subscr.h" -#include "port.h" - -#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ - -/** - * struct name_info - name sequence publication info - * @node_list: circular list of publications made by own node - * @cluster_list: circular list of publications made by own cluster - * @zone_list: circular list of publications made by own zone - * @node_list_size: number of entries in "node_list" - * @cluster_list_size: number of entries in "cluster_list" - * @zone_list_size: number of entries in "zone_list" - * - * Note: The zone list always contains at least one entry, since all - * publications of the associated name sequence belong to it. - * (The cluster and node lists may be empty.) - */ -struct name_info { - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; - u32 node_list_size; - u32 cluster_list_size; - u32 zone_list_size; -}; +#include "bcast.h" +#include "addr.h" +#include "node.h" +#include "group.h" /** - * struct sub_seq - container for all published instances of a name sequence - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @info: pointer to name sequence publication info + * struct service_range - container for all bindings of a service range + * @lower: service range lower bound + * @upper: service range upper bound + * @tree_node: member of service range RB tree + * @max: largest 'upper' in this node subtree + * @local_publ: list of identical publications made from this node + * Used by closest_first lookup and multicast lookup algorithm + * @all_publ: all publications identical to this one, whatever node and scope + * Used by round-robin lookup algorithm */ -struct sub_seq { +struct service_range { u32 lower; u32 upper; - struct name_info *info; + struct rb_node 
tree_node; + u32 max; + struct list_head local_publ; + struct list_head all_publ; }; /** - * struct name_seq - container for all published instances of a name type - * @type: 32 bit 'type' value for name sequence - * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type'; - * sub-sequences are sorted in ascending order - * @alloc: number of sub-sequences currently in array - * @first_free: array index of first unused sub-sequence entry - * @ns_list: links to adjacent name sequences in hash chain - * @subscriptions: list of subscriptions for this 'type' - * @lock: spinlock controlling access to publication lists of all sub-sequences + * struct tipc_service - container for all published instances of a service type + * @type: 32 bit 'type' value for service + * @publ_cnt: increasing counter for publications in this service + * @ranges: rb tree containing all service ranges for this service + * @service_list: links to adjacent name ranges in hash chain + * @subscriptions: list of subscriptions for this service type + * @lock: spinlock controlling access to pertaining service ranges/publications + * @rcu: RCU callback head used for deferred freeing */ -struct name_seq { +struct tipc_service { u32 type; - struct sub_seq *sseqs; - u32 alloc; - u32 first_free; - struct hlist_node ns_list; + u32 publ_cnt; + struct rb_root ranges; + struct hlist_node service_list; struct list_head subscriptions; - spinlock_t lock; + spinlock_t lock; /* Covers service range list */ + struct rcu_head rcu; }; +#define service_range_upper(sr) ((sr)->upper) +RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks, + struct service_range, tree_node, u32, max, + service_range_upper) + +#define service_range_entry(rbtree_node) \ + (container_of(rbtree_node, struct service_range, tree_node)) + +#define service_range_overlap(sr, start, end) \ + ((sr)->lower <= (end) && (sr)->upper >= (start)) + /** - * struct name_table - table containing all existing port name publications - * @types: 
pointer to fixed-sized array of name sequence lists, - * accessed via hashing on 'type'; name sequence lists are *not* sorted - * @local_publ_count: number of publications issued by this node + * service_range_foreach_match - iterate over tipc service rbtree for each + * range match + * @sr: the service range pointer as a loop cursor + * @sc: the pointer to tipc service which holds the service range rbtree + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching */ -struct name_table { - struct hlist_head *types; - u32 local_publ_count; -}; +#define service_range_foreach_match(sr, sc, start, end) \ + for (sr = service_range_match_first((sc)->ranges.rb_node, \ + start, \ + end); \ + sr; \ + sr = service_range_match_next(&(sr)->tree_node, \ + start, \ + end)) -static struct name_table table; -DEFINE_RWLOCK(tipc_nametbl_lock); - -static int hash(int x) +/** + * service_range_match_first - find first service range matching a range + * @n: the root node of service range rbtree for searching + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching + * + * Return: the leftmost service range node in the rbtree that overlaps the + * specific range if any. Otherwise, returns NULL. + */ +static struct service_range *service_range_match_first(struct rb_node *n, + u32 start, u32 end) { - return x & (TIPC_NAMETBL_SIZE - 1); + struct service_range *sr; + struct rb_node *l, *r; + + /* Non overlaps in tree at all? */ + if (!n || service_range_entry(n)->max < start) + return NULL; + + while (n) { + l = n->rb_left; + if (l && service_range_entry(l)->max >= start) { + /* A leftmost overlap range node must be one in the left + * subtree. If not, it has lower > end, then nodes on + * the right side cannot satisfy the condition either. 
+ */ + n = l; + continue; + } + + /* No one in the left subtree can match, return if this node is + * an overlap i.e. leftmost. + */ + sr = service_range_entry(n); + if (service_range_overlap(sr, start, end)) + return sr; + + /* Ok, try to lookup on the right side */ + r = n->rb_right; + if (sr->lower <= end && + r && service_range_entry(r)->max >= start) { + n = r; + continue; + } + break; + } + + return NULL; } /** - * publ_create - create a publication structure + * service_range_match_next - find next service range matching a range + * @n: a node in service range rbtree from which the searching starts + * @start: beginning of the search range (end >= start) for matching + * @end: end of the search range (end >= start) for matching + * + * Return: the next service range node to the given node in the rbtree that + * overlaps the specific range if any. Otherwise, returns NULL. */ -static struct publication *publ_create(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port_ref, - u32 key) +static struct service_range *service_range_match_next(struct rb_node *n, + u32 start, u32 end) { - struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); - if (publ == NULL) { - pr_warn("Publication creation failure, no memory\n"); - return NULL; + struct service_range *sr; + struct rb_node *p, *r; + + while (n) { + r = n->rb_right; + if (r && service_range_entry(r)->max >= start) + /* A next overlap range node must be one in the right + * subtree. If not, it has lower > end, then any next + * successor (- an ancestor) of this node cannot + * satisfy the condition either. + */ + return service_range_match_first(r, start, end); + + /* No one in the right subtree can match, go up to find an + * ancestor of this node which is parent of a left-hand child. 
+ */ + while ((p = rb_parent(n)) && n == p->rb_right) + n = p; + if (!p) + break; + + /* Return if this ancestor is an overlap */ + sr = service_range_entry(p); + if (service_range_overlap(sr, start, end)) + return sr; + + /* Ok, try to lookup more from this ancestor */ + if (sr->lower <= end) { + n = p; + continue; + } + break; } - publ->type = type; - publ->lower = lower; - publ->upper = upper; - publ->scope = scope; - publ->node = node; - publ->ref = port_ref; - publ->key = key; - INIT_LIST_HEAD(&publ->local_list); - INIT_LIST_HEAD(&publ->pport_list); - INIT_LIST_HEAD(&publ->subscr.nodesub_list); - return publ; + return NULL; +} + +static int hash(int x) +{ + return x & (TIPC_NAMETBL_SIZE - 1); } /** - * tipc_subseq_alloc - allocate a specified number of sub-sequence structures + * tipc_publ_create - create a publication structure + * @ua: the service range the user is binding to + * @sk: the address of the socket that is bound + * @key: publication key */ -static struct sub_seq *tipc_subseq_alloc(u32 cnt) +static struct publication *tipc_publ_create(struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); - return sseq; + struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC); + + if (!p) + return NULL; + + p->sr = ua->sr; + p->sk = *sk; + p->scope = ua->scope; + p->key = key; + INIT_LIST_HEAD(&p->binding_sock); + INIT_LIST_HEAD(&p->binding_node); + INIT_LIST_HEAD(&p->local_publ); + INIT_LIST_HEAD(&p->all_publ); + INIT_LIST_HEAD(&p->list); + return p; } /** - * tipc_nameseq_create - create a name sequence structure for the specified 'type' + * tipc_service_create - create a service structure for the specified 'type' + * @net: network namespace + * @ua: address representing the service to be bound * - * Allocates a single sub-sequence structure and sets it to all 0's. + * Allocates a single range structure and sets it to all 0's. 
*/ -static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head) +static struct tipc_service *tipc_service_create(struct net *net, + struct tipc_uaddr *ua) { - struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC); - struct sub_seq *sseq = tipc_subseq_alloc(1); + struct name_table *nt = tipc_name_table(net); + struct tipc_service *service; + struct hlist_head *hd; - if (!nseq || !sseq) { - pr_warn("Name sequence creation failed, no memory\n"); - kfree(nseq); - kfree(sseq); + service = kzalloc(sizeof(*service), GFP_ATOMIC); + if (!service) { + pr_warn("Service creation failed, no memory\n"); return NULL; } - spin_lock_init(&nseq->lock); - nseq->type = type; - nseq->sseqs = sseq; - nseq->alloc = 1; - INIT_HLIST_NODE(&nseq->ns_list); - INIT_LIST_HEAD(&nseq->subscriptions); - hlist_add_head(&nseq->ns_list, seq_head); - return nseq; + spin_lock_init(&service->lock); + service->type = ua->sr.type; + service->ranges = RB_ROOT; + INIT_HLIST_NODE(&service->service_list); + INIT_LIST_HEAD(&service->subscriptions); + hd = &nt->services[hash(ua->sr.type)]; + hlist_add_head_rcu(&service->service_list, hd); + return service; } -/* - * nameseq_delete_empty - deletes a name sequence structure if now unused +/* tipc_service_find_range - find service range matching publication parameters */ -static void nameseq_delete_empty(struct name_seq *seq) +static struct service_range *tipc_service_find_range(struct tipc_service *sc, + struct tipc_uaddr *ua) { - if (!seq->first_free && list_empty(&seq->subscriptions)) { - hlist_del_init(&seq->ns_list); - kfree(seq->sseqs); - kfree(seq); - } -} + struct service_range *sr; -/** - * nameseq_find_subseq - find sub-sequence (if any) matching a name instance - * - * Very time-critical, so binary searches through sub-sequence array. 
- */ -static struct sub_seq *nameseq_find_subseq(struct name_seq *nseq, - u32 instance) -{ - struct sub_seq *sseqs = nseq->sseqs; - int low = 0; - int high = nseq->first_free - 1; - int mid; - - while (low <= high) { - mid = (low + high) / 2; - if (instance < sseqs[mid].lower) - high = mid - 1; - else if (instance > sseqs[mid].upper) - low = mid + 1; - else - return &sseqs[mid]; + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { + /* Look for exact match */ + if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper) + return sr; } + return NULL; } -/** - * nameseq_locate_subseq - determine position of name instance in sub-sequence - * - * Returns index in sub-sequence array of the entry that contains the specified - * instance value; if no entry contains that value, returns the position - * where a new entry for it would be inserted in the array. - * - * Note: Similar to binary search code for locating a sub-sequence. - */ -static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance) -{ - struct sub_seq *sseqs = nseq->sseqs; - int low = 0; - int high = nseq->first_free - 1; - int mid; - - while (low <= high) { - mid = (low + high) / 2; - if (instance < sseqs[mid].lower) - high = mid - 1; - else if (instance > sseqs[mid].upper) - low = mid + 1; +static struct service_range *tipc_service_create_range(struct tipc_service *sc, + struct publication *p) +{ + struct rb_node **n, *parent = NULL; + struct service_range *sr; + u32 lower = p->sr.lower; + u32 upper = p->sr.upper; + + n = &sc->ranges.rb_node; + while (*n) { + parent = *n; + sr = service_range_entry(parent); + if (lower == sr->lower && upper == sr->upper) + return sr; + if (sr->max < upper) + sr->max = upper; + if (lower <= sr->lower) + n = &parent->rb_left; else - return mid; + n = &parent->rb_right; } - return low; + sr = kzalloc(sizeof(*sr), GFP_ATOMIC); + if (!sr) + return NULL; + sr->lower = lower; + sr->upper = upper; + sr->max = upper; + INIT_LIST_HEAD(&sr->local_publ); + 
INIT_LIST_HEAD(&sr->all_publ); + rb_link_node(&sr->tree_node, parent, n); + rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); + return sr; } -/** - * tipc_nameseq_insert_publ - */ -static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, - u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) -{ - struct tipc_subscription *s; - struct tipc_subscription *st; - struct publication *publ; - struct sub_seq *sseq; - struct name_info *info; - int created_subseq = 0; - - sseq = nameseq_find_subseq(nseq, lower); - if (sseq) { - - /* Lower end overlaps existing entry => need an exact match */ - if ((sseq->lower != lower) || (sseq->upper != upper)) { - pr_warn("Cannot publish {%u,%u,%u}, overlap error\n", - type, lower, upper); - return NULL; - } - - info = sseq->info; - - /* Check if an identical publication already exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->ref == port) && (publ->key == key) && - (!publ->node || (publ->node == node))) - return NULL; - } - } else { - u32 inspos; - struct sub_seq *freesseq; - - /* Find where lower end should be inserted */ - inspos = nameseq_locate_subseq(nseq, lower); - - /* Fail if upper end overlaps into an existing entry */ - if ((inspos < nseq->first_free) && - (upper >= nseq->sseqs[inspos].lower)) { - pr_warn("Cannot publish {%u,%u,%u}, overlap error\n", - type, lower, upper); - return NULL; - } - - /* Ensure there is space for new sub-sequence */ - if (nseq->first_free == nseq->alloc) { - struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2); - - if (!sseqs) { - pr_warn("Cannot publish {%u,%u,%u}, no memory\n", - type, lower, upper); - return NULL; - } - memcpy(sseqs, nseq->sseqs, - nseq->alloc * sizeof(struct sub_seq)); - kfree(nseq->sseqs); - nseq->sseqs = sseqs; - nseq->alloc *= 2; - } - - info = kzalloc(sizeof(*info), GFP_ATOMIC); - if (!info) { - pr_warn("Cannot publish {%u,%u,%u}, no memory\n", - type, lower, upper); - return NULL; 
+static bool tipc_service_insert_publ(struct net *net, + struct tipc_service *sc, + struct publication *p) +{ + struct tipc_subscription *sub, *tmp; + struct service_range *sr; + struct publication *_p; + u32 node = p->sk.node; + bool first = false; + bool res = false; + u32 key = p->key; + + spin_lock_bh(&sc->lock); + sr = tipc_service_create_range(sc, p); + if (!sr) + goto exit; + + first = list_empty(&sr->all_publ); + + /* Return if the publication already exists */ + list_for_each_entry(_p, &sr->all_publ, all_publ) { + if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) { + pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n", + p->sr.type, p->sr.lower, p->sr.upper, + node, p->sk.ref, key); + goto exit; } - - INIT_LIST_HEAD(&info->node_list); - INIT_LIST_HEAD(&info->cluster_list); - INIT_LIST_HEAD(&info->zone_list); - - /* Insert new sub-sequence */ - sseq = &nseq->sseqs[inspos]; - freesseq = &nseq->sseqs[nseq->first_free]; - memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq)); - memset(sseq, 0, sizeof(*sseq)); - nseq->first_free++; - sseq->lower = lower; - sseq->upper = upper; - sseq->info = info; - created_subseq = 1; - } - - /* Insert a publication */ - publ = publ_create(type, lower, upper, scope, node, port, key); - if (!publ) - return NULL; - - list_add(&publ->zone_list, &info->zone_list); - info->zone_list_size++; - - if (in_own_cluster(node)) { - list_add(&publ->cluster_list, &info->cluster_list); - info->cluster_list_size++; } - if (in_own_node(node)) { - list_add(&publ->node_list, &info->node_list); - info->node_list_size++; - } + if (in_own_node(net, p->sk.node)) + list_add(&p->local_publ, &sr->local_publ); + list_add(&p->all_publ, &sr->all_publ); + p->id = sc->publ_cnt++; /* Any subscriptions waiting for notification? 
*/ - list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscr_report_overlap(s, - publ->lower, - publ->upper, - TIPC_PUBLISHED, - publ->ref, - publ->node, - created_subseq); + list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { + tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first); } - return publ; + res = true; +exit: + if (!res) + pr_warn("Failed to bind to %u,%u,%u\n", + p->sr.type, p->sr.lower, p->sr.upper); + spin_unlock_bh(&sc->lock); + return res; } /** - * tipc_nameseq_remove_publ - * - * NOTE: There may be cases where TIPC is asked to remove a publication - * that is not in the name table. For example, if another node issues a - * publication for a name sequence that overlaps an existing name sequence - * the publication will not be recorded, which means the publication won't - * be found when the name sequence is later withdrawn by that node. - * A failed withdraw request simply returns a failure indication and lets the - * caller issue any error or warning messages associated with such a problem. 
+ * tipc_service_remove_publ - remove a publication from a service + * @r: service_range to remove publication from + * @sk: address publishing socket + * @key: target publication key */ -static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, - u32 node, u32 ref, u32 key) +static struct publication *tipc_service_remove_publ(struct service_range *r, + struct tipc_socket_addr *sk, + u32 key) { - struct publication *publ; - struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); - struct name_info *info; - struct sub_seq *free; - struct tipc_subscription *s, *st; - int removed_subseq = 0; - - if (!sseq) - return NULL; + struct publication *p; + u32 node = sk->node; - info = sseq->info; - - /* Locate publication, if it exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->key == key) && (publ->ref == ref) && - (!publ->node || (publ->node == node))) - goto found; + list_for_each_entry(p, &r->all_publ, all_publ) { + if (p->key != key || (node && node != p->sk.node)) + continue; + list_del(&p->all_publ); + list_del(&p->local_publ); + return p; } return NULL; +} -found: - /* Remove publication from zone scope list */ - list_del(&publ->zone_list); - info->zone_list_size--; - - /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(node)) { - list_del(&publ->cluster_list); - info->cluster_list_size--; - } - - /* Remove publication from node scope list, if present */ - if (in_own_node(node)) { - list_del(&publ->node_list); - info->node_list_size--; - } - - /* Contract subseq list if no more publications for that subseq */ - if (list_empty(&info->zone_list)) { - kfree(info); - free = &nseq->sseqs[nseq->first_free--]; - memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq)); - removed_subseq = 1; - } - - /* Notify any waiting subscriptions */ - list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscr_report_overlap(s, - publ->lower, - publ->upper, - 
TIPC_WITHDRAWN, - publ->ref, - publ->node, - removed_subseq); - } +/* + * Code reused: time_after32() for the same purpose + */ +#define publication_after(pa, pb) time_after32((pa)->id, (pb)->id) +static int tipc_publ_sort(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct publication *pa, *pb; - return publ; + pa = container_of(a, struct publication, list); + pb = container_of(b, struct publication, list); + return publication_after(pa, pb); } /** - * tipc_nameseq_subscribe - attach a subscription, and issue - * the prescribed number of events if there is any sub- - * sequence overlapping with the requested sequence + * tipc_service_subscribe - attach a subscription, and optionally + * issue the prescribed number of events if there is any service + * range overlapping with the requested range + * @service: the tipc_service to attach the @sub to + * @sub: the subscription to attach */ -static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) +static void tipc_service_subscribe(struct tipc_service *service, + struct tipc_subscription *sub) { - struct sub_seq *sseq = nseq->sseqs; + struct publication *p, *first, *tmp; + struct list_head publ_list; + struct service_range *sr; + u32 filter, lower, upper; - list_add(&s->nameseq_list, &nseq->subscriptions); + filter = sub->s.filter; + lower = sub->s.seq.lower; + upper = sub->s.seq.upper; - if (!sseq) + tipc_sub_get(sub); + list_add(&sub->service_list, &service->subscriptions); + + if (filter & TIPC_SUB_NO_STATUS) return; - while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { - struct publication *crs; - struct name_info *info = sseq->info; - int must_report = 1; - - list_for_each_entry(crs, &info->zone_list, zone_list) { - tipc_subscr_report_overlap(s, - sseq->lower, - sseq->upper, - TIPC_PUBLISHED, - crs->ref, - crs->node, - must_report); - must_report = 0; - } + INIT_LIST_HEAD(&publ_list); + 
service_range_foreach_match(sr, service, lower, upper) { + first = NULL; + list_for_each_entry(p, &sr->all_publ, all_publ) { + if (filter & TIPC_SUB_PORTS) + list_add_tail(&p->list, &publ_list); + else if (!first || publication_after(first, p)) + /* Pick this range's *first* publication */ + first = p; } - sseq++; + if (first) + list_add_tail(&first->list, &publ_list); + } + + /* Sort the publications before reporting */ + list_sort(NULL, &publ_list, tipc_publ_sort); + list_for_each_entry_safe(p, tmp, &publ_list, list) { + tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true); + list_del_init(&p->list); } } -static struct name_seq *nametbl_find_seq(u32 type) +static struct tipc_service *tipc_service_find(struct net *net, + struct tipc_uaddr *ua) { - struct hlist_head *seq_head; - struct name_seq *ns; - - seq_head = &table.types[hash(type)]; - hlist_for_each_entry(ns, seq_head, ns_list) { - if (ns->type == type) - return ns; + struct name_table *nt = tipc_name_table(net); + struct hlist_head *service_head; + struct tipc_service *service; + + service_head = &nt->services[hash(ua->sr.type)]; + hlist_for_each_entry_rcu(service, service_head, service_list) { + if (service->type == ua->sr.type) + return service; } - return NULL; }; -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct name_seq *seq = nametbl_find_seq(type); - - if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || - (lower > upper)) { - pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n", - type, lower, upper, scope); - return NULL; - } + struct tipc_service *sc; + struct publication *p; - if (!seq) - seq = tipc_nameseq_create(type, &table.types[hash(type)]); - if (!seq) + p = tipc_publ_create(ua, sk, key); + if (!p) return NULL; - return tipc_nameseq_insert_publ(seq, type, lower, 
upper, - scope, node, port, key); + sc = tipc_service_find(net, ua); + if (!sc) + sc = tipc_service_create(net, ua); + if (sc && tipc_service_insert_publ(net, sc, p)) + return p; + kfree(p); + return NULL; } -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key) { - struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct tipc_subscription *sub, *tmp; + struct publication *p = NULL; + struct service_range *sr; + struct tipc_service *sc; + bool last; + + sc = tipc_service_find(net, ua); + if (!sc) + goto exit; - if (!seq) - return NULL; + spin_lock_bh(&sc->lock); + sr = tipc_service_find_range(sc, ua); + if (!sr) + goto unlock; + p = tipc_service_remove_publ(sr, sk, key); + if (!p) + goto unlock; - publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); - nameseq_delete_empty(seq); - return publ; + /* Notify any waiting subscriptions */ + last = list_empty(&sr->all_publ); + list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { + tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last); + } + + /* Remove service range item if this was its last publication */ + if (list_empty(&sr->all_publ)) { + rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); + kfree(sr); + } + + /* Delete service item if no more publications and subscriptions */ + if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) { + hlist_del_init_rcu(&sc->service_list); + kfree_rcu(sc, rcu); + } +unlock: + spin_unlock_bh(&sc->lock); +exit: + if (!p) { + pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n", + ua->sr.type, ua->sr.lower, ua->sr.upper, + sk->node, sk->ref, key); + } + return p; } /** - * tipc_nametbl_translate - perform name translation + * tipc_nametbl_lookup_anycast - perform service instance to socket translation + * @net: network namespace + * 
@ua: service address to look up + * @sk: address to socket we want to find * - * On entry, 'destnode' is the search domain used during translation. + * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be + * performed, which may not be this one. * * On exit: - * - if name translation is deferred to another node/cluster/zone, - * leaves 'destnode' unchanged (will be non-zero) and returns 0 - * - if name translation is attempted and succeeds, sets 'destnode' - * to publishing node and returns port reference (will be non-zero) - * - if name translation is attempted and fails, sets 'destnode' to 0 - * and returns 0 + * + * - If lookup is deferred to another node, leave 'sk->node' unchanged and + * return 'true'. + * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which + * represent the bound socket and return 'true'. + * - If lookup fails, return 'false' + * + * Note that for legacy users (node configured with Z.C.N address format) the + * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0 + * we must look in the local binding list first */ -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) +bool tipc_nametbl_lookup_anycast(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk) { - struct sub_seq *sseq; - struct name_info *info; - struct publication *publ; - struct name_seq *seq; - u32 ref = 0; - u32 node = 0; - - if (!tipc_in_scope(*destnode, tipc_own_addr)) - return 0; + struct tipc_net *tn = tipc_net(net); + bool legacy = tn->legacy_addr_format; + u32 self = tipc_own_addr(net); + u32 inst = ua->sa.instance; + struct service_range *r; + struct tipc_service *sc; + struct publication *p; + struct list_head *l; + bool res = false; + + if (!tipc_in_scope(legacy, sk->node, self)) + return true; + + rcu_read_lock(); + sc = tipc_service_find(net, ua); + if (unlikely(!sc)) + goto exit; - read_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); - if 
(unlikely(!seq)) - goto not_found; - sseq = nameseq_find_subseq(seq, instance); - if (unlikely(!sseq)) - goto not_found; - spin_lock_bh(&seq->lock); - info = sseq->info; - - /* Closest-First Algorithm */ - if (likely(!*destnode)) { - if (!list_empty(&info->node_list)) { - publ = list_first_entry(&info->node_list, - struct publication, - node_list); - list_move_tail(&publ->node_list, - &info->node_list); - } else if (!list_empty(&info->cluster_list)) { - publ = list_first_entry(&info->cluster_list, - struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, - &info->cluster_list); + spin_lock_bh(&sc->lock); + service_range_foreach_match(r, sc, inst, inst) { + /* Select lookup algo: local, closest-first or round-robin */ + if (sk->node == self) { + l = &r->local_publ; + if (list_empty(l)) + continue; + p = list_first_entry(l, struct publication, local_publ); + list_move_tail(&p->local_publ, &r->local_publ); + } else if (legacy && !sk->node && !list_empty(&r->local_publ)) { + l = &r->local_publ; + p = list_first_entry(l, struct publication, local_publ); + list_move_tail(&p->local_publ, &r->local_publ); } else { - publ = list_first_entry(&info->zone_list, - struct publication, - zone_list); - list_move_tail(&publ->zone_list, - &info->zone_list); + l = &r->all_publ; + p = list_first_entry(l, struct publication, all_publ); + list_move_tail(&p->all_publ, &r->all_publ); } + *sk = p->sk; + res = true; + /* Todo: as for legacy, pick the first matching range only, a + * "true" round-robin will be performed as needed. 
+ */ + break; } + spin_unlock_bh(&sc->lock); - /* Round-Robin Algorithm */ - else if (*destnode == tipc_own_addr) { - if (list_empty(&info->node_list)) - goto no_match; - publ = list_first_entry(&info->node_list, struct publication, - node_list); - list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(*destnode)) { - if (list_empty(&info->cluster_list)) - goto no_match; - publ = list_first_entry(&info->cluster_list, struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, &info->cluster_list); - } else { - publ = list_first_entry(&info->zone_list, struct publication, - zone_list); - list_move_tail(&publ->zone_list, &info->zone_list); - } +exit: + rcu_read_unlock(); + return res; +} + +/* tipc_nametbl_lookup_group(): lookup destinaton(s) in a communication group + * Returns a list of one (== group anycast) or more (== group multicast) + * destination socket/node pairs matching the given address. + * The requester may or may not want to exclude himself from the list. + */ +bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua, + struct list_head *dsts, int *dstcnt, + u32 exclude, bool mcast) +{ + u32 self = tipc_own_addr(net); + u32 inst = ua->sa.instance; + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + + *dstcnt = 0; + rcu_read_lock(); + sc = tipc_service_find(net, ua); + if (unlikely(!sc)) + goto exit; - ref = publ->ref; - node = publ->node; + spin_lock_bh(&sc->lock); + + /* Todo: a full search i.e. service_range_foreach_match() instead? 
*/ + sr = service_range_match_first(sc->ranges.rb_node, inst, inst); + if (!sr) + goto no_match; + + list_for_each_entry(p, &sr->all_publ, all_publ) { + if (p->scope != ua->scope) + continue; + if (p->sk.ref == exclude && p->sk.node == self) + continue; + tipc_dest_push(dsts, p->sk.node, p->sk.ref); + (*dstcnt)++; + if (mcast) + continue; + list_move_tail(&p->all_publ, &sr->all_publ); + break; + } no_match: - spin_unlock_bh(&seq->lock); -not_found: - read_unlock_bh(&tipc_nametbl_lock); - *destnode = node; - return ref; + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); + return !list_empty(dsts); } -/** - * tipc_nametbl_mc_translate - find multicast destinations - * - * Creates list of all local ports that overlap the given multicast address; - * also determines if any off-node ports overlap. - * - * Note: Publications with a scope narrower than 'limit' are ignored. - * (i.e. local node-scope publications mustn't receive messages arriving - * from another node, even if the multcast link brought it here) - * - * Returns non-zero if any off-node ports overlap +/* tipc_nametbl_lookup_mcast_sockets(): look up node local destinaton sockets + * matching the given address + * Used on nodes which have received a multicast/broadcast message + * Returns a list of local sockets */ -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports) -{ - struct name_seq *seq; - struct sub_seq *sseq; - struct sub_seq *sseq_stop; - struct name_info *info; - int res = 0; - - read_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); - if (!seq) +void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua, + struct list_head *dports) +{ + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + u8 scope = ua->scope; + + rcu_read_lock(); + sc = tipc_service_find(net, ua); + if (!sc) goto exit; - spin_lock_bh(&seq->lock); + spin_lock_bh(&sc->lock); + service_range_foreach_match(sr, sc, 
ua->sr.lower, ua->sr.upper) { + list_for_each_entry(p, &sr->local_publ, local_publ) { + if (scope == p->scope || scope == TIPC_ANY_SCOPE) + tipc_dest_push(dports, 0, p->sk.ref); + } + } + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); +} - sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); - sseq_stop = seq->sseqs + seq->first_free; - for (; sseq != sseq_stop; sseq++) { - struct publication *publ; +/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching + * the given address. Used in sending node. + * Used on nodes which are sending out a multicast/broadcast message + * Returns a list of nodes, including own node if applicable + */ +void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua, + struct tipc_nlist *nodes) +{ + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; - if (sseq->lower > upper) - break; + rcu_read_lock(); + sc = tipc_service_find(net, ua); + if (!sc) + goto exit; - info = sseq->info; - list_for_each_entry(publ, &info->node_list, node_list) { - if (publ->scope <= limit) - tipc_port_list_add(dports, publ->ref); + spin_lock_bh(&sc->lock); + service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) { + list_for_each_entry(p, &sr->all_publ, all_publ) { + tipc_nlist_add(nodes, p->sk.node); } - - if (info->cluster_list_size != info->node_list_size) - res = 1; } + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); +} + +/* tipc_nametbl_build_group - build list of communication group members + */ +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + struct tipc_uaddr *ua) +{ + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + struct rb_node *n; + + rcu_read_lock(); + sc = tipc_service_find(net, ua); + if (!sc) + goto exit; - spin_unlock_bh(&seq->lock); + spin_lock_bh(&sc->lock); + for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + list_for_each_entry(p, 
&sr->all_publ, all_publ) { + if (p->scope != ua->scope) + continue; + tipc_group_add_member(grp, p->sk.node, p->sk.ref, + p->sr.lower); + } + } + spin_unlock_bh(&sc->lock); exit: - read_unlock_bh(&tipc_nametbl_lock); - return res; + rcu_read_unlock(); } -/* - * tipc_nametbl_publish - add name publication to network name tables +/* tipc_nametbl_publish - add service binding to name table */ -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) { - struct publication *publ; + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct publication *p = NULL; + struct sk_buff *skb = NULL; + u32 rc_dests; - if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { - pr_warn("Publication failed, local publication limit reached (%u)\n", - TIPC_MAX_PUBLICATIONS); - return NULL; + spin_lock_bh(&tn->nametbl_lock); + + if (nt->local_publ_count >= TIPC_MAX_PUBL) { + pr_warn("Bind failed, max limit %u reached\n", TIPC_MAX_PUBL); + goto exit; } - write_lock_bh(&tipc_nametbl_lock); - publ = tipc_nametbl_insert_publ(type, lower, upper, scope, - tipc_own_addr, port_ref, key); - if (likely(publ)) { - table.local_publ_count++; - tipc_named_publish(publ); + p = tipc_nametbl_insert_publ(net, ua, sk, key); + if (p) { + nt->local_publ_count++; + skb = tipc_named_publish(net, p); } - write_unlock_bh(&tipc_nametbl_lock); - return publ; + rc_dests = nt->rc_dests; +exit: + spin_unlock_bh(&tn->nametbl_lock); + + if (skb) + tipc_node_broadcast(net, skb, rc_dests); + return p; + } /** - * tipc_nametbl_withdraw - withdraw name publication from network name tables + * tipc_nametbl_withdraw - withdraw a service binding + * @net: network namespace + * @ua: service address/range being unbound + * @sk: address of the socket being unbound from + * @key: target publication key */ -int 
tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) -{ - struct publication *publ; - - write_lock_bh(&tipc_nametbl_lock); - publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); - if (likely(publ)) { - table.local_publ_count--; - tipc_named_withdraw(publ); - write_unlock_bh(&tipc_nametbl_lock); - list_del_init(&publ->pport_list); - kfree(publ); - return 1; - } - write_unlock_bh(&tipc_nametbl_lock); - pr_err("Unable to remove local publication\n" - "(type=%u, lower=%u, ref=%u, key=%u)\n", - type, lower, ref, key); - return 0; +void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct sk_buff *skb = NULL; + struct publication *p; + u32 rc_dests; + + spin_lock_bh(&tn->nametbl_lock); + + p = tipc_nametbl_remove_publ(net, ua, sk, key); + if (p) { + nt->local_publ_count--; + skb = tipc_named_withdraw(net, p); + list_del_init(&p->binding_sock); + kfree_rcu(p, rcu); + } + rc_dests = nt->rc_dests; + spin_unlock_bh(&tn->nametbl_lock); + + if (skb) + tipc_node_broadcast(net, skb, rc_dests); } /** * tipc_nametbl_subscribe - add a subscription object to the name table + * @sub: subscription to add */ -void tipc_nametbl_subscribe(struct tipc_subscription *s) -{ - u32 type = s->seq.type; - struct name_seq *seq; - - write_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); - if (!seq) - seq = tipc_nameseq_create(type, &table.types[hash(type)]); - if (seq) { - spin_lock_bh(&seq->lock); - tipc_nameseq_subscribe(seq, s); - spin_unlock_bh(&seq->lock); +bool tipc_nametbl_subscribe(struct tipc_subscription *sub) +{ + struct tipc_net *tn = tipc_net(sub->net); + u32 type = sub->s.seq.type; + struct tipc_service *sc; + struct tipc_uaddr ua; + bool res = true; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, + sub->s.seq.lower, sub->s.seq.upper); + spin_lock_bh(&tn->nametbl_lock); + sc = 
tipc_service_find(sub->net, &ua); + if (!sc) + sc = tipc_service_create(sub->net, &ua); + if (sc) { + spin_lock_bh(&sc->lock); + tipc_service_subscribe(sc, sub); + spin_unlock_bh(&sc->lock); } else { - pr_warn("Failed to create subscription for {%u,%u,%u}\n", - s->seq.type, s->seq.lower, s->seq.upper); + pr_warn("Failed to subscribe for {%u,%u,%u}\n", + type, sub->s.seq.lower, sub->s.seq.upper); + res = false; } - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); + return res; } /** * tipc_nametbl_unsubscribe - remove a subscription object from name table + * @sub: subscription to remove */ -void tipc_nametbl_unsubscribe(struct tipc_subscription *s) +void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) { - struct name_seq *seq; + struct tipc_net *tn = tipc_net(sub->net); + struct tipc_service *sc; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper); + spin_lock_bh(&tn->nametbl_lock); + sc = tipc_service_find(sub->net, &ua); + if (!sc) + goto exit; - write_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(s->seq.type); - if (seq != NULL) { - spin_lock_bh(&seq->lock); - list_del_init(&s->nameseq_list); - spin_unlock_bh(&seq->lock); - nameseq_delete_empty(seq); + spin_lock_bh(&sc->lock); + list_del_init(&sub->service_list); + tipc_sub_put(sub); + + /* Delete service item if no more publications and subscriptions */ + if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) { + hlist_del_init_rcu(&sc->service_list); + kfree_rcu(sc, rcu); } - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&sc->lock); +exit: + spin_unlock_bh(&tn->nametbl_lock); } +int tipc_nametbl_init(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + struct name_table *nt; + int i; + + nt = kzalloc(sizeof(*nt), GFP_KERNEL); + if (!nt) + return -ENOMEM; + + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) + INIT_HLIST_HEAD(&nt->services[i]); + + 
INIT_LIST_HEAD(&nt->node_scope); + INIT_LIST_HEAD(&nt->cluster_scope); + rwlock_init(&nt->cluster_scope_lock); + tn->nametbl = nt; + spin_lock_init(&tn->nametbl_lock); + return 0; +} /** - * subseq_list - print specified sub-sequence contents into the given buffer + * tipc_service_delete - purge all publications for a service and delete it + * @net: the associated network namespace + * @sc: tipc_service to delete */ -static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) +static void tipc_service_delete(struct net *net, struct tipc_service *sc) { - char portIdStr[27]; - const char *scope_str[] = {"", " zone", " cluster", " node"}; - struct publication *publ; - struct name_info *info; - int ret; - - ret = tipc_snprintf(buf, len, "%-10u %-10u ", sseq->lower, sseq->upper); - - if (depth == 2) { - ret += tipc_snprintf(buf - ret, len + ret, "\n"); - return ret; + struct service_range *sr, *tmpr; + struct publication *p, *tmp; + + spin_lock_bh(&sc->lock); + rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) { + list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) { + tipc_service_remove_publ(sr, &p->sk, p->key); + kfree_rcu(p, rcu); + } + rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks); + kfree(sr); } + hlist_del_init_rcu(&sc->service_list); + spin_unlock_bh(&sc->lock); + kfree_rcu(sc, rcu); +} - info = sseq->info; +void tipc_nametbl_stop(struct net *net) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct hlist_head *service_head; + struct tipc_service *service; + u32 i; - list_for_each_entry(publ, &info->zone_list, zone_list) { - sprintf(portIdStr, "<%u.%u.%u:%u>", - tipc_zone(publ->node), tipc_cluster(publ->node), - tipc_node(publ->node), publ->ref); - ret += tipc_snprintf(buf + ret, len - ret, "%-26s ", portIdStr); - if (depth > 3) { - ret += tipc_snprintf(buf + ret, len - ret, "%-10u %s", - publ->key, scope_str[publ->scope]); + /* Verify name 
table is empty and purge any lingering + * publications, then release the name table + */ + spin_lock_bh(&tn->nametbl_lock); + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { + if (hlist_empty(&nt->services[i])) + continue; + service_head = &nt->services[i]; + hlist_for_each_entry_rcu(service, service_head, service_list) { + tipc_service_delete(net, service); } - if (!list_is_last(&publ->zone_list, &info->zone_list)) - ret += tipc_snprintf(buf + ret, len - ret, - "\n%33s", " "); } + spin_unlock_bh(&tn->nametbl_lock); - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; + /* TODO: clear tn->nametbl, implement proper RCU rules ? */ + kfree_rcu(nt, rcu); } -/** - * nameseq_list - print specified name sequence contents into the given buffer - */ -static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) +static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, + struct tipc_service *service, + struct service_range *sr, + u32 *last_key) { - struct sub_seq *sseq; - char typearea[11]; - int ret = 0; + struct publication *p; + struct nlattr *attrs; + struct nlattr *b; + void *hdr; + + if (*last_key) { + list_for_each_entry(p, &sr->all_publ, all_publ) + if (p->key == *last_key) + break; + if (list_entry_is_head(p, &sr->all_publ, all_publ)) + return -EPIPE; + } else { + p = list_first_entry(&sr->all_publ, + struct publication, + all_publ); + } - if (seq->first_free == 0) - return 0; + list_for_each_entry_from(p, &sr->all_publ, all_publ) { + *last_key = p->key; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_NAME_TABLE_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE); + if (!attrs) + goto msg_full; + + b = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + if (!b) + goto attr_msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, service->type)) + goto publ_msg_full; + if 
(nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sr->lower)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sr->upper)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) + goto publ_msg_full; + + nla_nest_end(msg->skb, b); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + } + *last_key = 0; - sprintf(typearea, "%-10u", seq->type); + return 0; - if (depth == 1) { - ret += tipc_snprintf(buf, len, "%s\n", typearea); - return ret; - } +publ_msg_full: + nla_nest_cancel(msg->skb, b); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); - for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { - if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { - ret += tipc_snprintf(buf + ret, len - ret, "%s ", - typearea); - spin_lock_bh(&seq->lock); - ret += subseq_list(sseq, buf + ret, len - ret, - depth, index); - spin_unlock_bh(&seq->lock); - sprintf(typearea, "%10s", " "); - } - } - return ret; + return -EMSGSIZE; } -/** - * nametbl_header - print name table header into the given buffer - */ -static int nametbl_header(char *buf, int len, u32 depth) +static int __tipc_nl_service_range_list(struct tipc_nl_msg *msg, + struct tipc_service *sc, + u32 *last_lower, u32 *last_key) { - const char *header[] = { - "Type ", - "Lower Upper ", - "Port Identity ", - "Publication Scope" - }; - - int i; - int ret = 0; + struct service_range *sr; + struct rb_node *n; + int err; - if (depth > 4) - depth = 4; - for (i = 0; i < depth; i++) - ret += tipc_snprintf(buf + ret, len - ret, header[i]); - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; + for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { + sr = 
container_of(n, struct service_range, tree_node); + if (sr->lower < *last_lower) + continue; + err = __tipc_nl_add_nametable_publ(msg, sc, sr, last_key); + if (err) { + *last_lower = sr->lower; + return err; + } + } + *last_lower = 0; + return 0; } -/** - * nametbl_list - print specified name table contents into the given buffer - */ -static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) -{ - struct hlist_head *seq_head; - struct name_seq *seq; - int all_types; - int ret = 0; - u32 depth; - u32 i; +static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_key) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_service *service = NULL; + struct hlist_head *head; + struct tipc_uaddr ua; + int err; + int i; - all_types = (depth_info & TIPC_NTQ_ALLTYPES); - depth = (depth_info & ~TIPC_NTQ_ALLTYPES); + if (*last_type) + i = hash(*last_type); + else + i = 0; + + for (; i < TIPC_NAMETBL_SIZE; i++) { + head = &tn->nametbl->services[i]; + + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + *last_type, *last_lower, *last_lower); + service = tipc_service_find(net, &ua); + if (!service) + return -EPIPE; + } else { + hlist_for_each_entry_rcu(service, head, service_list) + break; + if (!service) + continue; + } - if (depth == 0) - return 0; + hlist_for_each_entry_from_rcu(service, service_list) { + spin_lock_bh(&service->lock); + err = __tipc_nl_service_range_list(msg, service, + last_lower, + last_key); - if (all_types) { - /* display all entries in name table to specified depth */ - ret += nametbl_header(buf, len, depth); - lowbound = 0; - upbound = ~0; - for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_head, ns_list) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, seq->type, - lowbound, upbound, i); - } - } - } else { - /* 
display only the sequence that matches the specified type */ - if (upbound < lowbound) { - ret += tipc_snprintf(buf + ret, len - ret, - "invalid name sequence specified\n"); - return ret; - } - ret += nametbl_header(buf + ret, len - ret, depth); - i = hash(type); - seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_head, ns_list) { - if (seq->type == type) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, type, - lowbound, upbound, i); - break; + if (err) { + *last_type = service->type; + spin_unlock_bh(&service->lock); + return err; } + spin_unlock_bh(&service->lock); } + *last_type = 0; } - return ret; + return 0; } -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct sk_buff *buf; - struct tipc_name_table_query *argv; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; + struct net *net = sock_net(skb->sk); + u32 last_type = cb->args[0]; + u32 last_lower = cb->args[1]; + u32 last_key = cb->args[2]; + int done = cb->args[3]; + struct tipc_nl_msg msg; + int err; + + if (done) + return 0; - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NAME_TBL_QUERY)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + err = tipc_nl_service_list(net, &msg, &last_type, + &last_lower, &last_key); + if (!err) { + done = 1; + } else if (err != -EMSGSIZE) { + /* We never set seq or call nl_dump_check_consistent() this + * means that setting prev_seq here will cause the consistence + * check to fail in the netlink callback handler. Resulting in + * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if + * we got an error. 
+ */ + cb->prev_seq = 1; + } + rcu_read_unlock(); - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; + cb->args[0] = last_type; + cb->args[1] = last_lower; + cb->args[2] = last_key; + cb->args[3] = done; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); - read_lock_bh(&tipc_nametbl_lock); - str_len = nametbl_list(pb, pb_len, ntohl(argv->depth), - ntohl(argv->type), - ntohl(argv->lowbound), ntohl(argv->upbound)); - read_unlock_bh(&tipc_nametbl_lock); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); + return skb->len; +} - return buf; +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port) +{ + struct tipc_dest *dst; + + list_for_each_entry(dst, l, list) { + if (dst->node == node && dst->port == port) + return dst; + } + return NULL; } -int tipc_nametbl_init(void) +bool tipc_dest_push(struct list_head *l, u32 node, u32 port) { - table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head), - GFP_ATOMIC); - if (!table.types) - return -ENOMEM; + struct tipc_dest *dst; + + if (tipc_dest_find(l, node, port)) + return false; + + dst = kmalloc(sizeof(*dst), GFP_ATOMIC); + if (unlikely(!dst)) + return false; + dst->node = node; + dst->port = port; + list_add(&dst->list, l); + return true; +} - table.local_publ_count = 0; - return 0; +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port) +{ + struct tipc_dest *dst; + + if (list_empty(l)) + return false; + dst = list_first_entry(l, typeof(*dst), list); + if (port) + *port = dst->port; + if (node) + *node = dst->node; + list_del(&dst->list); + kfree(dst); + return true; } -void tipc_nametbl_stop(void) +bool tipc_dest_del(struct list_head *l, u32 node, u32 port) { - u32 i; + struct tipc_dest *dst; + + dst = tipc_dest_find(l, node, port); + if (!dst) + return false; + 
list_del(&dst->list); + kfree(dst); + return true; +} - if (!table.types) - return; +void tipc_dest_list_purge(struct list_head *l) +{ + struct tipc_dest *dst, *tmp; - /* Verify name table is empty, then release it */ - write_lock_bh(&tipc_nametbl_lock); - for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - if (hlist_empty(&table.types[i])) - continue; - pr_err("nametbl_stop(): orphaned hash chain detected\n"); - break; + list_for_each_entry_safe(dst, tmp, l, list) { + list_del(&dst->list); + kfree(dst); } - kfree(table.types); - table.types = NULL; - write_unlock_bh(&tipc_nametbl_lock); } diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index f02f48b9a216..7ff6eeebaae6 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,8 +1,9 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -37,68 +38,120 @@ #ifndef _TIPC_NAME_TABLE_H #define _TIPC_NAME_TABLE_H -#include "node_subscr.h" - struct tipc_subscription; -struct tipc_port_list; +struct tipc_plist; +struct tipc_nlist; +struct tipc_group; +struct tipc_uaddr; /* * TIPC name types reserved for internal TIPC use (both current and planned) */ -#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1) +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ + +#define TIPC_ANY_SCOPE 10 /* Both node and cluster scope will match */ /** - * struct publication - info about a published (name or) name sequence - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @scope: scope of publication - * @node: network address of publishing port's node - * @ref: publishing port - * @key: publication key - * @subscr: subscription to "node down" event (for off-node publications only) - * @local_list: adjacent entries in list of publications made by this node - * @pport_list: adjacent entries in list of publications made by this port - * @node_list: adjacent matching name seq publications with >= node scope - * @cluster_list: adjacent matching name seq publications with >= cluster scope - * @zone_list: adjacent matching name seq publications with >= zone scope - * - * Note that the node list, cluster list, and zone list are circular lists. 
+ * struct publication - info about a published service address or range + * @sr: service range represented by this publication + * @sk: address of socket bound to this publication + * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE + * @key: publication key, unique across the cluster + * @id: publication id + * @binding_node: all publications from the same node which bound this one + * - Remote publications: in node->publ_list; + * Used by node/name distr to withdraw publications when node is lost + * - Local/node scope publications: in name_table->node_scope list + * - Local/cluster scope publications: in name_table->cluster_scope list + * @binding_sock: all publications from the same socket which bound this one + * Used by socket to withdraw publications when socket is unbound/released + * @local_publ: list of identical publications made from this node + * Used by closest_first and multicast receive lookup algorithms + * @all_publ: all publications identical to this one, whatever node and scope + * Used by round-robin lookup algorithm + * @list: to form a list of publications in temporal order + * @rcu: RCU callback head used for deferred freeing */ struct publication { - u32 type; - u32 lower; - u32 upper; - u32 scope; - u32 node; - u32 ref; + struct tipc_service_range sr; + struct tipc_socket_addr sk; + u16 scope; u32 key; - struct tipc_node_subscr subscr; - struct list_head local_list; - struct list_head pport_list; - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; + u32 id; + struct list_head binding_node; + struct list_head binding_sock; + struct list_head local_publ; + struct list_head all_publ; + struct list_head list; + struct rcu_head rcu; }; +/** + * struct name_table - table containing all existing port name publications + * @rcu: RCU callback head used for deferred freeing + * @services: name sequence hash lists + * @node_scope: all local publications with node scope + * - used by 
name_distr during re-init of name table + * @cluster_scope: all local publications with cluster scope + * - used by name_distr to send bulk updates to new nodes + * - used by name_distr during re-init of name table + * @cluster_scope_lock: lock for accessing @cluster_scope + * @local_publ_count: number of publications issued by this node + * @rc_dests: destination node counter + * @snd_nxt: next sequence number to be used + */ +struct name_table { + struct rcu_head rcu; + struct hlist_head services[TIPC_NAMETBL_SIZE]; + struct list_head node_scope; + struct list_head cluster_scope; + rwlock_t cluster_scope_lock; + u32 local_publ_count; + u32 rc_dests; + u32 snd_nxt; +}; -extern rwlock_t tipc_nametbl_lock; - -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); +bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk); +void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua, + struct list_head *dports); +void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua, + struct tipc_nlist *nodes); +bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua, + struct list_head *dsts, int *dstcnt, + u32 exclude, bool mcast); +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + struct tipc_uaddr *ua); +struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua, + struct 
tipc_socket_addr *sk, u32 key); +void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, + u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, + struct tipc_uaddr *ua, + struct tipc_socket_addr *sk, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, - u32 ref, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s); +bool tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); -int tipc_nametbl_init(void); -void tipc_nametbl_stop(void); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); + +struct tipc_dest { + struct list_head list; + u32 port; + u32 node; +}; + +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port); +bool tipc_dest_push(struct list_head *l, u32 node, u32 port); +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); +bool tipc_dest_del(struct list_head *l, u32 node, u32 port); +void tipc_dest_list_purge(struct list_head *l); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index 7d305ecc09c2..7e65d0b0c4a8 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -1,7 +1,7 @@ /* * net/tipc/net.c: TIPC network routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -38,46 +38,44 @@ #include "net.h" #include "name_distr.h" #include "subscr.h" -#include "port.h" +#include "socket.h" #include "node.h" -#include "config.h" +#include "bcast.h" +#include "link.h" +#include "netlink.h" +#include "monitor.h" /* * The TIPC locking policy is designed to ensure a very fine locking * granularity, permitting complete parallel access to individual - * port and node/link instances. 
The code consists of three major + * port and node/link instances. The code consists of four major * locking domains, each protected with their own disjunct set of locks. * - * 1: The routing hierarchy. - * Comprises the structures 'zone', 'cluster', 'node', 'link' - * and 'bearer'. The whole hierarchy is protected by a big - * read/write lock, tipc_net_lock, to enssure that nothing is added - * or removed while code is accessing any of these structures. - * This layer must not be called from the two others while they - * hold any of their own locks. - * Neither must it itself do any upcalls to the other two before - * it has released tipc_net_lock and other protective locks. + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. * - * Within the tipc_net_lock domain there are two sub-domains;'node' and - * 'bearer', where local write operations are permitted, - * provided that those are protected by individual spin_locks - * per instance. Code holding tipc_net_lock(read) and a node spin_lock - * is permitted to poke around in both the node itself and its - * subordinate links. I.e, it can update link counters and queues, - * change link state, send protocol messages, and alter the - * "active_links" array in the node; but it can _not_ remove a link - * or a node from the overall structure. - * Correspondingly, individual bearers may change status within a - * tipc_net_lock(read), protected by an individual spin_lock ber bearer - * instance, but it needs tipc_net_lock(write) to remove/add any bearers. + * 2: The node and link level. + * All node instances are saved into two tipc_node_list and node_htable + * lists. The two lists are protected by node_list_lock on write side, + * and they are guarded with RCU lock on read side. 
In particular, a node + * instance is destroyed only when the TIPC module is removed, and we can + * confirm that no user is accessing the node at that + * moment. Therefore, except when iterating the two lists under RCU + * protection, there is no need to hold the RCU read lock to access a node instance + * in other places. * + * In addition, all members of the node structure, including link instances, + * are protected by the node spin lock. * - * 2: The transport level of the protocol. - * This consists of the structures port, (and its user level - * representations, such as user_port and tipc_sock), reference and - * tipc_user (port.c, reg.c, socket.c). + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). * - * This layer has four different locks: + * This layer has four different locks: * - The tipc_port spin_lock. This is protecting each port instance * from parallel data access and removal. Since we can not place * this lock in the port itself, it has been placed in the @@ -92,11 +90,11 @@ * - A spin lock to protect the registry of kernel/driver users (reg.c) * - A global spin_lock (tipc_port_lock), which only task is to ensure * consistency where more than one port is involved in an operation, - * i.e., whe a port is part of a linked list of ports. + * i.e., when a port is part of a linked list of ports. * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * - * 3: The name table (name_table.c, name_distr.c, subscription.c) + * 4: The name table (name_table.c, name_distr.c, subscription.c) * - There is one big read/write-lock (tipc_nametbl_lock) protecting the * overall name table structure. Nothing must be added/removed to * this structure without holding write access to it.
@@ -108,98 +106,242 @@ * - A local spin_lock protecting the queue of subscriber events. */ -DEFINE_RWLOCK(tipc_net_lock); +static void tipc_net_finalize(struct net *net, u32 addr); -static void net_route_named_msg(struct sk_buff *buf) +int tipc_net_init(struct net *net, u8 *node_id, u32 addr) { - struct tipc_msg *msg = buf_msg(buf); - u32 dnode; - u32 dport; - - if (!msg_named(msg)) { - kfree_skb(buf); - return; + if (tipc_own_id(net)) { + pr_info("Cannot configure node identity twice\n"); + return -1; } + pr_info("Started in network mode\n"); - dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode); - if (dport) { - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); - tipc_net_route_msg(buf); - return; - } - tipc_reject_msg(buf, TIPC_ERR_NO_NAME); + if (node_id) + tipc_set_node_id(net, node_id); + if (addr) + tipc_net_finalize(net, addr); + return 0; } -void tipc_net_route_msg(struct sk_buff *buf) +static void tipc_net_finalize(struct net *net, u32 addr) { - struct tipc_msg *msg; - u32 dnode; + struct tipc_net *tn = tipc_net(net); + struct tipc_socket_addr sk = {0, addr}; + struct tipc_uaddr ua; + + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, + TIPC_NODE_STATE, addr, addr); - if (!buf) + if (cmpxchg(&tn->node_addr, 0, addr)) return; - msg = buf_msg(buf); - - /* Handle message for this node */ - dnode = msg_short(msg) ? 
tipc_own_addr : msg_destnode(msg); - if (tipc_in_scope(dnode, tipc_own_addr)) { - if (msg_isdata(msg)) { - if (msg_mcast(msg)) - tipc_port_recv_mcast(buf, NULL); - else if (msg_destport(msg)) - tipc_port_recv_msg(buf); - else - net_route_named_msg(buf); - return; - } - switch (msg_user(msg)) { - case NAME_DISTRIBUTOR: - tipc_named_recv(buf); - break; - case CONN_MANAGER: - tipc_port_recv_proto_msg(buf); - break; - default: - kfree_skb(buf); - } + tipc_set_node_addr(net, addr); + tipc_named_reinit(net); + tipc_sk_reinit(net); + tipc_mon_reinit_self(net); + tipc_nametbl_publish(net, &ua, &sk, addr); +} + +void tipc_net_finalize_work(struct work_struct *work) +{ + struct tipc_net *tn = container_of(work, struct tipc_net, work); + + rtnl_lock(); + tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); + rtnl_unlock(); +} + +void tipc_net_stop(struct net *net) +{ + if (!tipc_own_id(net)) return; + + rtnl_lock(); + tipc_bearer_stop(net); + tipc_node_stop(net); + rtnl_unlock(); + + pr_info("Left network mode\n"); +} + +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + u64 *w0 = (u64 *)&tn->node_id[0]; + u64 *w1 = (u64 *)&tn->node_id[8]; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_NET_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0)) + goto attr_msg_full; + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_net_dump(struct sk_buff 
*skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int err; + int done = cb->args[0]; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + err = __tipc_nl_add_net(net, &msg); + if (err) + goto out; + + done = 1; +out: + cb->args[0] = done; + + return skb->len; +} + +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + int err; + + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); + + if (err) + return err; + + /* Can't change net id once TIPC has joined a network */ + if (tipc_own_addr(net)) + return -EPERM; + + if (attrs[TIPC_NLA_NET_ID]) { + u32 val; + + val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); + if (val < 1 || val > 9999) + return -EINVAL; + + tn->net_id = val; + } + + if (attrs[TIPC_NLA_NET_ADDR]) { + u32 addr; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!addr) + return -EINVAL; + tn->legacy_addr_format = true; + tipc_net_init(net, NULL, addr); + } + + if (attrs[TIPC_NLA_NET_NODEID]) { + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)&node_id[0]; + u64 *w1 = (u64 *)&node_id[8]; + + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + tipc_net_init(net, node_id, 0); } + return 0; +} + +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; - /* Handle message for another node */ - skb_trim(buf, msg_size(msg)); - tipc_link_send(buf, dnode, msg_link_selector(msg)); + rtnl_lock(); + err = __tipc_nl_net_set(skb, info); + rtnl_unlock(); + + return err; } -void tipc_net_start(u32 addr) +static int __tipc_nl_addr_legacy_get(struct net *net, 
struct tipc_nl_msg *msg) { - char addr_string[16]; + struct tipc_net *tn = tipc_net(net); + struct nlattr *attrs; + void *hdr; - write_lock_bh(&tipc_net_lock); - tipc_own_addr = addr; - tipc_named_reinit(); - tipc_port_reinit(); - tipc_bclink_init(); - write_unlock_bh(&tipc_net_lock); + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_ADDR_LEGACY_GET); + if (!hdr) + return -EMSGSIZE; - tipc_cfg_reinit(); + attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; - pr_info("Started in network mode\n"); - pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + if (tn->legacy_addr_format) + if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; } -void tipc_net_stop(void) +int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info) { - struct tipc_node *node, *t_node; + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + struct sk_buff *rep; + int err; - if (!tipc_own_addr) - return; - write_lock_bh(&tipc_net_lock); - tipc_bearer_stop(); - tipc_bclink_stop(); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); - write_unlock_bh(&tipc_net_lock); - pr_info("Left network mode\n"); + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_addr_legacy_get(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); } diff --git a/net/tipc/net.h b/net/tipc/net.h index 079daadb3f72..1cb1e43cf34a 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -1,7 +1,7 @@ /* * net/tipc/net.h: Include file for TIPC network 
routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,11 +37,16 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -extern rwlock_t tipc_net_lock; +#include <net/genetlink.h> -void tipc_net_route_msg(struct sk_buff *buf); +extern const struct nla_policy tipc_nl_net_policy[]; -void tipc_net_start(u32 addr); -void tipc_net_stop(void); +int tipc_net_init(struct net *net, u8 *node_id, u32 addr); +void tipc_net_finalize_work(struct work_struct *work); +void tipc_net_stop(struct net *net); +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 8bcd4985d0fb..1a9a5bdaccf4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -1,7 +1,7 @@ /* * net/tipc/netlink.c: TIPC configuration handling * - * Copyright (c) 2005-2006, Ericsson AB + * Copyright (c) 2005-2006, 2014, Ericsson AB * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. 
* @@ -35,74 +35,281 @@ */ #include "core.h" -#include "config.h" +#include "socket.h" +#include "name_table.h" +#include "bearer.h" +#include "link.h" +#include "node.h" +#include "net.h" +#include "udp_media.h" #include <net/genetlink.h> -static int handle_cmd(struct sk_buff *skb, struct genl_info *info) -{ - struct sk_buff *rep_buf; - struct nlmsghdr *rep_nlh; - struct nlmsghdr *req_nlh = info->nlhdr; - struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); - u16 cmd; - - if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) - cmd = TIPC_CMD_NOT_NET_ADMIN; - else - cmd = req_userhdr->cmd; - - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); - - if (rep_buf) { - skb_push(rep_buf, hdr_space); - rep_nlh = nlmsg_hdr(rep_buf); - memcpy(rep_nlh, req_nlh, hdr_space); - rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); - } +static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { + [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, + [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, + [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, + [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, + [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, + [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, + [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, + [TIPC_NLA_NET] = { .type = NLA_NESTED, }, + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }, + [TIPC_NLA_MON] = { .type = NLA_NESTED, }, +}; - return 0; -} +const struct nla_policy +tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { + [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } +}; + +const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = { + [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MON_REF] = { .type = NLA_U32 }, + 
[TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 }, +}; + +const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { + [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED }, + [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } +}; + +const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { + [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NET_ID] = { .type = NLA_U32 }, + [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 }, + [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 }, + [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG } +}; + +const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, + .len = TIPC_MAX_LINK_NAME }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + +const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { + [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_NODE_ID] = { .type = NLA_BINARY, + .len = TIPC_NODEID_LEN}, + [TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY, + .len = TIPC_AEAD_KEY_SIZE_MAX}, + [TIPC_NLA_NODE_KEY_MASTER] = { .type = NLA_FLAG }, + [TIPC_NLA_NODE_REKEYING] = { .type = NLA_U32 }, +}; -static struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_NAME, - .version = TIPC_GENL_VERSION, - .hdrsize = TIPC_GENL_HDRLEN, - .maxattr = 0, +/* 
Properties valid for media, bearer and link */ +const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { + [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; -static struct genl_ops tipc_genl_ops = { - .cmd = TIPC_GENL_CMD, - .doit = handle_cmd, +const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { + [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, + .len = TIPC_MAX_BEARER_NAME }, + [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } }; -static int tipc_genl_family_registered; +const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { + [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING }, + [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } +}; -int tipc_netlink_start(void) +const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/* Users of the legacy API (tipc-config) can't handle that we add operations, + * so we have a separate genl handling for the new API. 
+ */ +static const struct genl_ops tipc_genl_v2_ops[] = { + { + .cmd = TIPC_NL_BEARER_DISABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_bearer_disable, + }, + { + .cmd = TIPC_NL_BEARER_ENABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_bearer_enable, + }, + { + .cmd = TIPC_NL_BEARER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_bearer_get, + .dumpit = tipc_nl_bearer_dump, + }, + { + .cmd = TIPC_NL_BEARER_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_bearer_add, + }, + { + .cmd = TIPC_NL_BEARER_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_bearer_set, + }, + { + .cmd = TIPC_NL_SOCK_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = tipc_dump_start, + .dumpit = tipc_nl_sk_dump, + .done = tipc_dump_done, + }, + { + .cmd = TIPC_NL_PUBL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = tipc_nl_publ_dump, + }, + { + .cmd = TIPC_NL_LINK_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = tipc_nl_node_get_link, + .dumpit = tipc_nl_node_dump_link, + }, + { + .cmd = TIPC_NL_LINK_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_set_link, + }, + { + .cmd = TIPC_NL_LINK_RESET_STATS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_reset_link_stats, + }, + { + .cmd = TIPC_NL_MEDIA_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_media_get, + .dumpit = tipc_nl_media_dump, + }, + { + .cmd = TIPC_NL_MEDIA_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_media_set, + }, + { + .cmd = TIPC_NL_NODE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .dumpit = tipc_nl_node_dump, + }, + { + .cmd = 
TIPC_NL_NET_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .dumpit = tipc_nl_net_dump, + }, + { + .cmd = TIPC_NL_NET_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_net_set, + }, + { + .cmd = TIPC_NL_NAME_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .dumpit = tipc_nl_name_table_dump, + }, + { + .cmd = TIPC_NL_MON_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_set_monitor, + }, + { + .cmd = TIPC_NL_MON_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_get_monitor, + .dumpit = tipc_nl_node_dump_monitor, + }, + { + .cmd = TIPC_NL_MON_PEER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = tipc_nl_node_dump_monitor_peer, + }, + { + .cmd = TIPC_NL_PEER_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_peer_rm, + }, +#ifdef CONFIG_TIPC_MEDIA_UDP + { + .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .validate = GENL_DONT_VALIDATE_STRICT | + GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = tipc_udp_nl_dump_remoteip, + }, +#endif +#ifdef CONFIG_TIPC_CRYPTO + { + .cmd = TIPC_NL_KEY_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_set_key, + }, + { + .cmd = TIPC_NL_KEY_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_flush_key, + }, +#endif + { + .cmd = TIPC_NL_ADDR_LEGACY_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_net_addr_legacy_get, + }, +}; + +struct genl_family tipc_genl_family __ro_after_init = { + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .policy = tipc_nl_policy, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_v2_ops, + .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), + .resv_start_op = 
TIPC_NL_ADDR_LEGACY_GET + 1, +}; + +int __init tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, - &tipc_genl_ops, 1); + res = genl_register_family(&tipc_genl_family); if (res) { pr_err("Failed to register netlink interface\n"); return res; } - - tipc_genl_family_registered = 1; return 0; } void tipc_netlink_stop(void) { - if (!tipc_genl_family_registered) - return; - genl_unregister_family(&tipc_genl_family); - tipc_genl_family_registered = 0; } diff --git a/net/tipc/node_subscr.h b/net/tipc/netlink.h index c95d20727ded..7cf777723e3e 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/netlink.h @@ -1,8 +1,7 @@ /* - * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling + * net/tipc/netlink.h: Include file for TIPC netlink code * - * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, 2010-2011, Wind River Systems + * Copyright (c) 2014, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,30 +33,32 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef _TIPC_NODE_SUBSCR_H -#define _TIPC_NODE_SUBSCR_H +#ifndef _TIPC_NETLINK_H +#define _TIPC_NETLINK_H +#include <net/netlink.h> -#include "addr.h" +extern struct genl_family tipc_genl_family; -typedef void (*net_ev_handler) (void *usr_handle); - -/** - * struct tipc_node_subscr - "node down" subscription entry - * @node: ptr to node structure of interest (or NULL, if none) - * @handle_node_down: routine to invoke when node fails - * @usr_handle: argument to pass to routine when node fails - * @nodesub_list: adjacent entries in list of subscriptions for the node - */ -struct tipc_node_subscr { - struct tipc_node *node; - net_ev_handler handle_node_down; - void *usr_handle; - struct list_head nodesub_list; +struct tipc_nl_msg { + struct sk_buff *skb; + u32 portid; + u32 seq; }; -void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, - void *usr_handle, net_ev_handler handle_down); -void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); -void tipc_nodesub_notify(struct tipc_node *node); +extern const struct nla_policy tipc_nl_name_table_policy[]; +extern const struct nla_policy tipc_nl_sock_policy[]; +extern const struct nla_policy tipc_nl_net_policy[]; +extern const struct nla_policy tipc_nl_link_policy[]; +extern const struct nla_policy tipc_nl_node_policy[]; +extern const struct nla_policy tipc_nl_prop_policy[]; +extern const struct nla_policy tipc_nl_bearer_policy[]; +extern const struct nla_policy tipc_nl_media_policy[]; +extern const struct nla_policy tipc_nl_udp_policy[]; +extern const struct nla_policy tipc_nl_monitor_policy[]; + +int tipc_netlink_start(void); +int tipc_netlink_compat_start(void); +void tipc_netlink_stop(void); +void tipc_netlink_compat_stop(void); #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c new file mode 100644 index 000000000000..079aebb16ed8 --- /dev/null +++ b/net/tipc/netlink_compat.c @@ -0,0 +1,1376 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "name_table.h" +#include "socket.h" +#include "node.h" +#include "net.h" +#include <net/genetlink.h> +#include <linux/string_helpers.h> +#include <linux/tipc_config.h> + +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. + */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "<truncated>\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_type; + int rep_size; + int req_type; + int req_size; + struct net *net; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*header)(struct tipc_nl_compat_msg *); + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, TLV_SPACE(len)); + memset(tlv, 0, TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + 
TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static __printf(2, 3) int tipc_tlv_sprintf(struct sk_buff *skb, + const char *fmt, ...) +{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(str_len); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + struct genl_dumpit_info info; + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + struct nlattr **attrbuf; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + cb.data = &info; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + if (__tipc_dump_start(&cb, msg->net)) { + kfree_skb(buf); + return -ENOMEM; + } + + attrbuf = kcalloc(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto err_out; + } + + info.info.attrs = attrbuf; + + if (nlmsg_len(cb.nlh) > 0) { + err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf, + tipc_genl_family.maxattr, + 
tipc_genl_family.policy, NULL); + if (err) + goto err_out; + } + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN, + attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, + NULL); + if (err) + goto err_out; + + err = (*cmd->format)(msg, attrbuf); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + kfree(attrbuf); + tipc_dump_done(&cb); + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. + */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + struct nlmsghdr *nlh; + struct sk_buff *arg; + int err; + + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) + return -EINVAL; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + msg->rep = NULL; + return -ENOMEM; + } + + nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI); + if (!nlh) { + kfree_skb(arg); + kfree_skb(msg->rep); + msg->rep = NULL; + return -EMSGSIZE; + } + nlmsg_end(arg, nlh); + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + } + kfree_skb(arg); + + return err; +} + +static int 
__tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *doit_buf; + struct sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), + GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto attrbuf_out; + } + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + rtnl_lock(); + err = (*cmd->transcode)(cmd, trans_buf, msg); + if (err) + goto doit_out; + + err = nla_parse_deprecated(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); + if (err) + goto doit_out; + + doit_buf->sk = msg->dst_sk; + + err = (*cmd->doit)(doit_buf, &info); +doit_out: + rtnl_unlock(); + + kfree_skb(doit_buf); +attrbuf_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested_deprecated(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL, NULL); + if (err) + return err; + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + 
nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + int len; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); + if (!string_is_terminated(b->name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + int len; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); + if (!string_is_terminated(name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], 
struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + int err; + int len; + + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_PROP]) + return -EINVAL; + + err = nla_parse_nested_deprecated(prop, 
TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL, + NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_STATS]) + return -EINVAL; + + err = nla_parse_nested_deprecated(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL, + NULL); + if (err) + return err; + + name = (char *)TLV_DATA(msg->req); + + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_terminated(name, len)) + return -EINVAL; + + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + (char *)nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); + else + tipc_tlv_sprintf(msg->rep, " DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + 
tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + 
nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + +static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_link_info link_info; + int err; + + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); + if (err) + return err; + + link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST])); + link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); + nla_strscpy(link_info.str, link[TIPC_NLA_LINK_NAME], + TIPC_MAX_LINK_NAME); + + return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, + &link_info, sizeof(link_info)); +} + +static int __tipc_add_link_prop(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg, + struct tipc_link_config *lc) +{ + switch (msg->cmd) { + case TIPC_CMD_SET_LINK_PRI: + return nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_TOL: + return nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_WINDOW: + return nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value)); + } + + return -EINVAL; +} + +static int tipc_nl_compat_media_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *media; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + media = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA); + if (!media) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, media); + + return 0; +} + +static int tipc_nl_compat_bearer_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_link_config *lc; + + lc = (struct 
tipc_link_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, bearer); + + return 0; +} + +static int __tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *link; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start_noflag(skb, TIPC_NLA_LINK_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct tipc_link_config *lc; + struct tipc_bearer *bearer; + struct tipc_media *media; + int len; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_terminated(lc->name, len)) + return -EINVAL; + + media = tipc_media_find(lc->name); + if (media) { + cmd->doit = &__tipc_nl_media_set; + return tipc_nl_compat_media_set(skb, msg); + } + + bearer = tipc_bearer_find(msg->net, lc->name); + if (bearer) { + cmd->doit = &__tipc_nl_bearer_set; + return tipc_nl_compat_bearer_set(skb, msg); + } + + return __tipc_nl_compat_link_set(skb, msg); +} + +static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + 
struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *link; + int len; + + name = (char *)TLV_DATA(msg->req); + + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_terminated(name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) +{ + int i; + u32 depth; + struct tipc_name_table_query *ntq; + static const char * const header[] = { + "Type ", + "Lower Upper ", + "Port Identity ", + "Publication Scope" + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query)) + return -EINVAL; + + depth = ntohl(ntq->depth); + + if (depth > 4) + depth = 4; + for (i = 0; i < depth; i++) + tipc_tlv_sprintf(msg->rep, header[i]); + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char port_str[27]; + struct tipc_name_table_query *ntq; + struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + u32 node, depth, type, lowbound, upbound; + static const char * const scope_str[] = {"", " zone", " cluster", + " node"}; + int err; + + if (!attrs[TIPC_NLA_NAME_TABLE]) + return -EINVAL; + + err = nla_parse_nested_deprecated(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL, + NULL); + if (err) + return err; + + if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) + return -EINVAL; + + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, + NULL); + if (err) + return err; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + type = 
ntohl(ntq->type); + lowbound = ntohl(ntq->lowbound); + upbound = ntohl(ntq->upbound); + + if (!(depth & TIPC_NTQ_ALLTYPES) && + (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]))) + return 0; + if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]))) + return 0; + if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]))) + return 0; + + tipc_tlv_sprintf(msg->rep, "%-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])); + + if (depth == 1) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]), + nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])); + + if (depth == 2) + goto out; + + node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]); + sprintf(port_str, "<%u.%u.%u:%u>", tipc_zone(node), tipc_cluster(node), + tipc_node(node), nla_get_u32(publ[TIPC_NLA_PUBL_REF])); + tipc_tlv_sprintf(msg->rep, "%-26s ", port_str); + + if (depth == 3) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %s", + nla_get_u32(publ[TIPC_NLA_PUBL_KEY]), + scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); +out: + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + u32 type, lower, upper; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_PUBL]) + return -EINVAL; + + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + attrs[TIPC_NLA_PUBL], NULL, NULL); + if (err) + return err; + + type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); + lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); + upper = nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]); + + if (lower == upper) + tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lower); + else + tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lower, upper); + + return 0; +} + +static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) +{ + int err; + void *hdr; + struct nlattr *nest; + struct sk_buff *args; + struct tipc_nl_compat_cmd_dump dump; + + args = nlmsg_new(NLMSG_GOODSIZE, 
GFP_KERNEL); + if (!args) + return -ENOMEM; + + hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_PUBL_GET); + if (!hdr) { + kfree_skb(args); + return -EMSGSIZE; + } + + nest = nla_nest_start_noflag(args, TIPC_NLA_SOCK); + if (!nest) { + kfree_skb(args); + return -EMSGSIZE; + } + + if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock)) { + kfree_skb(args); + return -EMSGSIZE; + } + + nla_nest_end(args, nest); + genlmsg_end(args, hdr); + + dump.dumpit = tipc_nl_publ_dump; + dump.format = __tipc_nl_compat_publ_dump; + + err = __tipc_nl_compat_dumpit(&dump, msg, args); + + kfree_skb(args); + + return err; +} + +static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + int err; + u32 sock_ref; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], NULL, NULL); + if (err) + return err; + + sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); + + if (sock[TIPC_NLA_SOCK_CON]) { + u32 node; + struct nlattr *con[TIPC_NLA_CON_MAX + 1]; + + err = nla_parse_nested_deprecated(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], + NULL, NULL); + + if (err) + return err; + + node = nla_get_u32(con[TIPC_NLA_CON_NODE]); + tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", + tipc_zone(node), + tipc_cluster(node), + tipc_node(node), + nla_get_u32(con[TIPC_NLA_CON_SOCK])); + + if (con[TIPC_NLA_CON_FLAG]) + tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n", + nla_get_u32(con[TIPC_NLA_CON_TYPE]), + nla_get_u32(con[TIPC_NLA_CON_INST])); + else + tipc_tlv_sprintf(msg->rep, "\n"); + } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) { + tipc_tlv_sprintf(msg->rep, " bound to"); + + err = tipc_nl_compat_publ_dump(msg, sock_ref); + if (err) + return err; + } + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, + 
struct nlattr **attrs) +{ + struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested_deprecated(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); + if (err) + return err; + + return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, + nla_data(media[TIPC_NLA_MEDIA_NAME]), + nla_len(media[TIPC_NLA_MEDIA_NAME])); +} + +static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct tipc_node_info node_info; + struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_NODE]) + return -EINVAL; + + err = nla_parse_nested_deprecated(node, TIPC_NLA_NODE_MAX, + attrs[TIPC_NLA_NODE], NULL, NULL); + if (err) + return err; + + node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); + node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &node_info, + sizeof(node_info)); +} + +static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + u32 val; + struct nlattr *net; + + val = ntohl(*(__be32 *)TLV_DATA(msg->req)); + + net = nla_nest_start_noflag(skb, TIPC_NLA_NET); + if (!net) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) { + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_NETID) { + if (nla_put_u32(skb, TIPC_NLA_NET_ID, val)) + return -EMSGSIZE; + } + nla_nest_end(skb, net); + + return 0; +} + +static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + __be32 id; + struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested_deprecated(net, TIPC_NLA_NET_MAX, + attrs[TIPC_NLA_NET], NULL, NULL); + if (err) + return err; + + id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, 
&id, sizeof(id)); +} + +static int tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg) +{ + msg->rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN); + if (!msg->rep) + return -ENOMEM; + + tipc_tlv_init(msg->rep, TIPC_TLV_ULTRA_STRING); + tipc_tlv_sprintf(msg->rep, "TIPC version " TIPC_MOD_VER "\n"); + + return 0; +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; + + memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); + + switch (msg->cmd) { + case TIPC_CMD_NOOP: + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + return 0; + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = __tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = __tipc_nl_bearer_disable; + doit.transcode = tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_node_dump_link; + dump.format = tipc_nl_compat_link_stat_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_LINKS: + msg->req_type = TIPC_TLV_NET_ADDR; + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump_link; + dump.format = tipc_nl_compat_link_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: + msg->req_type = TIPC_TLV_LINK_CONFIG; + doit.doit = tipc_nl_node_set_link; + doit.transcode = 
tipc_nl_compat_link_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_RESET_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + doit.doit = tipc_nl_node_reset_link_stats; + doit.transcode = tipc_nl_compat_link_reset_stats; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_NAME_TABLE: + msg->req_type = TIPC_TLV_NAME_TBL_QUERY; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.header = tipc_nl_compat_name_table_dump_header; + dump.dumpit = tipc_nl_name_table_dump; + dump.format = tipc_nl_compat_name_table_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_PORTS: + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_sk_dump; + dump.format = tipc_nl_compat_sk_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_MEDIA_NAMES: + msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME); + dump.dumpit = tipc_nl_media_dump; + dump.format = tipc_nl_compat_media_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_NODES: + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump; + dump.format = tipc_nl_compat_node_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_NODE_ADDR: + msg->req_type = TIPC_TLV_NET_ADDR; + doit.doit = __tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SET_NETID: + msg->req_type = TIPC_TLV_UNSIGNED; + doit.doit = __tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_GET_NETID: + msg->rep_size = sizeof(u32); + dump.dumpit = tipc_nl_net_dump; + dump.format = tipc_nl_compat_net_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_STATS: + return tipc_cmd_show_stats_compat(msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int 
len; + struct tipc_nl_compat_msg msg; + struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = genl_info_userhdr(info); + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.net = genl_info_net(info); + msg.dst_sk = skb->sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = tipc_nl_compat_handle(&msg); + if ((err == -EOPNOTSUPP) || (err == -EPERM)) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + +static const struct genl_small_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_compat_recv, + }, +}; + +static struct genl_family tipc_genl_compat_family __ro_after_init = { + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, + .module = THIS_MODULE, + .small_ops = tipc_genl_compat_ops, + .n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops), + .resv_start_op = TIPC_GENL_CMD + 1, +}; + +int __init tipc_netlink_compat_start(void) +{ + int res; + + res = genl_register_family(&tipc_genl_compat_family); + if (res) { + 
pr_err("Failed to register legacy compat interface\n"); + return res; + } + + return 0; +} + +void tipc_netlink_compat_stop(void) +{ + genl_unregister_family(&tipc_genl_compat_family); +} diff --git a/net/tipc/node.c b/net/tipc/node.c index 6e6c434872e8..a07fb073368c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,8 +1,8 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012 Ericsson AB - * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * Copyright (c) 2000-2006, 2012-2016, Ericsson AB + * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,407 +35,3131 @@ */ #include "core.h" -#include "config.h" +#include "link.h" #include "node.h" #include "name_distr.h" +#include "socket.h" +#include "bcast.h" +#include "monitor.h" +#include "discover.h" +#include "netlink.h" +#include "trace.h" +#include "crypto.h" + +#define INVALID_NODE_SIG 0x10000 +#define NODE_CLEANUP_AFTER 300000 + +/* Flags used to take different actions according to flag type + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type + */ +enum { + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7) +}; + +struct tipc_link_entry { + struct tipc_link *link; + spinlock_t lock; /* per link */ + u32 mtu; + struct sk_buff_head inputq; + struct tipc_media_addr maddr; +}; + +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; + u16 named_rcv_nxt; + bool named_open; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @kref: reference counter to node object + * @lock: rwlock governing access to structure + * @net: 
the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @active_links: bearer ids of active links, used as index into links[] array + * @links: array containing references to all links to node + * @bc_entry: broadcast link entry + * @action_flags: bit mask of different types of node actions + * @state: connectivity state vs peer node + * @preliminary: a preliminary node or not + * @failover_sent: failover sent or not + * @sync_point: sequence number where synch/failover is finished + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @peer_id: 128-bit ID of peer + * @peer_id_string: ID string of peer + * @publ_list: list of publications + * @conn_sks: list of connections (FIXME) + * @timer: node's keepalive timer + * @keepalive_intv: keepalive interval in milliseconds + * @rcu: rcu struct for tipc_node + * @delete_at: indicates the time for deleting a down node + * @peer_net: peer's net namespace + * @peer_hash_mix: hash for this peer (FIXME) + * @crypto_rx: RX crypto handler + */ +struct tipc_node { + u32 addr; + struct kref kref; + rwlock_t lock; + struct net *net; + struct hlist_node hash; + int active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; + int action_flags; + struct list_head list; + int state; + bool preliminary; + bool failover_sent; + u16 sync_point; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + u8 peer_id[16]; + char peer_id_string[NODE_ID_STR_LEN]; + struct list_head publ_list; + struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; + struct rcu_head rcu; + unsigned long 
delete_at; + struct net *peer_net; + u32 peer_hash_mix; +#ifdef CONFIG_TIPC_CRYPTO + struct tipc_crypto *crypto_rx; +#endif +}; + +/* Node FSM states and events: + */ +enum { + SELF_DOWN_PEER_DOWN = 0xdd, + SELF_UP_PEER_UP = 0xaa, + SELF_DOWN_PEER_LEAVING = 0xd1, + SELF_UP_PEER_COMING = 0xac, + SELF_COMING_PEER_UP = 0xca, + SELF_LEAVING_PEER_DOWN = 0x1d, + NODE_FAILINGOVER = 0xf0, + NODE_SYNCHING = 0xcc +}; + +enum { + SELF_ESTABL_CONTACT_EVT = 0xece, + SELF_LOST_CONTACT_EVT = 0x1ce, + PEER_ESTABL_CONTACT_EVT = 0x9ece, + PEER_LOST_CONTACT_EVT = 0x91ce, + NODE_FAILOVER_BEGIN_EVT = 0xfbe, + NODE_FAILOVER_END_EVT = 0xfee, + NODE_SYNCH_BEGIN_EVT = 0xcbe, + NODE_SYNCH_END_EVT = 0xcee +}; + +static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr **maddr); +static void tipc_node_link_down(struct tipc_node *n, int bearer_id, + bool delete); +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); +static void tipc_node_delete(struct tipc_node *node); +static void tipc_node_timeout(struct timer_list *t); +static void tipc_node_fsm_evt(struct tipc_node *n, int evt); +static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); +static bool node_is_up(struct tipc_node *n); +static void tipc_node_delete_from_list(struct tipc_node *node); + +struct tipc_sock_conn { + u32 port; + u32 peer_port; + u32 peer_node; + struct list_head list; +}; + +static struct tipc_link *node_active_link(struct tipc_node *n, int sel) +{ + int bearer_id = n->active_links[sel & 1]; + + if (unlikely(bearer_id == INVALID_BEARER_ID)) + return NULL; + + return n->links[bearer_id].link; +} -#define NODE_HTABLE_SIZE 512 +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected) +{ + struct tipc_node *n; + int bearer_id; + unsigned int mtu = MAX_MSG_SIZE; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return 
mtu; + + /* Allow MAX_MSG_SIZE when building connection oriented message + * if they are in the same core network + */ + if (n->peer_net && connected) { + tipc_node_put(n); + return mtu; + } -static void node_lost_contact(struct tipc_node *n_ptr); -static void node_established_contact(struct tipc_node *n_ptr); + bearer_id = n->active_links[sel & 1]; + if (likely(bearer_id != INVALID_BEARER_ID)) + mtu = n->links[bearer_id].mtu; + tipc_node_put(n); + return mtu; +} + +bool tipc_node_get_id(struct net *net, u32 addr, u8 *id) +{ + u8 *own_id = tipc_own_id(net); + struct tipc_node *n; -static DEFINE_SPINLOCK(node_create_lock); + if (!own_id) + return true; -static struct hlist_head node_htable[NODE_HTABLE_SIZE]; -LIST_HEAD(tipc_node_list); -static u32 tipc_num_nodes; + if (addr == tipc_own_addr(net)) { + memcpy(id, own_id, TIPC_NODEID_LEN); + return true; + } + n = tipc_node_find(net, addr); + if (!n) + return false; -static atomic_t tipc_num_links = ATOMIC_INIT(0); + memcpy(id, &n->peer_id, TIPC_NODEID_LEN); + tipc_node_put(n); + return true; +} -/* - * A trivial power-of-two bitmask technique is used for speed, since this - * operation is done for every incoming TIPC packet. The number of hash table - * entries has been chosen so that no hash chain exceeds 8 nodes and will - * usually be much smaller (typically only a single node). +u16 tipc_node_get_capabilities(struct net *net, u32 addr) +{ + struct tipc_node *n; + u16 caps; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return TIPC_NODE_CAPABILITIES; + caps = n->capabilities; + tipc_node_put(n); + return caps; +} + +u32 tipc_node_get_addr(struct tipc_node *node) +{ + return (node) ? node->addr : 0; +} + +char *tipc_node_get_id_str(struct tipc_node *node) +{ + return node->peer_id_string; +} + +#ifdef CONFIG_TIPC_CRYPTO +/** + * tipc_node_crypto_rx - Retrieve crypto RX handle from node + * @__n: target tipc_node + * Note: node ref counter must be held first! 
*/ -static unsigned int tipc_hashfn(u32 addr) +struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n) +{ + return (__n) ? __n->crypto_rx : NULL; +} + +struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos) +{ + return container_of(pos, struct tipc_node, list)->crypto_rx; +} + +struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr) +{ + struct tipc_node *n; + + n = tipc_node_find(net, addr); + return (n) ? n->crypto_rx : NULL; +} +#endif + +static void tipc_node_free(struct rcu_head *rp) +{ + struct tipc_node *n = container_of(rp, struct tipc_node, rcu); + +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&n->crypto_rx); +#endif + kfree(n); +} + +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *n = container_of(kref, struct tipc_node, kref); + + kfree(n->bc_entry.link); + call_rcu(&n->rcu, tipc_node_free); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +void tipc_node_get(struct tipc_node *node) { - return addr & (NODE_HTABLE_SIZE - 1); + kref_get(&node->kref); } /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(u32 addr) +static struct tipc_node *tipc_node_find(struct net *net, u32 addr) { + struct tipc_net *tn = tipc_net(net); struct tipc_node *node; + unsigned int thash = tipc_hashfn(addr); - if (unlikely(!in_own_cluster_exact(addr))) - return NULL; - - hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) { - if (node->addr == addr) - return node; + rcu_read_lock(); + hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) { + if (node->addr != addr || node->preliminary) + continue; + if (!kref_get_unless_zero(&node->kref)) + node = NULL; + break; } - return NULL; + rcu_read_unlock(); + return node; } -/** - * tipc_node_create - create neighboring node - * - * Currently, this routine is called by neighbor discovery code, which holds - * net_lock for reading only. 
We must take node_create_lock to ensure a node - * isn't created twice if two different bearers discover the node at the same - * time. (It would be preferable to switch to holding net_lock in write mode, - * but this is a non-trivial change.) +/* tipc_node_find_by_id - locate specified node object by its 128-bit id + * Note: this function is called only when a discovery request failed + * to find the node by its 32-bit id, and is not time critical */ -struct tipc_node *tipc_node_create(u32 addr) +static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id) { - struct tipc_node *n_ptr, *temp_node; + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + bool found = false; + + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + read_lock_bh(&n->lock); + if (!memcmp(id, n->peer_id, 16) && + kref_get_unless_zero(&n->kref)) + found = true; + read_unlock_bh(&n->lock); + if (found) + break; + } + rcu_read_unlock(); + return found ? n : NULL; +} - spin_lock_bh(&node_create_lock); +static void tipc_node_read_lock(struct tipc_node *n) + __acquires(n->lock) +{ + read_lock_bh(&n->lock); +} - n_ptr = tipc_node_find(addr); - if (n_ptr) { - spin_unlock_bh(&node_create_lock); - return n_ptr; +static void tipc_node_read_unlock(struct tipc_node *n) + __releases(n->lock) +{ + read_unlock_bh(&n->lock); +} + +static void tipc_node_write_lock(struct tipc_node *n) + __acquires(n->lock) +{ + write_lock_bh(&n->lock); +} + +static void tipc_node_write_unlock_fast(struct tipc_node *n) + __releases(n->lock) +{ + write_unlock_bh(&n->lock); +} + +static void tipc_node_write_unlock(struct tipc_node *n) + __releases(n->lock) +{ + struct tipc_socket_addr sk; + struct net *net = n->net; + u32 flags = n->action_flags; + struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id, node; + + if (likely(!flags)) { + write_unlock_bh(&n->lock); + return; } - n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); - if (!n_ptr) { - spin_unlock_bh(&node_create_lock); - 
pr_warn("Node creation failed, no memory\n"); - return NULL; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = tipc_own_addr(net); + node = n->addr; + bearer_id = n->link_id & 0xffff; + publ_list = &n->publ_list; + + n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); + + write_unlock_bh(&n->lock); + + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, node, n->capabilities); + + if (flags & TIPC_NOTIFY_NODE_UP) + tipc_named_node_up(net, node, n->capabilities); + + if (flags & TIPC_NOTIFY_LINK_UP) { + tipc_mon_peer_up(net, node, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, sk.ref); + } + if (flags & TIPC_NOTIFY_LINK_DOWN) { + tipc_mon_peer_down(net, node, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); } +} + +static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes) +{ + int net_id = tipc_netid(n->net); + struct tipc_net *tn_peer; + struct net *tmp; + u32 hash_chk; + + if (n->peer_net) + return; + + for_each_net_rcu(tmp) { + tn_peer = tipc_net(tmp); + if (!tn_peer) + continue; + /* Integrity checking whether node exists in namespace or not */ + if (tn_peer->net_id != net_id) + continue; + if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN)) + continue; + hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random); + if (hash_mixes ^ hash_chk) + continue; + n->peer_net = tmp; + n->peer_hash_mix = hash_mixes; + break; + } +} + +struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, + u16 capabilities, u32 hash_mixes, + bool preliminary) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); + struct tipc_node *n, *temp_node; + unsigned long intv; + int bearer_id; + int i; + + spin_lock_bh(&tn->node_list_lock); + n = tipc_node_find(net, addr) ?: + tipc_node_find_by_id(net, peer_id); + if (n) { + if 
(!n->preliminary) + goto update; + if (preliminary) + goto exit; + /* A preliminary node becomes "real" now, refresh its data */ + tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } + n->preliminary = false; + n->addr = addr; + hlist_del_rcu(&n->hash); + hlist_add_head_rcu(&n->hash, + &tn->node_htable[tipc_hashfn(addr)]); + list_del_rcu(&n->list); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n->addr < temp_node->addr) + break; + } + list_add_tail_rcu(&n->list, &temp_node->list); + tipc_node_write_unlock_fast(n); + +update: + if (n->peer_hash_mix ^ hash_mixes) + tipc_node_assign_peer_net(n, hash_mixes); + if (n->capabilities == capabilities) + goto exit; + /* Same node may come back with new capabilities */ + tipc_node_write_lock(n); + n->capabilities = capabilities; + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + l = n->links[bearer_id].link; + if (l) + tipc_link_update_caps(l, capabilities); + } + tipc_node_write_unlock_fast(n); - n_ptr->addr = addr; - spin_lock_init(&n_ptr->lock); - INIT_HLIST_NODE(&n_ptr->hash); - INIT_LIST_HEAD(&n_ptr->list); - INIT_LIST_HEAD(&n_ptr->nsub); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } - hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + tipc_bcast_toggle_rcast(net, + (tn->capabilities & TIPC_BCAST_RCAST)); - list_for_each_entry(temp_node, &tipc_node_list, list) { - if (n_ptr->addr < temp_node->addr) + goto exit; + } + n = kzalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { + pr_warn("Node creation 
failed, no memory\n"); + goto exit; + } + tipc_nodeid2string(n->peer_id_string, peer_id); +#ifdef CONFIG_TIPC_CRYPTO + if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) { + pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string); + kfree(n); + n = NULL; + goto exit; + } +#endif + n->addr = addr; + n->preliminary = preliminary; + memcpy(&n->peer_id, peer_id, 16); + n->net = net; + n->peer_net = NULL; + n->peer_hash_mix = 0; + /* Assign kernel local namespace if exists */ + tipc_node_assign_peer_net(n, hash_mixes); + n->capabilities = capabilities; + kref_init(&n->kref); + rwlock_init(&n->lock); + INIT_HLIST_NODE(&n->hash); + INIT_LIST_HEAD(&n->list); + INIT_LIST_HEAD(&n->publ_list); + INIT_LIST_HEAD(&n->conn_sks); + skb_queue_head_init(&n->bc_entry.namedq); + skb_queue_head_init(&n->bc_entry.inputq1); + __skb_queue_head_init(&n->bc_entry.arrvq); + skb_queue_head_init(&n->bc_entry.inputq2); + for (i = 0; i < MAX_BEARERS; i++) + spin_lock_init(&n->links[i].lock); + n->state = SELF_DOWN_PEER_LEAVING; + n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); + n->signature = INVALID_NODE_SIG; + n->active_links[0] = INVALID_BEARER_ID; + n->active_links[1] = INVALID_BEARER_ID; + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + tipc_node_put(n); + n = NULL; + goto exit; + } + tipc_node_get(n); + timer_setup(&n->timer, tipc_node_timeout, 0); + /* Start a slow timer anyway, crypto needs it */ + n->keepalive_intv = 10000; + intv = jiffies + msecs_to_jiffies(n->keepalive_intv); + if (!mod_timer(&n->timer, intv)) + tipc_node_get(n); + hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n->addr < temp_node->addr) break; } - 
list_add_tail(&n_ptr->list, &temp_node->list); - n_ptr->block_setup = WAIT_PEER_DOWN; - n_ptr->signature = INVALID_NODE_SIG; + list_add_tail_rcu(&n->list, &temp_node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); + trace_tipc_node_create(n, true, " "); +exit: + spin_unlock_bh(&tn->node_list_lock); + return n; +} + +static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) +{ + unsigned long tol = tipc_link_tolerance(l); + unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; + + /* Link with lowest tolerance determines timer interval */ + if (intv < n->keepalive_intv) + n->keepalive_intv = intv; + + /* Ensure link's abort limit corresponds to current tolerance */ + tipc_link_set_abort_limit(l, tol / n->keepalive_intv); +} + +static void tipc_node_delete_from_list(struct tipc_node *node) +{ +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_key_flush(node->crypto_rx); +#endif + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + tipc_node_put(node); +} + +static void tipc_node_delete(struct tipc_node *node) +{ + trace_tipc_node_delete(node, true, " "); + tipc_node_delete_from_list(node); + + timer_delete_sync(&node->timer); + tipc_node_put(node); +} + +void tipc_node_stop(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_node *node, *t_node; + + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_safe(node, t_node, &tn->node_list, list) + tipc_node_delete(node); + spin_unlock_bh(&tn->node_list_lock); +} + +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); + return; + } + 
tipc_node_write_lock(n); + list_add_tail(subscr, &n->publ_list); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); +} + +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_write_lock(n); + list_del_init(subscr); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); +} + +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) +{ + struct tipc_node *node; + struct tipc_sock_conn *conn; + int err = 0; + + if (in_own_node(net, dnode)) + return 0; + + node = tipc_node_find(net, dnode); + if (!node) { + pr_warn("Connecting sock to node 0x%x failed\n", dnode); + return -EHOSTUNREACH; + } + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } + conn->peer_node = dnode; + conn->port = port; + conn->peer_port = peer_port; + + tipc_node_write_lock(node); + list_add_tail(&conn->list, &node->conn_sks); + tipc_node_write_unlock(node); +exit: + tipc_node_put(node); + return err; +} + +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) +{ + struct tipc_node *node; + struct tipc_sock_conn *conn, *safe; + + if (in_own_node(net, dnode)) + return; + + node = tipc_node_find(net, dnode); + if (!node) + return; + + tipc_node_write_lock(node); + list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { + if (port != conn->port) + continue; + list_del(&conn->list); + kfree(conn); + } + tipc_node_write_unlock(node); + tipc_node_put(node); +} + +static void tipc_node_clear_links(struct tipc_node *node) +{ + int i; + + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link_entry *le = &node->links[i]; + + if (le->link) { + kfree(le->link); + le->link = NULL; + node->link_cnt--; + } + } +} + +/* tipc_node_cleanup - delete nodes that does not + * have active links for 
NODE_CLEANUP_AFTER time + */ +static bool tipc_node_cleanup(struct tipc_node *peer) +{ + struct tipc_node *temp_node; + struct tipc_net *tn = tipc_net(peer->net); + bool deleted = false; + + /* If lock held by tipc_node_stop() the node will be deleted anyway */ + if (!spin_trylock_bh(&tn->node_list_lock)) + return false; + + tipc_node_write_lock(peer); + + if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) { + tipc_node_clear_links(peer); + tipc_node_delete_from_list(peer); + deleted = true; + } + tipc_node_write_unlock(peer); - tipc_num_nodes++; + if (!deleted) { + spin_unlock_bh(&tn->node_list_lock); + return deleted; + } - spin_unlock_bh(&node_create_lock); - return n_ptr; + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + tipc_bcast_toggle_rcast(peer->net, + (tn->capabilities & TIPC_BCAST_RCAST)); + spin_unlock_bh(&tn->node_list_lock); + return deleted; } -void tipc_node_delete(struct tipc_node *n_ptr) +/* tipc_node_timeout - handle expiration of node timer + */ +static void tipc_node_timeout(struct timer_list *t) { - list_del(&n_ptr->list); - hlist_del(&n_ptr->hash); - kfree(n_ptr); + struct tipc_node *n = timer_container_of(n, t, timer); + struct tipc_link_entry *le; + struct sk_buff_head xmitq; + int remains = n->link_cnt; + int bearer_id; + int rc = 0; + + trace_tipc_node_timeout(n, false, " "); + if (!node_is_up(n) && tipc_node_cleanup(n)) { + /*Removing the reference of Timer*/ + tipc_node_put(n); + return; + } + +#ifdef CONFIG_TIPC_CRYPTO + /* Take any crypto key related actions first */ + tipc_crypto_timeout(n->crypto_rx); +#endif + __skb_queue_head_init(&xmitq); + + /* Initial node interval to value larger (10 seconds), then it will be + * recalculated with link lowest tolerance + */ + tipc_node_read_lock(n); + n->keepalive_intv = 10000; + tipc_node_read_unlock(n); + for (bearer_id = 0; remains 
&& (bearer_id < MAX_BEARERS); bearer_id++) { + tipc_node_read_lock(n); + le = &n->links[bearer_id]; + if (le->link) { + spin_lock_bh(&le->lock); + /* Link tolerance may change asynchronously: */ + tipc_node_calculate_timer(n, le->link); + rc = tipc_link_timeout(le->link, &xmitq); + spin_unlock_bh(&le->lock); + remains--; + } + tipc_node_read_unlock(n); + tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n); + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_link_down(n, bearer_id, false); + } + mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv)); +} + +/** + * __tipc_node_link_up - handle addition of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on + * Node lock must be held by caller + * Link becomes active (alone or shared) or standby, depending on its priority. + */ +static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, + struct sk_buff_head *xmitq) +{ + int *slot0 = &n->active_links[0]; + int *slot1 = &n->active_links[1]; + struct tipc_link *ol = node_active_link(n, 0); + struct tipc_link *nl = n->links[bearer_id].link; + + if (!nl || tipc_link_is_up(nl)) + return; + + tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(nl)) + return; + + n->working_links++; + n->action_flags |= TIPC_NOTIFY_LINK_UP; + n->link_id = tipc_link_id(nl); + + /* Leave room for tunnel header when returning 'mtu' to users: */ + n->links[bearer_id].mtu = tipc_link_mss(nl); + + tipc_bearer_add_dest(n->net, bearer_id, n->addr); + tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); + + pr_debug("Established link <%s> on network plane %c\n", + tipc_link_name(nl), tipc_link_plane(nl)); + trace_tipc_node_link_up(n, true, " "); + + /* Ensure that a STATE message goes first */ + tipc_link_build_state_msg(nl, xmitq); + + /* First link? 
=> give it both slots */ + if (!ol) { + *slot0 = bearer_id; + *slot1 = bearer_id; + tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_link_set_active(nl, true); + tipc_bcast_add_peer(n->net, nl, xmitq); + return; + } + + /* Second link => redistribute slots */ + if (tipc_link_prio(nl) > tipc_link_prio(ol)) { + pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol)); + *slot0 = bearer_id; + *slot1 = bearer_id; + tipc_link_set_active(nl, true); + tipc_link_set_active(ol, false); + } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) { + tipc_link_set_active(nl, true); + *slot1 = bearer_id; + } else { + pr_debug("New link <%s> is standby\n", tipc_link_name(nl)); + } - tipc_num_nodes--; + /* Prepare synchronization with first link */ + tipc_link_tnl_prepare(ol, nl, SYNCH_MSG, xmitq); } /** * tipc_node_link_up - handle addition of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on * * Link becomes active (alone or shared) or standby, depending on its priority. */ -void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +static void tipc_node_link_up(struct tipc_node *n, int bearer_id, + struct sk_buff_head *xmitq) { - struct tipc_link **active = &n_ptr->active_links[0]; + struct tipc_media_addr *maddr; - n_ptr->working_links++; + tipc_node_write_lock(n); + __tipc_node_link_up(n, bearer_id, xmitq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n); + tipc_node_write_unlock(n); +} - pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); +/** + * tipc_node_link_failover() - start failover in case "half-failover" + * + * This function is only called in a very special situation where link + * failover can be already started on peer node but not on this node. + * This can happen when e.g.:: + * + * 1. Both links <1A-2A>, <1B-2B> down + * 2. 
Link endpoint 2A up, but 1A still down (e.g. due to network + * disturbance, wrong session, etc.) + * 3. Link <1B-2B> up + * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) + * 5. Node 2 starts failover onto link <1B-2B> + * + * ==> Node 1 does never start link/node failover! + * + * @n: tipc node structure + * @l: link peer endpoint failingover (- can be NULL) + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited on tnl link later + */ +static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, + struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + /* Avoid to be "self-failover" that can never end */ + if (!tipc_link_is_up(tnl)) + return; - if (!active[0]) { - active[0] = active[1] = l_ptr; - node_established_contact(n_ptr); + /* Don't rush, failure link may be in the process of resetting */ + if (l && !tipc_link_is_reset(l)) return; + + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); + tipc_link_failover_prepare(l, tnl, xmitq); + + if (l) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); +} + +/** + * __tipc_node_link_down - handle loss of link + * @n: target tipc_node + * @bearer_id: id of the bearer + * @xmitq: queue for messages to be xmited on + * @maddr: output media address of the bearer + */ +static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr **maddr) +{ + struct tipc_link_entry *le = &n->links[*bearer_id]; + int *slot0 = &n->active_links[0]; + int *slot1 = &n->active_links[1]; + int i, highest = 0, prio; + struct tipc_link *l, *_l, *tnl; + + l = n->links[*bearer_id].link; + if (!l || tipc_link_is_reset(l)) + return; + + n->working_links--; + n->action_flags |= TIPC_NOTIFY_LINK_DOWN; + n->link_id = tipc_link_id(l); + + tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); + + 
pr_debug("Lost link <%s> on network plane %c\n", + tipc_link_name(l), tipc_link_plane(l)); + + /* Select new active link if any available */ + *slot0 = INVALID_BEARER_ID; + *slot1 = INVALID_BEARER_ID; + for (i = 0; i < MAX_BEARERS; i++) { + _l = n->links[i].link; + if (!_l || !tipc_link_is_up(_l)) + continue; + if (_l == l) + continue; + prio = tipc_link_prio(_l); + if (prio < highest) + continue; + if (prio > highest) { + highest = prio; + *slot0 = i; + *slot1 = i; + continue; + } + *slot1 = i; } - if (l_ptr->priority < active[0]->priority) { - pr_info("New link <%s> becomes standby\n", l_ptr->name); + + if (!node_is_up(n)) { + if (tipc_link_peer_is_down(l)) + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); + trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down!"); + tipc_link_fsm_evt(l, LINK_RESET_EVT); + tipc_link_reset(l); + tipc_link_build_reset_msg(l, xmitq); + *maddr = &n->links[*bearer_id].maddr; + node_lost_contact(n, &le->inputq); + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); return; } - tipc_link_send_duplicate(active[0], l_ptr); - if (l_ptr->priority == active[0]->priority) { - active[0] = l_ptr; + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); + + /* There is still a working link => initiate failover */ + *bearer_id = n->active_links[0]; + tnl = n->links[*bearer_id].link; + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); + tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); + trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down -> failover!"); + tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); + *maddr = &n->links[*bearer_id].maddr; +} + +static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) +{ + struct tipc_link_entry *le = &n->links[bearer_id]; + struct 
tipc_media_addr *maddr = NULL; + struct tipc_link *l = le->link; + int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; + + if (!l) return; + + __skb_queue_head_init(&xmitq); + + tipc_node_write_lock(n); + if (!tipc_link_is_establishing(l)) { + __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); + } else { + /* Defuse pending tipc_node_link_up() */ + tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); + } + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; } - pr_info("Old link <%s> becomes standby\n", active[0]->name); - if (active[1] != active[0]) - pr_info("Old link <%s> becomes standby\n", active[1]->name); - active[0] = active[1] = l_ptr; + trace_tipc_node_link_down(n, true, "node link down or deleted!"); + tipc_node_write_unlock(n); + if (delete) + tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n); + tipc_sk_rcv(n->net, &le->inputq); } -/** - * node_select_active_links - select active link +static bool node_is_up(struct tipc_node *n) +{ + return n->active_links[0] != INVALID_BEARER_ID; +} + +bool tipc_node_is_up(struct net *net, u32 addr) +{ + struct tipc_node *n; + bool retval = false; + + if (in_own_node(net, addr)) + return true; + + n = tipc_node_find(net, addr); + if (!n) + return false; + retval = node_is_up(n); + tipc_node_put(n); + return retval; +} + +static u32 tipc_node_suggest_addr(struct net *net, u32 addr) +{ + struct tipc_node *n; + + addr ^= tipc_net(net)->random; + while ((n = tipc_node_find(net, addr))) { + tipc_node_put(n); + addr++; + } + return addr; +} + +/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not + * Returns suggested address if any, otherwise 0 */ -static void node_select_active_links(struct tipc_node *n_ptr) +u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) { - struct tipc_link **active = &n_ptr->active_links[0]; - u32 i; - u32 highest_prio = 0; + struct tipc_net *tn = 
tipc_net(net); + struct tipc_node *n; + bool preliminary; + u32 sugg_addr; + + /* Suggest new address if some other peer is using this one */ + n = tipc_node_find(net, addr); + if (n) { + if (!memcmp(n->peer_id, id, NODE_ID_LEN)) + addr = 0; + tipc_node_put(n); + if (!addr) + return 0; + return tipc_node_suggest_addr(net, addr); + } + + /* Suggest previously used address if peer is known */ + n = tipc_node_find_by_id(net, id); + if (n) { + sugg_addr = n->addr; + preliminary = n->preliminary; + tipc_node_put(n); + if (!preliminary) + return sugg_addr; + } + + /* Even this node may be in conflict */ + if (tn->trial_addr == addr) + return tipc_node_suggest_addr(net, addr); - active[0] = active[1] = NULL; + return 0; +} +void tipc_node_check_dest(struct net *net, u32 addr, + u8 *peer_id, struct tipc_bearer *b, + u16 capabilities, u32 signature, u32 hash_mixes, + struct tipc_media_addr *maddr, + bool *respond, bool *dupl_addr) +{ + struct tipc_node *n; + struct tipc_link *l; + struct tipc_link_entry *le; + bool addr_match = false; + bool sign_match = false; + bool link_up = false; + bool link_is_reset = false; + bool accept_addr = false; + bool reset = false; + char *if_name; + unsigned long intv; + u16 session; + + *dupl_addr = false; + *respond = false; + + n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes, + false); + if (!n) + return; + + tipc_node_write_lock(n); + + le = &n->links[b->identity]; + + /* Prepare to validate requesting node's signature and media address */ + l = le->link; + link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); + addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); + sign_match = (signature == n->signature); + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { + /* All is fine. Ignore requests. 
*/ + /* Peer node is not a container/local namespace */ + if (!n->peer_hash_mix) + n->peer_hash_mix = hash_mixes; + } else if (sign_match && addr_match && !link_up) { + /* Respond. The link will come up in due time */ + *respond = true; + } else if (sign_match && !addr_match && link_up) { + /* Peer has changed i/f address without rebooting. + * If so, the link will reset soon, and the next + * discovery will be accepted. So we can ignore it. + * It may also be a cloned or malicious peer having + * chosen the same node address and signature as an + * existing one. + * Ignore requests until the link goes down, if ever. + */ + *dupl_addr = true; + } else if (sign_match && !addr_match && !link_up) { + /* Peer link has changed i/f address without rebooting. + * It may also be a cloned or malicious peer; we can't + * distinguish between the two. + * The signature is correct, so we must accept. + */ + accept_addr = true; + *respond = true; + reset = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. Two possibilities: + * - Delayed re-discovery; this link endpoint has already + * reset and re-established contact with the peer, before + * receiving a discovery message from that node. + * (The peer happened to receive one from this node first). + * - The peer came back so fast that our side has not + * discovered it yet. Probing from this side will soon + * reset the link, since there can be no working link + * endpoint at the peer end, and the link will re-establish. + * Accept the signature, since it comes from a known peer. + */ + n->signature = signature; + } else if (!sign_match && addr_match && !link_up) { + /* The peer node has rebooted. + * Accept signature, since it is a known peer. + */ + n->signature = signature; + *respond = true; + } else if (!sign_match && !addr_match && link_up) { + /* Peer rebooted with new address, or a new/duplicate peer. + * Ignore until the link goes down, if ever. 
+ */ + *dupl_addr = true; + } else if (!sign_match && !addr_match && !link_up) { + /* Peer rebooted with new address, or it is a new peer. + * Accept signature and address. + */ + n->signature = signature; + accept_addr = true; + *respond = true; + reset = true; + } + + if (!accept_addr) + goto exit; + + /* Now create new link if not already existing */ + if (!l) { + if (n->link_cnt == 2) + goto exit; + + if_name = strchr(b->name, ':') + 1; + get_random_bytes(&session, sizeof(u16)); + if (!tipc_link_create(net, if_name, b->identity, b->tolerance, + b->net_plane, b->mtu, b->priority, + b->min_win, b->max_win, session, + tipc_own_addr(net), addr, peer_id, + n->capabilities, + tipc_bc_sndlink(n->net), n->bc_entry.link, + &le->inputq, + &n->bc_entry.namedq, &l)) { + *respond = false; + goto exit; + } + trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link created!"); + tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); + if (n->state == NODE_FAILINGOVER) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); + le->link = l; + n->link_cnt++; + tipc_node_calculate_timer(n, l); + if (n->link_cnt == 1) { + intv = jiffies + msecs_to_jiffies(n->keepalive_intv); + if (!mod_timer(&n->timer, intv)) + tipc_node_get(n); + } + } + memcpy(&le->maddr, maddr, sizeof(*maddr)); +exit: + tipc_node_write_unlock(n); + if (reset && !link_is_reset) + tipc_node_link_down(n, b->identity, false); + tipc_node_put(n); +} + +void tipc_node_delete_links(struct net *net, int bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *n; + + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_link_down(n, bearer_id, true); + } + rcu_read_unlock(); +} + +static void tipc_node_reset_links(struct tipc_node *n) +{ + int i; + + pr_warn("Resetting all links to %x\n", n->addr); + + trace_tipc_node_reset_links(n, true, " "); for (i = 0; i < MAX_BEARERS; i++) { - struct tipc_link *l_ptr = n_ptr->links[i]; + 
tipc_node_link_down(n, i, false); + } +} - if (!l_ptr || !tipc_link_is_up(l_ptr) || - (l_ptr->priority < highest_prio)) - continue; +/* tipc_node_fsm_evt - node finite state machine + * Determines when contact is allowed with peer node + */ +static void tipc_node_fsm_evt(struct tipc_node *n, int evt) +{ + int state = n->state; - if (l_ptr->priority > highest_prio) { - highest_prio = l_ptr->priority; - active[0] = active[1] = l_ptr; - } else { - active[1] = l_ptr; + switch (state) { + case SELF_DOWN_PEER_DOWN: + switch (evt) { + case SELF_ESTABL_CONTACT_EVT: + state = SELF_UP_PEER_COMING; + break; + case PEER_ESTABL_CONTACT_EVT: + state = SELF_COMING_PEER_UP; + break; + case SELF_LOST_CONTACT_EVT: + case PEER_LOST_CONTACT_EVT: + break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case SELF_UP_PEER_UP: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case NODE_SYNCH_BEGIN_EVT: + state = NODE_SYNCHING; + break; + case NODE_FAILOVER_BEGIN_EVT: + state = NODE_FAILINGOVER; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + case NODE_SYNCH_END_EVT: + case NODE_FAILOVER_END_EVT: + break; + default: + goto illegal_evt; + } + break; + case SELF_DOWN_PEER_LEAVING: + switch (evt) { + case PEER_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_DOWN; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + case SELF_LOST_CONTACT_EVT: + break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case SELF_UP_PEER_COMING: + switch (evt) { + case PEER_ESTABL_CONTACT_EVT: + state = SELF_UP_PEER_UP; + break; + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_DOWN; + break; + case SELF_ESTABL_CONTACT_EVT: + 
case PEER_LOST_CONTACT_EVT: + case NODE_SYNCH_END_EVT: + case NODE_FAILOVER_BEGIN_EVT: + break; + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; } + break; + case SELF_COMING_PEER_UP: + switch (evt) { + case SELF_ESTABL_CONTACT_EVT: + state = SELF_UP_PEER_UP; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_DOWN; + break; + case SELF_LOST_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case SELF_LEAVING_PEER_DOWN: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_DOWN; + break; + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + case PEER_LOST_CONTACT_EVT: + break; + case NODE_SYNCH_END_EVT: + case NODE_SYNCH_BEGIN_EVT: + case NODE_FAILOVER_BEGIN_EVT: + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case NODE_FAILINGOVER: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case NODE_FAILOVER_END_EVT: + state = SELF_UP_PEER_UP; + break; + case NODE_FAILOVER_BEGIN_EVT: + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + case NODE_SYNCH_BEGIN_EVT: + case NODE_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case NODE_SYNCHING: + switch (evt) { + case SELF_LOST_CONTACT_EVT: + state = SELF_DOWN_PEER_LEAVING; + break; + case PEER_LOST_CONTACT_EVT: + state = SELF_LEAVING_PEER_DOWN; + break; + case NODE_SYNCH_END_EVT: + state = SELF_UP_PEER_UP; + break; + case NODE_FAILOVER_BEGIN_EVT: + state = NODE_FAILINGOVER; + break; + case NODE_SYNCH_BEGIN_EVT: + case SELF_ESTABL_CONTACT_EVT: + case PEER_ESTABL_CONTACT_EVT: + break; + case NODE_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + default: + pr_err("Unknown node fsm state %x\n", 
state); + break; + } + trace_tipc_node_fsm(n->peer_id, n->state, state, evt); + n->state = state; + return; + +illegal_evt: + pr_err("Illegal node fsm evt %x in state %x\n", evt, state); + trace_tipc_node_fsm(n->peer_id, n->state, state, evt); +} + +static void node_lost_contact(struct tipc_node *n, + struct sk_buff_head *inputq) +{ + struct tipc_sock_conn *conn, *safe; + struct tipc_link *l; + struct list_head *conns = &n->conn_sks; + struct sk_buff *skb; + uint i; + + pr_debug("Lost contact with %x\n", n->addr); + n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); + trace_tipc_node_lost_contact(n, true, " "); + + /* Clean up broadcast state */ + tipc_bcast_remove_peer(n->net, n->bc_entry.link); + skb_queue_purge(&n->bc_entry.namedq); + + /* Abort any ongoing link failover */ + for (i = 0; i < MAX_BEARERS; i++) { + l = n->links[i].link; + if (l) + tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); + } + + /* Notify publications from this node */ + n->action_flags |= TIPC_NOTIFY_NODE_DOWN; + n->peer_net = NULL; + n->peer_hash_mix = 0; + /* Notify sockets connected to node */ + list_for_each_entry_safe(conn, safe, conns, list) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tipc_own_addr(n->net), + conn->peer_node, conn->port, + conn->peer_port, TIPC_ERR_NO_NODE); + if (likely(skb)) + skb_queue_tail(inputq, skb); + list_del(&conn->list); + kfree(conn); } } /** - * tipc_node_link_down - handle loss of link + * tipc_node_get_linkname - get the name of a link + * + * @net: the applicable net namespace + * @bearer_id: id of the bearer + * @addr: peer node address + * @linkname: link name output buffer + * @len: size of @linkname output buffer + * + * Return: 0 on success */ -void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, + char *linkname, size_t len) +{ + struct tipc_link *link; + int err = -EINVAL; + struct tipc_node *node = 
tipc_node_find(net, addr); + + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (link) { + strscpy(linkname, tipc_link_name(link), len); + err = 0; + } + tipc_node_read_unlock(node); +exit: + tipc_node_put(node); + return err; +} + +/* Caller should hold node lock for the passed node */ +static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { - struct tipc_link **active; + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_NODE_GET); + if (!hdr) + return -EMSGSIZE; - n_ptr->working_links--; + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE); + if (!attrs) + goto msg_full; - if (!tipc_link_is_active(l_ptr)) { - pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) + goto attr_msg_full; + if (node_is_up(node)) + if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) +{ + struct tipc_msg *hdr = buf_msg(skb_peek(list)); + struct sk_buff_head inputq; + + switch (msg_user(hdr)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + if (msg_connected(hdr) || msg_named(hdr) || + msg_direct(hdr)) { + tipc_loopback_trace(peer_net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(peer_net, list); + return; + } + if (msg_mcast(hdr)) { + tipc_loopback_trace(peer_net, list); + skb_queue_head_init(&inputq); + tipc_sk_mcast_rcv(peer_net, list, &inputq); + __skb_queue_purge(list); + skb_queue_purge(&inputq); + 
return; + } return; + case MSG_FRAGMENTER: + if (tipc_msg_assemble(list)) { + tipc_loopback_trace(peer_net, list); + skb_queue_head_init(&inputq); + tipc_sk_mcast_rcv(peer_net, list, &inputq); + __skb_queue_purge(list); + skb_queue_purge(&inputq); + } + return; + case GROUP_PROTOCOL: + case CONN_MANAGER: + tipc_loopback_trace(peer_net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(peer_net, list); + return; + case LINK_PROTOCOL: + case NAME_DISTRIBUTOR: + case TUNNEL_PROTOCOL: + case BCAST_PROTOCOL: + return; + default: + return; + } +} + +/** + * tipc_node_xmit() - general link level function for message sending + * @net: the applicable net namespace + * @list: chain of buffers containing message + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain. + * Return: 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF + */ +int tipc_node_xmit(struct net *net, struct sk_buff_head *list, + u32 dnode, int selector) +{ + struct tipc_link_entry *le = NULL; + struct tipc_node *n; + struct sk_buff_head xmitq; + bool node_up = false; + struct net *peer_net; + int bearer_id; + int rc; + + if (in_own_node(net, dnode)) { + tipc_loopback_trace(net, list); + spin_lock_init(&list->lock); + tipc_sk_rcv(net, list); + return 0; + } + + n = tipc_node_find(net, dnode); + if (unlikely(!n)) { + __skb_queue_purge(list); + return -EHOSTUNREACH; + } + + rcu_read_lock(); + tipc_node_read_lock(n); + node_up = node_is_up(n); + peer_net = n->peer_net; + tipc_node_read_unlock(n); + if (node_up && peer_net && check_net(peer_net)) { + /* xmit inner linux container */ + tipc_lxc_xmit(peer_net, list); + if (likely(skb_queue_empty(list))) { + rcu_read_unlock(); + tipc_node_put(n); + return 0; + } + } + rcu_read_unlock(); + + tipc_node_read_lock(n); + bearer_id = n->active_links[selector & 1]; + if (unlikely(bearer_id == INVALID_BEARER_ID)) { + tipc_node_read_unlock(n); + tipc_node_put(n); + 
__skb_queue_purge(list); + return -EHOSTUNREACH; } - pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); - - active = &n_ptr->active_links[0]; - if (active[0] == l_ptr) - active[0] = active[1]; - if (active[1] == l_ptr) - active[1] = active[0]; - if (active[0] == l_ptr) - node_select_active_links(n_ptr); - if (tipc_node_is_up(n_ptr)) - tipc_link_changeover(l_ptr); + + __skb_queue_head_init(&xmitq); + le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(n); + + if (unlikely(rc == -ENOBUFS)) + tipc_node_link_down(n, bearer_id, false); else - node_lost_contact(n_ptr); + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); + + tipc_node_put(n); + + return rc; } -int tipc_node_active_links(struct tipc_node *n_ptr) +/* tipc_node_xmit_skb(): send single buffer to destination + * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. 
+ */ +int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) { - return n_ptr->active_links[0] != NULL; + struct sk_buff_head head; + + __skb_queue_head_init(&head); + __skb_queue_tail(&head, skb); + tipc_node_xmit(net, &head, dnode, selector); + return 0; } -int tipc_node_redundant_links(struct tipc_node *n_ptr) +/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations + * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected + */ +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) { - return n_ptr->working_links > 1; + struct sk_buff *skb; + u32 selector, dnode; + + while ((skb = __skb_dequeue(xmitq))) { + selector = msg_origport(buf_msg(skb)); + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, selector); + } + return 0; } -int tipc_node_is_up(struct tipc_node *n_ptr) +void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests) { - return tipc_node_active_links(n_ptr); + struct sk_buff_head xmitq; + struct sk_buff *txskb; + struct tipc_node *n; + u16 dummy; + u32 dst; + + /* Use broadcast if all nodes support it */ + if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) { + __skb_queue_head_init(&xmitq); + __skb_queue_tail(&xmitq, skb); + tipc_bcast_xmit(net, &xmitq, &dummy); + return; + } + + /* Otherwise use legacy replicast method */ + rcu_read_lock(); + list_for_each_entry_rcu(n, tipc_nodes(net), list) { + dst = n->addr; + if (in_own_node(net, dst)) + continue; + if (!node_is_up(n)) + continue; + txskb = pskb_copy(skb, GFP_ATOMIC); + if (!txskb) + break; + msg_set_destnode(buf_msg(txskb), dst); + tipc_node_xmit_skb(net, txskb, dst, 0); + } + rcu_read_unlock(); + kfree_skb(skb); } -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +static void tipc_node_mcast_rcv(struct tipc_node *n) { - n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; - atomic_inc(&tipc_num_links); - n_ptr->link_cnt++; + 
struct tipc_bclink_entry *be = &n->bc_entry; + + /* 'arrvq' is under inputq2's lock protection */ + spin_lock_bh(&be->inputq2.lock); + spin_lock_bh(&be->inputq1.lock); + skb_queue_splice_tail_init(&be->inputq1, &be->arrvq); + spin_unlock_bh(&be->inputq1.lock); + spin_unlock_bh(&be->inputq2.lock); + tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2); } -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, + int bearer_id, struct sk_buff_head *xmitq) { - n_ptr->links[l_ptr->b_ptr->identity] = NULL; - atomic_dec(&tipc_num_links); - n_ptr->link_cnt--; + struct tipc_link *ucl; + int rc; + + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq); + + if (rc & TIPC_LINK_DOWN_EVT) { + tipc_node_reset_links(n); + return; + } + + if (!(rc & TIPC_LINK_SND_STATE)) + return; + + /* If probe message, a STATE response will be sent anyway */ + if (msg_probe(hdr)) + return; + + /* Produce a STATE message carrying broadcast NACK */ + tipc_node_read_lock(n); + ucl = n->links[bearer_id].link; + if (ucl) + tipc_link_build_state_msg(ucl, xmitq); + tipc_node_read_unlock(n); } -static void node_established_contact(struct tipc_node *n_ptr) +/** + * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer_id: id of bearer message arrived on + * + * Invoked with no locks held. 
+ */ +static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) { - tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); - n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(); - tipc_bclink_add_node(n_ptr->addr); + int rc; + struct sk_buff_head xmitq; + struct tipc_bclink_entry *be; + struct tipc_link_entry *le; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + u32 dnode = msg_destnode(hdr); + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + /* If NACK for other node, let rcv link for that node peek into it */ + if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) + n = tipc_node_find(net, dnode); + else + n = tipc_node_find(net, msg_prevnode(hdr)); + if (!n) { + kfree_skb(skb); + return; + } + be = &n->bc_entry; + le = &n->links[bearer_id]; + + rc = tipc_bcast_rcv(net, be->link, skb); + + /* Broadcast ACKs are sent on a unicast link */ + if (rc & TIPC_LINK_SND_STATE) { + tipc_node_read_lock(n); + tipc_link_build_state_msg(le->link, &xmitq); + tipc_node_read_unlock(n); + } + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); + + if (!skb_queue_empty(&be->inputq1)) + tipc_node_mcast_rcv(n); + + /* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */ + if (!skb_queue_empty(&n->bc_entry.namedq)) + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); + + /* If reassembly or retransmission failure => reset all links to peer */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); + + tipc_node_put(n); } -static void node_name_purge_complete(unsigned long node_addr) +/** + * tipc_node_check_state - check and if necessary update node state + * @n: target tipc_node + * @skb: TIPC packet + * @bearer_id: identity of bearer delivering the packet + * @xmitq: queue for messages to be xmited on + * Return: true if state and msg are ok, otherwise false + */ +static bool 
tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, + int bearer_id, struct sk_buff_head *xmitq) { - struct tipc_node *n_ptr; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + int mtyp = msg_type(hdr); + u16 oseqno = msg_seqno(hdr); + u16 exp_pkts = msg_msgcnt(hdr); + u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; + int state = n->state; + struct tipc_link *l, *tnl, *pl = NULL; + struct tipc_media_addr *maddr; + int pb_id; + + if (trace_tipc_node_check_state_enabled()) { + trace_tipc_skb_dump(skb, false, "skb for node state check"); + trace_tipc_node_check_state(n, true, " "); + } + l = n->links[bearer_id].link; + if (!l) + return false; + rcv_nxt = tipc_link_rcv_nxt(l); + + + if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) + return true; + + /* Find parallel link, if any */ + for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { + if ((pb_id != bearer_id) && n->links[pb_id].link) { + pl = n->links[pb_id].link; + break; + } + } + + if (!tipc_link_validate_msg(l, hdr)) { + trace_tipc_skb_dump(skb, false, "PROTO invalid (2)!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (2)!"); + return false; + } + + /* Check and update node accesibility if applicable */ + if (state == SELF_UP_PEER_COMING) { + if (!tipc_link_is_up(l)) + return true; + if (!msg_peer_link_is_up(hdr)) + return true; + tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT); + } - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(node_addr); - if (n_ptr) { - tipc_node_lock(n_ptr); - n_ptr->block_setup &= ~WAIT_NAMES_GONE; - tipc_node_unlock(n_ptr); + if (state == SELF_DOWN_PEER_LEAVING) { + if (msg_peer_node_is_up(hdr)) + return false; + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; } - read_unlock_bh(&tipc_net_lock); + + if (state == SELF_LEAVING_PEER_DOWN) + return false; + + /* Ignore duplicate packets */ + if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) + return true; + + /* Initiate or update failover mode if applicable */ + if 
((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { + syncpt = oseqno + exp_pkts - 1; + if (pl && !tipc_link_is_reset(pl)) { + __tipc_node_link_down(n, &pb_id, xmitq, &maddr); + trace_tipc_node_link_down(n, true, + "node link down <- failover!"); + tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), + tipc_link_inputq(l)); + } + + /* If parallel link was already down, and this happened before + * the tunnel link came up, node failover was never started. + * Ensure that a FAILOVER_MSG is sent to get peer out of + * NODE_FAILINGOVER state, also this node must accept + * TUNNEL_MSGs from peer. + */ + if (n->state != NODE_FAILINGOVER) + tipc_node_link_failover(n, pl, l, xmitq); + + /* If pkts arrive out of order, use lowest calculated syncpt */ + if (less(syncpt, n->sync_point)) + n->sync_point = syncpt; + } + + /* Open parallel link when tunnel link reaches synch point */ + if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) { + if (!more(rcv_nxt, n->sync_point)) + return true; + tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); + if (pl) + tipc_link_fsm_evt(pl, LINK_FAILOVER_END_EVT); + return true; + } + + /* No syncing needed if only one link */ + if (!pl || !tipc_link_is_up(pl)) + return true; + + /* Initiate synch mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { + if (n->capabilities & TIPC_TUNNEL_ENHANCED) + syncpt = msg_syncpt(hdr); + else + syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1; + if (!tipc_link_is_up(l)) + __tipc_node_link_up(n, bearer_id, xmitq); + if (n->state == SELF_UP_PEER_UP) { + n->sync_point = syncpt; + tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); + } + } + + /* Open tunnel link when parallel link reaches synch point */ + if (n->state == NODE_SYNCHING) { + if (tipc_link_is_synching(l)) { + tnl = l; + } else { + tnl = pl; + pl = l; + } + inputq_len = skb_queue_len(tipc_link_inputq(pl)); + dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; + if 
(more(dlv_nxt, n->sync_point)) { + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + return true; + } + if (l == pl) + return true; + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) + return true; + if (usr == LINK_PROTOCOL) + return true; + return false; + } + return true; } -static void node_lost_contact(struct tipc_node *n_ptr) +/** + * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @b: pointer to bearer message arrived on + * + * Invoked with no locks held. Bearer pointer must point to a valid bearer + * structure (i.e. cannot be NULL), but bearer can be inactive. + */ +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { - char addr_string[16]; - u32 i; + struct sk_buff_head xmitq; + struct tipc_link_entry *le; + struct tipc_msg *hdr; + struct tipc_node *n; + int bearer_id = b->identity; + u32 self = tipc_own_addr(net); + int usr, rc = 0; + u16 bc_ack; +#ifdef CONFIG_TIPC_CRYPTO + struct tipc_ehdr *ehdr; + + /* Check if message must be decrypted first */ + if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb)) + goto rcv; + + ehdr = (struct tipc_ehdr *)skb->data; + if (likely(ehdr->user != LINK_CONFIG)) { + n = tipc_node_find(net, ntohl(ehdr->addr)); + if (unlikely(!n)) + goto discard; + } else { + n = tipc_node_find_by_id(net, ehdr->id); + } + skb_dst_force(skb); + tipc_crypto_rcv(net, (n) ? 
n->crypto_rx : NULL, &skb, b); + if (!skb) + return; - pr_info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); +rcv: +#endif + /* Ensure message is well-formed before touching the header */ + if (unlikely(!tipc_msg_validate(&skb))) + goto discard; + __skb_queue_head_init(&xmitq); + hdr = buf_msg(skb); + usr = msg_user(hdr); + bc_ack = msg_bcast_ack(hdr); + + /* Handle arrival of discovery or broadcast packet */ + if (unlikely(msg_non_seq(hdr))) { + if (unlikely(usr == LINK_CONFIG)) + return tipc_disc_rcv(net, skb, b); + else + return tipc_node_bc_rcv(net, skb, bearer_id); + } - /* Flush broadcast link info associated with lost node */ - if (n_ptr->bclink.recv_permitted) { - while (n_ptr->bclink.deferred_head) { - struct sk_buff *buf = n_ptr->bclink.deferred_head; - n_ptr->bclink.deferred_head = buf->next; - kfree_skb(buf); + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + + /* Locate neighboring node that sent packet */ + n = tipc_node_find(net, msg_prevnode(hdr)); + if (unlikely(!n)) + goto discard; + le = &n->links[bearer_id]; + + /* Ensure broadcast reception is in synch with peer's send state */ + if (unlikely(usr == LINK_PROTOCOL)) { + if (unlikely(skb_linearize(skb))) { + tipc_node_put(n); + goto discard; } - n_ptr->bclink.deferred_size = 0; + hdr = buf_msg(skb); + tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); + } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) { + tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); + } - if (n_ptr->bclink.defragm) { - kfree_skb(n_ptr->bclink.defragm); - n_ptr->bclink.defragm = NULL; + /* Receive packet directly if conditions permit */ + tipc_node_read_lock(n); + if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { + spin_lock_bh(&le->lock); + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + spin_unlock_bh(&le->lock); + } + 
tipc_node_read_unlock(n); + + /* Check/update node state before receiving */ + if (unlikely(skb)) { + if (unlikely(skb_linearize(skb))) + goto out_node_put; + tipc_node_write_lock(n); + if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } } + tipc_node_write_unlock(n); + } + + if (unlikely(rc & TIPC_LINK_UP_EVT)) + tipc_node_link_up(n, bearer_id, &xmitq); + + if (unlikely(rc & TIPC_LINK_DOWN_EVT)) + tipc_node_link_down(n, bearer_id, false); + + if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); + + if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1))) + tipc_node_mcast_rcv(n); - tipc_bclink_remove_node(n_ptr->addr); - tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); + if (!skb_queue_empty(&le->inputq)) + tipc_sk_rcv(net, &le->inputq); - n_ptr->bclink.recv_permitted = false; + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); + +out_node_put: + tipc_node_put(n); +discard: + kfree_skb(skb); +} + +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, + int prop) +{ + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; + struct sk_buff_head xmitq; + struct tipc_link_entry *e; + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + rcu_read_lock(); + + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_write_lock(n); + e = &n->links[bearer_id]; + if (e->link) { + if (prop == TIPC_NLA_PROP_TOL) + tipc_link_set_tolerance(e->link, b->tolerance, + &xmitq); + else if (prop == TIPC_NLA_PROP_MTU) + tipc_link_set_mtu(e->link, b->mtu); + + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); + } + + tipc_node_write_unlock(n); + tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } - /* Abort link changeover */ - for (i = 0; i < MAX_BEARERS; i++) { - struct tipc_link *l_ptr = 
n_ptr->links[i]; - if (!l_ptr) + rcu_read_unlock(); +} + +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct tipc_node *peer, *temp_node; + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)&node_id[0]; + u64 *w1 = (u64 *)&node_id[8]; + u32 addr; + int err; + + /* We identify the peer by its net */ + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); + if (err) + return err; + + /* attrs[TIPC_NLA_NET_NODEID] and attrs[TIPC_NLA_NET_ADDR] are + * mutually exclusive cases + */ + if (attrs[TIPC_NLA_NET_ADDR]) { + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!addr) + return -EINVAL; + } + + if (attrs[TIPC_NLA_NET_NODEID]) { + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + addr = hash128to32(node_id); + } + + if (in_own_node(net, addr)) + return -ENOTSUPP; + + spin_lock_bh(&tn->node_list_lock); + peer = tipc_node_find(net, addr); + if (!peer) { + spin_unlock_bh(&tn->node_list_lock); + return -ENXIO; + } + + tipc_node_write_lock(peer); + if (peer->state != SELF_DOWN_PEER_DOWN && + peer->state != SELF_DOWN_PEER_LEAVING) { + tipc_node_write_unlock(peer); + err = -EBUSY; + goto err_out; + } + + tipc_node_clear_links(peer); + tipc_node_write_unlock(peer); + tipc_node_delete(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); + err = 0; +err_out: + tipc_node_put(peer); + spin_unlock_bh(&tn->node_list_lock); + + return err; +} + +int tipc_nl_node_dump(struct 
sk_buff *skb, struct netlink_callback *cb) +{ + int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + int done = cb->args[0]; + int last_addr = cb->args[1]; + struct tipc_node *node; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); + } + + list_for_each_entry_rcu(node, &tn->node_list, list) { + if (node->preliminary) continue; - l_ptr->reset_checkpoint = l_ptr->next_in_no; - l_ptr->exp_msg_count = 0; - tipc_link_reset_fragments(l_ptr); + if (last_addr) { + if (node->addr == last_addr) + last_addr = 0; + else + continue; + } + + tipc_node_read_lock(node); + err = __tipc_nl_add_node(&msg, node); + if (err) { + last_addr = node->addr; + tipc_node_read_unlock(node); + goto out; + } + + tipc_node_read_unlock(node); } + done = 1; +out: + cb->args[0] = done; + cb->args[1] = last_addr; + rcu_read_unlock(); + + return skb->len; +} - /* Notify subscribers */ - tipc_nodesub_notify(n_ptr); +/* tipc_node_find_by_name - locate owner node of link by link's name + * @net: the applicable net namespace + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * + * Returns pointer to node owning the link, or 0 if no matching link is found. 
+ */ +static struct tipc_node *tipc_node_find_by_name(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l; + struct tipc_node *n; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_read_lock(n); + for (i = 0; i < MAX_BEARERS; i++) { + l = n->links[i].link; + if (l && !strcmp(tipc_link_name(l), link_name)) { + *bearer_id = i; + found_node = n; + break; + } + } + tipc_node_read_unlock(n); + if (found_node) + break; + } + rcu_read_unlock(); - /* Prevent re-contact with node until cleanup is done */ - n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE; - tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); + return found_node; } -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) { - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_node_info node_info; - u32 payload_size; + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct sk_buff_head xmitq; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + __skb_queue_head_init(&xmitq); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); + if 
(strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); - read_lock_bh(&tipc_net_lock); - if (!tipc_num_nodes) { - read_unlock_bh(&tipc_net_lock); - return tipc_cfg_reply_none(); + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + + link = node->links[bearer_id].link; + if (!link) { + res = -EINVAL; + goto out; } - /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; - if (payload_size > 32768u) { - read_unlock_bh(&tipc_net_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many nodes)"); + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_link_set_tolerance(link, tol, &xmitq); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + tipc_link_set_prio(link, prio, &xmitq); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 max_win; + + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, + tipc_link_min_win(link), + max_win); + } } - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) { - read_unlock_bh(&tipc_net_lock); - return NULL; + +out: + tipc_node_read_unlock(node); + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr, + NULL); + return res; +} + +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct tipc_nl_msg msg; + char *name; + int err; + + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + 
info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl); + if (err) + goto err_free; + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) { + err = -EINVAL; + goto err_free; + } + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); + err = -EINVAL; + goto err_free; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); + if (err) + goto err_free; + } + + return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; +} + +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_link_entry *le; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + err = -EINVAL; + if (!strcmp(link_name, tipc_bclink_name)) { + err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net)); + if (err) + return err; + return 0; + } else if (strstr(link_name, tipc_bclink_name)) { + rcu_read_lock(); + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + link = node->bc_entry.link; + if (link && 
!strcmp(link_name, tipc_link_name(link))) { + err = tipc_bclink_reset_stats(net, link); + tipc_node_read_unlock(node); + break; + } + tipc_node_read_unlock(node); + } + rcu_read_unlock(); + return err; + } + + node = tipc_node_find_by_name(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + le = &node->links[bearer_id]; + tipc_node_read_lock(node); + spin_lock_bh(&le->lock); + link = node->links[bearer_id].link; + if (!link) { + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return -EINVAL; } + tipc_link_reset_stats(link); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return 0; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link, + bool bc_link) +{ + u32 i; + int err; - /* Add TLVs for all nodes in scope */ - list_for_each_entry(n_ptr, &tipc_node_list, list) { - if (!tipc_in_scope(domain, n_ptr->addr)) + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i].link) continue; - node_info.addr = htonl(n_ptr->addr); - node_info.up = htonl(tipc_node_is_up(n_ptr)); - tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO, - &node_info, sizeof(node_info)); + + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); + if (err) + return err; } - read_unlock_bh(&tipc_net_lock); - return buf; + if (bc_link) { + *prev_link = i; + err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link); + if (err) + return err; + } + + *prev_link = 0; + + return 0; } -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_link_info link_info; - u32 payload_size; + struct net *net = sock_net(skb->sk); + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_net *tn = 
net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + bool bc_link = cb->args[3]; + int err; + + if (done) + return 0; + + if (!prev_node) { + /* Check if broadcast-receiver links dumping is needed */ + if (attrs && attrs[TIPC_NLA_LINK]) { + err = nla_parse_nested_deprecated(link, + TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, + NULL); + if (unlikely(err)) + return err; + if (unlikely(!link[TIPC_NLA_LINK_BROADCAST])) + return -EINVAL; + bc_link = true; + } + } + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. 
+ */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link, bc_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg, tn->bcl); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link, bc_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + cb->args[3] = bc_link; + + return skb->len; +} + +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_MON_MAX + 1]; + struct net *net = sock_net(skb->sk); + int err; + + if (!info->attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + info->extack); + if (err) + return err; + + if (attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]) { + u32 val; + + val = nla_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]); + err = tipc_nl_monitor_set_threshold(net, val); + if (err) + return err; + } + + return 0; +} + +static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) +{ + struct nlattr *attrs; + void *hdr; + u32 val; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if 
(!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); + val = tipc_nl_monitor_get_threshold(net); - if (!tipc_own_addr) - return tipc_cfg_reply_none(); + if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) + goto attr_msg_full; - read_lock_bh(&tipc_net_lock); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); - /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE(sizeof(link_info)) * - (atomic_read(&tipc_num_links) + 1); - if (payload_size > 32768u) { - read_unlock_bh(&tipc_net_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many links)"); + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + int err; + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_add_monitor_prop(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; } - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) { - read_unlock_bh(&tipc_net_lock); - return NULL; + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_bearer = cb->args[0]; + struct tipc_nl_msg msg; + int bearer_id; + int err; + + if (prev_bearer == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) { + err = __tipc_nl_add_monitor(net, &msg, bearer_id); + if (err) + break; + } + rtnl_unlock(); + cb->args[0] = bearer_id; + + return skb->len; +} + +int 
tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_node = cb->args[1]; + u32 bearer_id = cb->args[2]; + int done = cb->args[0]; + struct tipc_nl_msg msg; + int err; + + if (!prev_node) { + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; + struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; + + if (!attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + NULL); + if (err) + return err; + + if (!mon[TIPC_NLA_MON_REF]) + return -EINVAL; + + bearer_id = nla_get_u32(mon[TIPC_NLA_MON_REF]); + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; } - /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); - link_info.up = htonl(1); - strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + err = tipc_nl_add_monitor_peer(net, &msg, bearer_id, &prev_node); + if (!err) + done = 1; - /* Add TLVs for any other links in scope */ - list_for_each_entry(n_ptr, &tipc_node_list, list) { - u32 i; + rtnl_unlock(); + cb->args[0] = done; + cb->args[1] = prev_node; + cb->args[2] = bearer_id; + + return skb->len; +} - if (!tipc_in_scope(domain, n_ptr->addr)) +#ifdef CONFIG_TIPC_CRYPTO +static int tipc_nl_retrieve_key(struct nlattr **attrs, + struct tipc_aead_key **pkey) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + struct tipc_aead_key *key; + + if (!attr) + return -ENODATA; + + if (nla_len(attr) < sizeof(*key)) + return -EINVAL; + key = (struct tipc_aead_key *)nla_data(attr); + if (key->keylen > TIPC_AEAD_KEYLEN_MAX || + nla_len(attr) < tipc_aead_key_size(key)) + return -EINVAL; + + *pkey = key; + return 0; +} + +static int 
tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_ID]; + + if (!attr) + return -ENODATA; + + if (nla_len(attr) < TIPC_NODEID_LEN) + return -EINVAL; + + *node_id = (u8 *)nla_data(attr); + return 0; +} + +static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_REKEYING]; + + if (!attr) + return -ENODATA; + + *intv = nla_get_u32(attr); + return 0; +} + +static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx; + struct tipc_node *n = NULL; + struct tipc_aead_key *ukey; + bool rekeying = true, master_key = false; + u8 *id, *own_id, mode; + u32 intv = 0; + int rc = 0; + + if (!info->attrs[TIPC_NLA_NODE]) + return -EINVAL; + + rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX, + info->attrs[TIPC_NLA_NODE], + tipc_nl_node_policy, info->extack); + if (rc) + return rc; + + own_id = tipc_own_id(net); + if (!own_id) { + GENL_SET_ERR_MSG(info, "not found own node identity (set id?)"); + return -EPERM; + } + + rc = tipc_nl_retrieve_rekeying(attrs, &intv); + if (rc == -ENODATA) + rekeying = false; + + rc = tipc_nl_retrieve_key(attrs, &ukey); + if (rc == -ENODATA && rekeying) + goto rekeying; + else if (rc) + return rc; + + rc = tipc_aead_key_validate(ukey, info); + if (rc) + return rc; + + rc = tipc_nl_retrieve_nodeid(attrs, &id); + switch (rc) { + case -ENODATA: + mode = CLUSTER_KEY; + master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]); + break; + case 0: + mode = PER_NODE_KEY; + if (memcmp(id, own_id, NODE_ID_LEN)) { + n = tipc_node_find_by_id(net, id) ?: + tipc_node_create(net, 0, id, 0xffffu, 0, true); + if (unlikely(!n)) + return -ENOMEM; + c = n->crypto_rx; + } + break; + default: + return rc; + } + + /* Initiate the TX/RX key */ + rc = tipc_crypto_key_init(c, ukey, mode, master_key); + if 
(n) + tipc_node_put(n); + + if (unlikely(rc < 0)) { + GENL_SET_ERR_MSG(info, "unable to initiate or attach new key"); + return rc; + } else if (c == tx) { + /* Distribute TX key but not master one */ + if (!master_key && tipc_crypto_key_distr(tx, rc, NULL)) + GENL_SET_ERR_MSG(info, "failed to replicate new key"); +rekeying: + /* Schedule TX rekeying if needed */ + tipc_crypto_rekeying_sched(tx, rekeying, intv); + } + + return 0; +} + +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_set_key(skb, info); + rtnl_unlock(); + + return err; +} + +static int __tipc_nl_node_flush_key(struct sk_buff *skb, + struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + + tipc_crypto_key_flush(tn->crypto_tx); + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) + tipc_crypto_key_flush(n->crypto_rx); + rcu_read_unlock(); + + return 0; +} + +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_flush_key(skb, info); + rtnl_unlock(); + + return err; +} +#endif + +/** + * tipc_node_dump - dump TIPC node data + * @n: tipc node to be dumped + * @more: dump more? + * - false: dump only tipc node data + * - true: dump node link data as well + * @buf: returned buffer of dump data in format + */ +int tipc_node_dump(struct tipc_node *n, bool more, char *buf) +{ + int i = 0; + size_t sz = (more) ? 
NODE_LMAX : NODE_LMIN; + + if (!n) { + i += scnprintf(buf, sz, "node data: (null)\n"); + return i; + } + + i += scnprintf(buf, sz, "node data: %x", n->addr); + i += scnprintf(buf + i, sz - i, " %x", n->state); + i += scnprintf(buf + i, sz - i, " %d", n->active_links[0]); + i += scnprintf(buf + i, sz - i, " %d", n->active_links[1]); + i += scnprintf(buf + i, sz - i, " %x", n->action_flags); + i += scnprintf(buf + i, sz - i, " %u", n->failover_sent); + i += scnprintf(buf + i, sz - i, " %u", n->sync_point); + i += scnprintf(buf + i, sz - i, " %d", n->link_cnt); + i += scnprintf(buf + i, sz - i, " %u", n->working_links); + i += scnprintf(buf + i, sz - i, " %x", n->capabilities); + i += scnprintf(buf + i, sz - i, " %lu\n", n->keepalive_intv); + + if (!more) + return i; + + i += scnprintf(buf + i, sz - i, "link_entry[0]:\n"); + i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[0].mtu); + i += scnprintf(buf + i, sz - i, " media: "); + i += tipc_media_addr_printf(buf + i, sz - i, &n->links[0].maddr); + i += scnprintf(buf + i, sz - i, "\n"); + i += tipc_link_dump(n->links[0].link, TIPC_DUMP_NONE, buf + i); + i += scnprintf(buf + i, sz - i, " inputq: "); + i += tipc_list_dump(&n->links[0].inputq, false, buf + i); + + i += scnprintf(buf + i, sz - i, "link_entry[1]:\n"); + i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[1].mtu); + i += scnprintf(buf + i, sz - i, " media: "); + i += tipc_media_addr_printf(buf + i, sz - i, &n->links[1].maddr); + i += scnprintf(buf + i, sz - i, "\n"); + i += tipc_link_dump(n->links[1].link, TIPC_DUMP_NONE, buf + i); + i += scnprintf(buf + i, sz - i, " inputq: "); + i += tipc_list_dump(&n->links[1].inputq, false, buf + i); + + i += scnprintf(buf + i, sz - i, "bclink:\n "); + i += tipc_link_dump(n->bc_entry.link, TIPC_DUMP_NONE, buf + i); + + return i; +} + +void tipc_node_pre_cleanup_net(struct net *exit_net) +{ + struct tipc_node *n; + struct tipc_net *tn; + struct net *tmp; + + rcu_read_lock(); + for_each_net_rcu(tmp) { + if (tmp == 
exit_net) continue; - tipc_node_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - if (!n_ptr->links[i]) + tn = tipc_net(tmp); + if (!tn) + continue; + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_rcu(n, &tn->node_list, list) { + if (!n->peer_net) + continue; + if (n->peer_net != exit_net) continue; - link_info.dest = htonl(n_ptr->addr); - link_info.up = htonl(tipc_link_is_up(n_ptr->links[i])); - strcpy(link_info.str, n_ptr->links[i]->name); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, - &link_info, sizeof(link_info)); + tipc_node_write_lock(n); + n->peer_net = NULL; + n->peer_hash_mix = 0; + tipc_node_write_unlock_fast(n); + break; } - tipc_node_unlock(n_ptr); + spin_unlock_bh(&tn->node_list_lock); } - - read_unlock_bh(&tipc_net_lock); - return buf; + rcu_read_unlock(); } diff --git a/net/tipc/node.h b/net/tipc/node.h index 3c189b35b102..154a5bbb0d29 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,8 +1,8 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, 2010-2011, Wind River Systems + * Copyright (c) 2000-2006, 2014-2016, Ericsson AB + * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -37,95 +37,95 @@ #ifndef _TIPC_NODE_H #define _TIPC_NODE_H -#include "node_subscr.h" #include "addr.h" #include "net.h" #include "bearer.h" +#include "msg.h" -/* - * Out-of-range value for node signature - */ -#define INVALID_NODE_SIG 0x10000 - -/* Flags used to block (re)establishment of contact with a neighboring node */ -#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */ -#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ -#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ - -/** - * struct tipc_node - TIPC node structure - * @addr: network address of node - * @lock: spinlock governing access to structure - * @hash: links to adjacent nodes in unsorted hash chain - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @nsub: list of "node down" subscriptions monitoring node - * @active_links: pointers to active links to node - * @links: pointers to all links to node - * @working_links: number of working links to node (both active and standby) - * @block_setup: bit mask of conditions preventing link establishment to node - * @link_cnt: number of links to node - * @permit_changeover: non-zero if node has redundant links to this system - * @signature: node instance identifier - * @bclink: broadcast-related info - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_size: number of OOS b'cast messages in deferred queue - * @deferred_head: oldest OOS b'cast message received from node - * @deferred_tail: newest OOS b'cast message received from node - * @defragm: list of partially reassembled b'cast message fragments from node - * @recv_permitted: true if node is allowed 
to receive b'cast messages +/* Optional capabilities supported by this code version */ -struct tipc_node { - u32 addr; - spinlock_t lock; - struct hlist_node hash; - struct list_head list; - struct list_head nsub; - struct tipc_link *active_links[2]; - struct tipc_link *links[MAX_BEARERS]; - int link_cnt; - int working_links; - int block_setup; - int permit_changeover; - u32 signature; - struct { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff *deferred_head; - struct sk_buff *deferred_tail; - struct sk_buff *defragm; - bool recv_permitted; - } bclink; +enum { + TIPC_SYN_BIT = (1), + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BCAST_STATE_NACK = (1 << 2), + TIPC_BLOCK_FLOWCTL = (1 << 3), + TIPC_BCAST_RCAST = (1 << 4), + TIPC_NODE_ID128 = (1 << 5), + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7), + TIPC_GAP_ACK_BLOCK = (1 << 8), + TIPC_TUNNEL_ENHANCED = (1 << 9), + TIPC_NAGLE = (1 << 10), + TIPC_NAMED_BCAST = (1 << 11) }; -extern struct list_head tipc_node_list; +#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ + TIPC_BCAST_SYNCH | \ + TIPC_BCAST_STATE_NACK | \ + TIPC_BCAST_RCAST | \ + TIPC_BLOCK_FLOWCTL | \ + TIPC_NODE_ID128 | \ + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL | \ + TIPC_GAP_ACK_BLOCK | \ + TIPC_TUNNEL_ENHANCED | \ + TIPC_NAGLE | \ + TIPC_NAMED_BCAST) -struct tipc_node *tipc_node_find(u32 addr); -struct tipc_node *tipc_node_create(u32 addr); -void tipc_node_delete(struct tipc_node *n_ptr); -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -int tipc_node_active_links(struct tipc_node *n_ptr); -int tipc_node_redundant_links(struct tipc_node *n_ptr); -int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff 
*tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); +#define INVALID_BEARER_ID -1 -static inline void tipc_node_lock(struct tipc_node *n_ptr) -{ - spin_lock_bh(&n_ptr->lock); -} - -static inline void tipc_node_unlock(struct tipc_node *n_ptr) -{ - spin_unlock_bh(&n_ptr->lock); -} +void tipc_node_stop(struct net *net); +bool tipc_node_get_id(struct net *net, u32 addr, u8 *id); +u32 tipc_node_get_addr(struct tipc_node *node); +char *tipc_node_get_id_str(struct tipc_node *node); +void tipc_node_put(struct tipc_node *node); +void tipc_node_get(struct tipc_node *node); +struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, + u16 capabilities, u32 hash_mixes, + bool preliminary); +#ifdef CONFIG_TIPC_CRYPTO +struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n); +struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos); +struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr); +#endif +u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr); +void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, + struct tipc_bearer *bearer, + u16 capabilities, u32 signature, u32 hash_mixes, + struct tipc_media_addr *maddr, + bool *respond, bool *dupl_addr); +void tipc_node_delete_links(struct net *net, int bearer_id); +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); +int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, + int selector); +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list); +int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); +void 
tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests); +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected); +bool tipc_node_is_up(struct net *net, u32 addr); +u16 tipc_node_get_capabilities(struct net *net, u32 addr); +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb); +#ifdef CONFIG_TIPC_CRYPTO +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info); +#endif +void tipc_node_pre_cleanup_net(struct net *exit_net); #endif diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c deleted file mode 100644 index 8a7384c04add..000000000000 --- a/net/tipc/node_subscr.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * net/tipc/node_subscr.c: TIPC "node down" subscription handling - * - * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, 2010-2011, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" -#include "node_subscr.h" -#include "node.h" - -/** - * tipc_nodesub_subscribe - create "node down" subscription for specified node - */ -void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, - void *usr_handle, net_ev_handler handle_down) -{ - if (in_own_node(addr)) { - node_sub->node = NULL; - return; - } - - node_sub->node = tipc_node_find(addr); - if (!node_sub->node) { - pr_warn("Node subscription rejected, unknown node 0x%x\n", - addr); - return; - } - node_sub->handle_node_down = handle_down; - node_sub->usr_handle = usr_handle; - - tipc_node_lock(node_sub->node); - list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub); - tipc_node_unlock(node_sub->node); -} - -/** - * tipc_nodesub_unsubscribe - cancel "node down" subscription (if any) - */ -void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) -{ - if (!node_sub->node) - return; - - tipc_node_lock(node_sub->node); - list_del_init(&node_sub->nodesub_list); - tipc_node_unlock(node_sub->node); -} - -/** - * tipc_nodesub_notify - notify subscribers that a node is unreachable - * - * Note: node is locked by caller - */ -void tipc_nodesub_notify(struct tipc_node *node) -{ - struct tipc_node_subscr *ns; - - list_for_each_entry(ns, &node->nsub, nodesub_list) { - if (ns->handle_node_down) { - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); - ns->handle_node_down = NULL; - } - } -} diff --git a/net/tipc/port.c b/net/tipc/port.c deleted file mode 100644 index b3ed2fcab4fb..000000000000 --- a/net/tipc/port.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * net/tipc/port.c: TIPC port code - * - * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, 2010-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" -#include "config.h" -#include "port.h" -#include "name_table.h" - -/* Connection management: */ -#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define CONFIRMED 0 -#define PROBING 1 - -#define MAX_REJECT_SIZE 1024 - -DEFINE_SPINLOCK(tipc_port_list_lock); - -static LIST_HEAD(ports); -static void port_handle_node_down(unsigned long ref); -static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err); -static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err); -static void port_timeout(unsigned long ref); - - -static u32 port_peernode(struct tipc_port *p_ptr) -{ - return msg_destnode(&p_ptr->phdr); -} - -static u32 port_peerport(struct tipc_port *p_ptr) -{ - return msg_destport(&p_ptr->phdr); -} - -/** - * tipc_port_peer_msg - verify message was sent by connected port's peer - * - * Handles cases where the node's network address has changed from - * the default of <0.0.0> to its configured setting. - */ -int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg) -{ - u32 peernode; - u32 orignode; - - if (msg_origport(msg) != port_peerport(p_ptr)) - return 0; - - orignode = msg_orignode(msg); - peernode = port_peernode(p_ptr); - return (orignode == peernode) || - (!orignode && (peernode == tipc_own_addr)) || - (!peernode && (orignode == tipc_own_addr)); -} - -/** - * tipc_multicast - send a multicast message to local and remote destinations - */ -int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, - u32 num_sect, struct iovec const *msg_sect, - unsigned int total_len) -{ - struct tipc_msg *hdr; - struct sk_buff *buf; - struct sk_buff *ibuf = NULL; - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port *oport = tipc_port_deref(ref); - int ext_targets; - int res; - - if (unlikely(!oport)) - return -EINVAL; - - /* Create multicast message */ - hdr = &oport->phdr; - msg_set_type(hdr, TIPC_MCAST_MSG); - msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); - msg_set_destport(hdr, 0); 
- msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); - msg_set_hdr_sz(hdr, MCAST_H_SIZE); - res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - &buf); - if (unlikely(!buf)) - return res; - - /* Figure out where to send multicast message */ - ext_targets = tipc_nametbl_mc_translate(seq->type, seq->lower, seq->upper, - TIPC_NODE_SCOPE, &dports); - - /* Send message to destinations (duplicate it only if necessary) */ - if (ext_targets) { - if (dports.count != 0) { - ibuf = skb_copy(buf, GFP_ATOMIC); - if (ibuf == NULL) { - tipc_port_list_free(&dports); - kfree_skb(buf); - return -ENOMEM; - } - } - res = tipc_bclink_send_msg(buf); - if ((res < 0) && (dports.count != 0)) - kfree_skb(ibuf); - } else { - ibuf = buf; - } - - if (res >= 0) { - if (ibuf) - tipc_port_recv_mcast(ibuf, &dports); - } else { - tipc_port_list_free(&dports); - } - return res; -} - -/** - * tipc_port_recv_mcast - deliver multicast message to all destination ports - * - * If there is no port list, perform a lookup to create one - */ -void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp) -{ - struct tipc_msg *msg; - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item = dp; - int cnt = 0; - - msg = buf_msg(buf); - - /* Create destination port list, if one wasn't supplied */ - if (dp == NULL) { - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - TIPC_CLUSTER_SCOPE, - &dports); - item = dp = &dports; - } - - /* Deliver a copy of message to each destination port */ - if (dp->count != 0) { - msg_set_destnode(msg, tipc_own_addr); - if (dp->count == 1) { - msg_set_destport(msg, dp->ports[0]); - tipc_port_recv_msg(buf); - tipc_port_list_free(dp); - return; - } - for (; cnt < dp->count; cnt++) { - int index = cnt % PLSIZE; - struct sk_buff *b = skb_clone(buf, GFP_ATOMIC); - - if (b == NULL) { - pr_warn("Unable to deliver 
multicast message(s)\n"); - goto exit; - } - if ((index == 0) && (cnt != 0)) - item = item->next; - msg_set_destport(buf_msg(b), item->ports[index]); - tipc_port_recv_msg(b); - } - } -exit: - kfree_skb(buf); - tipc_port_list_free(dp); -} - -/** - * tipc_createport - create a generic TIPC port - * - * Returns pointer to (locked) TIPC port, or NULL if unable to create it - */ -struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, - struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - u32 ref; - - p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC); - if (!p_ptr) { - pr_warn("Port creation failed, no memory\n"); - return NULL; - } - ref = tipc_ref_acquire(p_ptr, &p_ptr->lock); - if (!ref) { - pr_warn("Port creation failed, ref. table exhausted\n"); - kfree(p_ptr); - return NULL; - } - - p_ptr->sk = sk; - p_ptr->max_pkt = MAX_PKT_DEFAULT; - p_ptr->ref = ref; - INIT_LIST_HEAD(&p_ptr->wait_list); - INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); - p_ptr->dispatcher = dispatcher; - p_ptr->wakeup = wakeup; - k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); - INIT_LIST_HEAD(&p_ptr->publications); - INIT_LIST_HEAD(&p_ptr->port_list); - - /* - * Must hold port list lock while initializing message header template - * to ensure a change to node's own network address doesn't result - * in template containing out-dated network address information - */ - spin_lock_bh(&tipc_port_list_lock); - msg = &p_ptr->phdr; - tipc_msg_init(msg, importance, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); - list_add_tail(&p_ptr->port_list, &ports); - spin_unlock_bh(&tipc_port_list_lock); - return p_ptr; -} - -int tipc_deleteport(u32 ref) -{ - struct tipc_port *p_ptr; - struct sk_buff *buf = NULL; - - tipc_withdraw(ref, 0, NULL); - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - - tipc_ref_discard(ref); - tipc_port_unlock(p_ptr); - - 
k_cancel_timer(&p_ptr->timer); - if (p_ptr->connected) { - buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); - tipc_nodesub_unsubscribe(&p_ptr->subscription); - } - - spin_lock_bh(&tipc_port_list_lock); - list_del(&p_ptr->port_list); - list_del(&p_ptr->wait_list); - spin_unlock_bh(&tipc_port_list_lock); - k_term_timer(&p_ptr->timer); - kfree(p_ptr); - tipc_net_route_msg(buf); - return 0; -} - -static int port_unreliable(struct tipc_port *p_ptr) -{ - return msg_src_droppable(&p_ptr->phdr); -} - -int tipc_portunreliable(u32 ref, unsigned int *isunreliable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *isunreliable = port_unreliable(p_ptr); - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_set_portunreliable(u32 ref, unsigned int isunreliable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0)); - tipc_port_unlock(p_ptr); - return 0; -} - -static int port_unreturnable(struct tipc_port *p_ptr) -{ - return msg_dest_droppable(&p_ptr->phdr); -} - -int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *isunrejectable = port_unreturnable(p_ptr); - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0)); - tipc_port_unlock(p_ptr); - return 0; -} - -/* - * port_build_proto_msg(): create connection protocol message for port - * - * On entry the port must be locked and connected. 
- */ -static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, - u32 type, u32 ack) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE, - port_peernode(p_ptr)); - msg_set_destport(msg, port_peerport(p_ptr)); - msg_set_origport(msg, p_ptr->ref); - msg_set_msgcnt(msg, ack); - } - return buf; -} - -int tipc_reject_msg(struct sk_buff *buf, u32 err) -{ - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *rbuf; - struct tipc_msg *rmsg; - int hdr_sz; - u32 imp; - u32 data_sz = msg_data_sz(msg); - u32 src_node; - u32 rmsg_sz; - - /* discard rejected message if it shouldn't be returned to sender */ - if (WARN(!msg_isdata(msg), - "attempt to reject message with user=%u", msg_user(msg))) { - dump_stack(); - goto exit; - } - if (msg_errcode(msg) || msg_dest_droppable(msg)) - goto exit; - - /* - * construct returned message by copying rejected message header and - * data (or subset), then updating header fields that need adjusting - */ - hdr_sz = msg_hdr_sz(msg); - rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE); - - rbuf = tipc_buf_acquire(rmsg_sz); - if (rbuf == NULL) - goto exit; - - rmsg = buf_msg(rbuf); - skb_copy_to_linear_data(rbuf, msg, rmsg_sz); - - if (msg_connected(rmsg)) { - imp = msg_importance(rmsg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(rmsg, ++imp); - } - msg_set_non_seq(rmsg, 0); - msg_set_size(rmsg, rmsg_sz); - msg_set_errcode(rmsg, err); - msg_set_prevnode(rmsg, tipc_own_addr); - msg_swap_words(rmsg, 4, 5); - if (!msg_short(rmsg)) - msg_swap_words(rmsg, 6, 7); - - /* send self-abort message when rejecting on a connected port */ - if (msg_connected(msg)) { - struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); - - if (p_ptr) { - struct sk_buff *abuf = NULL; - - if (p_ptr->connected) - abuf = port_build_self_abort_msg(p_ptr, err); - tipc_port_unlock(p_ptr); - tipc_net_route_msg(abuf); 
- } - } - - /* send returned message & dispose of rejected message */ - src_node = msg_prevnode(msg); - if (in_own_node(src_node)) - tipc_port_recv_msg(rbuf); - else - tipc_link_send(rbuf, src_node, msg_link_selector(rmsg)); -exit: - kfree_skb(buf); - return data_sz; -} - -int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, u32 num_sect, - unsigned int total_len, int err) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - &buf); - if (!buf) - return res; - - return tipc_reject_msg(buf, err); -} - -static void port_timeout(unsigned long ref) -{ - struct tipc_port *p_ptr = tipc_port_lock(ref); - struct sk_buff *buf = NULL; - - if (!p_ptr) - return; - - if (!p_ptr->connected) { - tipc_port_unlock(p_ptr); - return; - } - - /* Last probe answered ? */ - if (p_ptr->probing_state == PROBING) { - buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); - } else { - buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); - p_ptr->probing_state = PROBING; - k_start_timer(&p_ptr->timer, p_ptr->probing_interval); - } - tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); -} - - -static void port_handle_node_down(unsigned long ref) -{ - struct tipc_port *p_ptr = tipc_port_lock(ref); - struct sk_buff *buf = NULL; - - if (!p_ptr) - return; - buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); - tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); -} - - -static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err) -{ - struct sk_buff *buf = port_build_peer_abort_msg(p_ptr, err); - - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - msg_swap_words(msg, 4, 5); - msg_swap_words(msg, 6, 7); - } - return buf; -} - - -static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - u32 imp; - - if (!p_ptr->connected) - return NULL; - - buf = tipc_buf_acquire(BASIC_H_SIZE); - 
if (buf) { - msg = buf_msg(buf); - memcpy(msg, &p_ptr->phdr, BASIC_H_SIZE); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - msg_set_size(msg, BASIC_H_SIZE); - imp = msg_importance(msg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, ++imp); - msg_set_errcode(msg, err); - } - return buf; -} - -void tipc_port_recv_proto_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *p_ptr; - struct sk_buff *r_buf = NULL; - u32 destport = msg_destport(msg); - int wakeable; - - /* Validate connection */ - p_ptr = tipc_port_lock(destport); - if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) { - r_buf = tipc_buf_acquire(BASIC_H_SIZE); - if (r_buf) { - msg = buf_msg(r_buf); - tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, - BASIC_H_SIZE, msg_orignode(msg)); - msg_set_errcode(msg, TIPC_ERR_NO_PORT); - msg_set_origport(msg, destport); - msg_set_destport(msg, msg_origport(msg)); - } - if (p_ptr) - tipc_port_unlock(p_ptr); - goto exit; - } - - /* Process protocol message sent by peer */ - switch (msg_type(msg)) { - case CONN_ACK: - wakeable = tipc_port_congested(p_ptr) && p_ptr->congested && - p_ptr->wakeup; - p_ptr->acked += msg_msgcnt(msg); - if (!tipc_port_congested(p_ptr)) { - p_ptr->congested = 0; - if (wakeable) - p_ptr->wakeup(p_ptr); - } - break; - case CONN_PROBE: - r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0); - break; - default: - /* CONN_PROBE_REPLY or unrecognized - no action required */ - break; - } - p_ptr->probing_state = CONFIRMED; - tipc_port_unlock(p_ptr); -exit: - tipc_net_route_msg(r_buf); - kfree_skb(buf); -} - -static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) -{ - struct publication *publ; - int ret; - - if (full_id) - ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), p_ptr->ref); - else - ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref); - - if (p_ptr->connected) { - 
u32 dport = port_peerport(p_ptr); - u32 destnode = port_peernode(p_ptr); - - ret += tipc_snprintf(buf + ret, len - ret, - " connected to <%u.%u.%u:%u>", - tipc_zone(destnode), - tipc_cluster(destnode), - tipc_node(destnode), dport); - if (p_ptr->conn_type != 0) - ret += tipc_snprintf(buf + ret, len - ret, - " via {%u,%u}", p_ptr->conn_type, - p_ptr->conn_instance); - } else if (p_ptr->published) { - ret += tipc_snprintf(buf + ret, len - ret, " bound to"); - list_for_each_entry(publ, &p_ptr->publications, pport_list) { - if (publ->lower == publ->upper) - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u}", publ->type, - publ->lower); - else - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u,%u}", publ->type, - publ->lower, publ->upper); - } - } - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -struct sk_buff *tipc_port_get_ports(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - struct tipc_port *p_ptr; - int str_len = 0; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - spin_lock_bh(&tipc_port_list_lock); - list_for_each_entry(p_ptr, &ports, port_list) { - spin_lock_bh(p_ptr->lock); - str_len += port_print(p_ptr, pb, pb_len, 0); - spin_unlock_bh(p_ptr->lock); - } - spin_unlock_bh(&tipc_port_list_lock); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -void tipc_port_reinit(void) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - - spin_lock_bh(&tipc_port_list_lock); - list_for_each_entry(p_ptr, &ports, port_list) { - msg = &p_ptr->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); - } - spin_unlock_bh(&tipc_port_list_lock); -} - -void tipc_acknowledge(u32 ref, u32 ack) -{ - struct tipc_port *p_ptr; - struct sk_buff *buf 
= NULL; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return; - if (p_ptr->connected) { - p_ptr->conn_unacked -= ack; - buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); - } - tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); -} - -int tipc_portimportance(u32 ref, unsigned int *importance) -{ - struct tipc_port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *importance = (unsigned int)msg_importance(&p_ptr->phdr); - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_set_portimportance(u32 ref, unsigned int imp) -{ - struct tipc_port *p_ptr; - - if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - msg_set_importance(&p_ptr->phdr, (u32)imp); - tipc_port_unlock(p_ptr); - return 0; -} - - -int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) -{ - struct tipc_port *p_ptr; - struct publication *publ; - u32 key; - int res = -EINVAL; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - - if (p_ptr->connected) - goto exit; - key = ref + p_ptr->pub_count + 1; - if (key == ref) { - res = -EADDRINUSE; - goto exit; - } - publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, p_ptr->ref, key); - if (publ) { - list_add(&publ->pport_list, &p_ptr->publications); - p_ptr->pub_count++; - p_ptr->published = 1; - res = 0; - } -exit: - tipc_port_unlock(p_ptr); - return res; -} - -int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) -{ - struct tipc_port *p_ptr; - struct publication *publ; - struct publication *tpubl; - int res = -EINVAL; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - if (!seq) { - list_for_each_entry_safe(publ, tpubl, - &p_ptr->publications, pport_list) { - tipc_nametbl_withdraw(publ->type, publ->lower, - publ->ref, publ->key); - } - res = 0; - } else { - list_for_each_entry_safe(publ, tpubl, - &p_ptr->publications, pport_list) { - if (publ->scope != scope) - 
continue; - if (publ->type != seq->type) - continue; - if (publ->lower != seq->lower) - continue; - if (publ->upper != seq->upper) - break; - tipc_nametbl_withdraw(publ->type, publ->lower, - publ->ref, publ->key); - res = 0; - break; - } - } - if (list_empty(&p_ptr->publications)) - p_ptr->published = 0; - tipc_port_unlock(p_ptr); - return res; -} - -int tipc_connect(u32 ref, struct tipc_portid const *peer) -{ - struct tipc_port *p_ptr; - int res; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - res = __tipc_connect(ref, p_ptr, peer); - tipc_port_unlock(p_ptr); - return res; -} - -/* - * __tipc_connect - connect to a remote peer - * - * Port must be locked. - */ -int __tipc_connect(u32 ref, struct tipc_port *p_ptr, - struct tipc_portid const *peer) -{ - struct tipc_msg *msg; - int res = -EINVAL; - - if (p_ptr->published || p_ptr->connected) - goto exit; - if (!peer->ref) - goto exit; - - msg = &p_ptr->phdr; - msg_set_destnode(msg, peer->node); - msg_set_destport(msg, peer->ref); - msg_set_type(msg, TIPC_CONN_MSG); - msg_set_lookup_scope(msg, 0); - msg_set_hdr_sz(msg, SHORT_H_SIZE); - - p_ptr->probing_interval = PROBING_INTERVAL; - p_ptr->probing_state = CONFIRMED; - p_ptr->connected = 1; - k_start_timer(&p_ptr->timer, p_ptr->probing_interval); - - tipc_nodesub_subscribe(&p_ptr->subscription, peer->node, - (void *)(unsigned long)ref, - (net_ev_handler)port_handle_node_down); - res = 0; -exit: - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); - return res; -} - -/* - * __tipc_disconnect - disconnect port from peer - * - * Port must be locked. - */ -int __tipc_disconnect(struct tipc_port *tp_ptr) -{ - int res; - - if (tp_ptr->connected) { - tp_ptr->connected = 0; - /* let timer expire on it's own to avoid deadlock! */ - tipc_nodesub_unsubscribe(&tp_ptr->subscription); - res = 0; - } else { - res = -ENOTCONN; - } - return res; -} - -/* - * tipc_disconnect(): Disconnect port form peer. - * This is a node local operation. 
- */ -int tipc_disconnect(u32 ref) -{ - struct tipc_port *p_ptr; - int res; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - res = __tipc_disconnect(p_ptr); - tipc_port_unlock(p_ptr); - return res; -} - -/* - * tipc_shutdown(): Send a SHUTDOWN msg to peer and disconnect - */ -int tipc_shutdown(u32 ref) -{ - struct tipc_port *p_ptr; - struct sk_buff *buf = NULL; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - - buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); - tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); - return tipc_disconnect(ref); -} - -/** - * tipc_port_recv_msg - receive message from lower layer and deliver to port user - */ -int tipc_port_recv_msg(struct sk_buff *buf) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg = buf_msg(buf); - u32 destport = msg_destport(msg); - u32 dsz = msg_data_sz(msg); - u32 err; - - /* forward unresolved named message */ - if (unlikely(!destport)) { - tipc_net_route_msg(buf); - return dsz; - } - - /* validate destination & pass to port, otherwise reject message */ - p_ptr = tipc_port_lock(destport); - if (likely(p_ptr)) { - err = p_ptr->dispatcher(p_ptr, buf); - tipc_port_unlock(p_ptr); - if (likely(!err)) - return dsz; - } else { - err = TIPC_ERR_NO_PORT; - } - - return tipc_reject_msg(buf, err); -} - -/* - * tipc_port_recv_sections(): Concatenate and deliver sectioned - * message for this node. 
- */ -static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect, - struct iovec const *msg_sect, - unsigned int total_len) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, - MAX_MSG_SIZE, &buf); - if (likely(buf)) - tipc_port_recv_msg(buf); - return res; -} - -/** - * tipc_send - send message sections on connection - */ -int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len) -{ - struct tipc_port *p_ptr; - u32 destnode; - int res; - - p_ptr = tipc_port_deref(ref); - if (!p_ptr || !p_ptr->connected) - return -EINVAL; - - p_ptr->congested = 1; - if (!tipc_port_congested(p_ptr)) { - destnode = port_peernode(p_ptr); - if (likely(!in_own_node(destnode))) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, - total_len, destnode); - else - res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect, - total_len); - - if (likely(res != -ELINKCONG)) { - p_ptr->congested = 0; - if (res > 0) - p_ptr->sent++; - return res; - } - } - if (port_unreliable(p_ptr)) { - p_ptr->congested = 0; - return total_len; - } - return -ELINKCONG; -} - -/** - * tipc_send2name - send message sections to port name - */ -int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - u32 destnode = domain; - u32 destport; - int res; - - p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_hdr_sz(msg, NAMED_H_SIZE); - msg_set_nametype(msg, name->type); - msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - destport = tipc_nametbl_translate(name->type, name->instance, &destnode); - msg_set_destnode(msg, destnode); - msg_set_destport(msg, destport); - - if (likely(destport || 
destnode)) { - if (likely(in_own_node(destnode))) - res = tipc_port_recv_sections(p_ptr, num_sect, - msg_sect, total_len); - else if (tipc_own_addr) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, - num_sect, total_len, - destnode); - else - res = tipc_port_reject_sections(p_ptr, msg, msg_sect, - num_sect, total_len, - TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (port_unreliable(p_ptr)) { - return total_len; - } - return -ELINKCONG; - } - return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NAME); -} - -/** - * tipc_send2port - send message sections to port identity - */ -int tipc_send2port(u32 ref, struct tipc_portid const *dest, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - int res; - - p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_lookup_scope(msg, 0); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect, - total_len); - else if (tipc_own_addr) - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, - total_len, dest->node); - else - res = tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect, - total_len, TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (port_unreliable(p_ptr)) { - return total_len; - } - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h deleted file mode 100644 index 5a7026b9c345..000000000000 --- a/net/tipc/port.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * net/tipc/port.h: Include file for TIPC port code - * - * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, 2010-2013, Wind River Systems - * 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _TIPC_PORT_H -#define _TIPC_PORT_H - -#include "ref.h" -#include "net.h" -#include "msg.h" -#include "node_subscr.h" - -#define TIPC_FLOW_CONTROL_WIN 512 -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) - -/** - * struct tipc_port - TIPC port structure - * @sk: pointer to socket handle - * @lock: pointer to spinlock for controlling access to port - * @connected: non-zero if port is currently connected to a peer port - * @conn_type: TIPC type used when connection was established - * @conn_instance: TIPC instance used when connection was established - * @conn_unacked: number of unacknowledged messages received from peer port - * @published: non-zero if port has one or more associated names - * @congested: non-zero if cannot send because of link or port congestion - * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry - * @phdr: preformatted message header used when sending messages - * @port_list: adjacent ports in TIPC's global list of ports - * @dispatcher: ptr to routine which handles received messages - * @wakeup: ptr to routine to call when port is no longer congested - * @wait_list: adjacent ports in list of ports waiting on link congestion - * @waiting_pkts: - * @sent: # of non-empty messages sent by port - * @acked: # of non-empty message acknowledgements from connected port's peer - * @publications: list of publications for port - * @pub_count: total # of publications port has made during its lifetime - * @probing_state: - * @probing_interval: - * @timer_ref: - * @subscription: "node down" subscription used to terminate failed connections - */ -struct tipc_port { - struct sock *sk; - spinlock_t *lock; - int connected; - u32 conn_type; - u32 conn_instance; - u32 conn_unacked; - int published; - u32 congested; - u32 max_pkt; - u32 ref; - struct tipc_msg phdr; - struct list_head port_list; - u32 
(*dispatcher)(struct tipc_port *, struct sk_buff *); - void (*wakeup)(struct tipc_port *); - struct list_head wait_list; - u32 waiting_pkts; - u32 sent; - u32 acked; - struct list_head publications; - u32 pub_count; - u32 probing_state; - u32 probing_interval; - struct timer_list timer; - struct tipc_node_subscr subscription; -}; - -extern spinlock_t tipc_port_list_lock; -struct tipc_port_list; - -/* - * TIPC port manipulation routines - */ -struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, - struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance); - -int tipc_reject_msg(struct sk_buff *buf, u32 err); - -void tipc_acknowledge(u32 port_ref, u32 ack); - -int tipc_deleteport(u32 portref); - -int tipc_portimportance(u32 portref, unsigned int *importance); -int tipc_set_portimportance(u32 portref, unsigned int importance); - -int tipc_portunreliable(u32 portref, unsigned int *isunreliable); -int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); - -int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); -int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); - -int tipc_publish(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); -int tipc_withdraw(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); - -int tipc_connect(u32 portref, struct tipc_portid const *port); - -int tipc_disconnect(u32 portref); - -int tipc_shutdown(u32 ref); - - -/* - * The following routines require that the port be locked on entry - */ -int __tipc_disconnect(struct tipc_port *tp_ptr); -int __tipc_connect(u32 ref, struct tipc_port *p_ptr, - struct tipc_portid const *peer); -int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); - -/* - * TIPC messaging routines - */ -int tipc_port_recv_msg(struct sk_buff *buf); -int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len); - -int 
tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len); - -int tipc_send2port(u32 portref, struct tipc_portid const *dest, - unsigned int num_sect, struct iovec const *msg_sect, - unsigned int total_len); - -int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, - unsigned int section_count, struct iovec const *msg, - unsigned int total_len); - -int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, u32 num_sect, - unsigned int total_len, int err); -struct sk_buff *tipc_port_get_ports(void); -void tipc_port_recv_proto_msg(struct sk_buff *buf); -void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp); -void tipc_port_reinit(void); - -/** - * tipc_port_lock - lock port instance referred to and return its pointer - */ -static inline struct tipc_port *tipc_port_lock(u32 ref) -{ - return (struct tipc_port *)tipc_ref_lock(ref); -} - -/** - * tipc_port_unlock - unlock a port instance - * - * Can use pointer instead of tipc_ref_unlock() since port is already locked. - */ -static inline void tipc_port_unlock(struct tipc_port *p_ptr) -{ - spin_unlock_bh(p_ptr->lock); -} - -static inline struct tipc_port *tipc_port_deref(u32 ref) -{ - return (struct tipc_port *)tipc_ref_deref(ref); -} - -static inline int tipc_port_congested(struct tipc_port *p_ptr) -{ - return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); -} - -#endif diff --git a/net/tipc/ref.c b/net/tipc/ref.c deleted file mode 100644 index 2a2a938dc22c..000000000000 --- a/net/tipc/ref.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * net/tipc/ref.c: TIPC object registry code - * - * Copyright (c) 1991-2006, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" -#include "ref.h" - -/** - * struct reference - TIPC object reference entry - * @object: pointer to object associated with reference entry - * @lock: spinlock controlling access to object - * @ref: reference value for object (combines instance & array index info) - */ -struct reference { - void *object; - spinlock_t lock; - u32 ref; -}; - -/** - * struct tipc_ref_table - table of TIPC object reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused object reference entry - * @last_free: array index of last unused object reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* - * Object reference table consists of 2**N entries. - * - * State Object ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). 
- */ - -static struct ref_table tipc_ref_table; - -static DEFINE_RWLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for objects - */ -int tipc_ref_table_init(u32 requested_size, u32 start) -{ - struct reference *table; - u32 actual_size; - - /* account for unused entry, then round up size to a power of 2 */ - - requested_size++; - for (actual_size = 16; actual_size < requested_size; actual_size <<= 1) - /* do nothing */ ; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_size * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = requested_size; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_size - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; - - return 0; -} - -/** - * tipc_ref_table_stop - destroy reference table for objects - */ -void tipc_ref_table_stop(void) -{ - if (!tipc_ref_table.entries) - return; - - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; -} - -/** - * tipc_ref_acquire - create reference to an object - * - * Register an object pointer in reference table and lock the object. - * Returns a unique reference value that is used from then on to retrieve the - * object pointer, or to determine that the object has been deregistered. - * - * Note: The object is returned in the locked state so that the caller can - * register a partially initialized object, without running the risk that - * the object will be accessed before initialization is complete. - */ -u32 tipc_ref_acquire(void *object, spinlock_t **lock) -{ - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref; - struct reference *entry = NULL; - - if (!object) { - pr_err("Attempt to acquire ref. to non-existent obj\n"); - return 0; - } - if (!tipc_ref_table.entries) { - pr_err("Ref. 
table not found in acquisition attempt\n"); - return 0; - } - - /* take a free entry, if available; otherwise initialize a new entry */ - write_lock_bh(&ref_table_lock); - if (tipc_ref_table.first_free) { - index = tipc_ref_table.first_free; - entry = &(tipc_ref_table.entries[index]); - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &(tipc_ref_table.entries[index]); - spin_lock_init(&entry->lock); - ref = tipc_ref_table.start_mask + index; - } else { - ref = 0; - } - write_unlock_bh(&ref_table_lock); - - /* - * Grab the lock so no one else can modify this entry - * While we assign its ref value & object pointer - */ - if (entry) { - spin_lock_bh(&entry->lock); - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; - /* - * keep it locked, the caller is responsible - * for unlocking this when they're done with it - */ - } - - return ref; -} - -/** - * tipc_ref_discard - invalidate references to an object - * - * Disallow future references to an object and free up the entry for re-use. - * Note: The entry's spin_lock may still be busy after discard - */ -void tipc_ref_discard(u32 ref) -{ - struct reference *entry; - u32 index; - u32 index_mask; - - if (!tipc_ref_table.entries) { - pr_err("Ref. table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &(tipc_ref_table.entries[index]); - - write_lock_bh(&ref_table_lock); - - if (!entry->object) { - pr_err("Attempt to discard ref. 
to non-existent obj\n"); - goto exit; - } - if (entry->ref != ref) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; - } - - /* - * mark entry as unused; increment instance part of entry's reference - * to invalidate any subsequent references - */ - entry->object = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* append entry to free entry list */ - if (tipc_ref_table.first_free == 0) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; - -exit: - write_unlock_bh(&ref_table_lock); -} - -/** - * tipc_ref_lock - lock referenced object and return pointer to it - */ -void *tipc_ref_lock(u32 ref) -{ - if (likely(tipc_ref_table.entries)) { - struct reference *entry; - - entry = &tipc_ref_table.entries[ref & - tipc_ref_table.index_mask]; - if (likely(entry->ref != 0)) { - spin_lock_bh(&entry->lock); - if (likely((entry->ref == ref) && (entry->object))) - return entry->object; - spin_unlock_bh(&entry->lock); - } - } - return NULL; -} - - -/** - * tipc_ref_deref - return pointer referenced object (without locking it) - */ -void *tipc_ref_deref(u32 ref) -{ - if (likely(tipc_ref_table.entries)) { - struct reference *entry; - - entry = &tipc_ref_table.entries[ref & - tipc_ref_table.index_mask]; - if (likely(entry->ref == ref)) - return entry->object; - } - return NULL; -} diff --git a/net/tipc/server.c b/net/tipc/server.c deleted file mode 100644 index 19da5abe0fa6..000000000000 --- a/net/tipc/server.c +++ /dev/null @@ -1,596 +0,0 @@ -/* - * net/tipc/server.c: TIPC server infrastructure - * - * Copyright (c) 2012-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "server.h" -#include "core.h" -#include <net/sock.h> - -/* Number of messages to send before rescheduling */ -#define MAX_SEND_MSG_COUNT 25 -#define MAX_RECV_MSG_COUNT 25 -#define CF_CONNECTED 1 - -#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) - -/** - * struct tipc_conn - TIPC connection structure - * @kref: reference counter to connection object - * @conid: connection identifier - * @sock: socket handler associated with connection - * @flags: indicates connection state - * @server: pointer to connected server - * @rwork: receive work item - * @usr_data: user-specified field - * @rx_action: what to do when connection socket is active - * @outqueue: pointer to first outbound message in queue - * @outqueue_lock: controll access to the outqueue - * @outqueue: list of connection objects for its server - * @swork: send work item - */ -struct tipc_conn { - struct kref kref; - int conid; - struct socket *sock; - unsigned long flags; - struct tipc_server *server; - struct work_struct rwork; - int (*rx_action) (struct tipc_conn *con); - void *usr_data; - struct list_head outqueue; - spinlock_t outqueue_lock; - struct work_struct swork; -}; - -/* An entry waiting to be sent */ -struct outqueue_entry { - struct list_head list; - struct kvec iov; - struct sockaddr_tipc dest; -}; - -static void tipc_recv_work(struct work_struct *work); -static void tipc_send_work(struct work_struct *work); -static void tipc_clean_outqueues(struct tipc_conn *con); - -static void tipc_conn_kref_release(struct kref *kref) -{ - struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; - - if (con->sock) { - tipc_sock_release_local(con->sock); - con->sock = NULL; - } - - tipc_clean_outqueues(con); - - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); - - kfree(con); -} - -static void conn_put(struct tipc_conn *con) -{ - kref_put(&con->kref, tipc_conn_kref_release); -} - -static void conn_get(struct 
tipc_conn *con) -{ - kref_get(&con->kref); -} - -static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) -{ - struct tipc_conn *con; - - spin_lock_bh(&s->idr_lock); - con = idr_find(&s->conn_idr, conid); - if (con) - conn_get(con); - spin_unlock_bh(&s->idr_lock); - return con; -} - -static void sock_data_ready(struct sock *sk, int unused) -{ - struct tipc_conn *con; - - read_lock(&sk->sk_callback_lock); - con = sock2con(sk); - if (con && test_bit(CF_CONNECTED, &con->flags)) { - conn_get(con); - if (!queue_work(con->server->rcv_wq, &con->rwork)) - conn_put(con); - } - read_unlock(&sk->sk_callback_lock); -} - -static void sock_write_space(struct sock *sk) -{ - struct tipc_conn *con; - - read_lock(&sk->sk_callback_lock); - con = sock2con(sk); - if (con && test_bit(CF_CONNECTED, &con->flags)) { - conn_get(con); - if (!queue_work(con->server->send_wq, &con->swork)) - conn_put(con); - } - read_unlock(&sk->sk_callback_lock); -} - -static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) -{ - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - - sk->sk_data_ready = sock_data_ready; - sk->sk_write_space = sock_write_space; - sk->sk_user_data = con; - - con->sock = sock; - - write_unlock_bh(&sk->sk_callback_lock); -} - -static void tipc_unregister_callbacks(struct tipc_conn *con) -{ - struct sock *sk = con->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_user_data = NULL; - write_unlock_bh(&sk->sk_callback_lock); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - - if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); - - tipc_unregister_callbacks(con); - - /* We shouldn't flush pending works as we may be in the - * thread. 
In fact the races with pending rx/tx work structs - * are harmless for us here as we have already deleted this - * connection from server connection list and set - * sk->sk_user_data to 0 before releasing connection object. - */ - kernel_sock_shutdown(con->sock, SHUT_RDWR); - - conn_put(con); - } -} - -static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) -{ - struct tipc_conn *con; - int ret; - - con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); - if (!con) - return ERR_PTR(-ENOMEM); - - kref_init(&con->kref); - INIT_LIST_HEAD(&con->outqueue); - spin_lock_init(&con->outqueue_lock); - INIT_WORK(&con->swork, tipc_send_work); - INIT_WORK(&con->rwork, tipc_recv_work); - - spin_lock_bh(&s->idr_lock); - ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); - if (ret < 0) { - kfree(con); - spin_unlock_bh(&s->idr_lock); - return ERR_PTR(-ENOMEM); - } - con->conid = ret; - s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); - - set_bit(CF_CONNECTED, &con->flags); - con->server = s; - - return con; -} - -static int tipc_receive_from_sock(struct tipc_conn *con) -{ - struct msghdr msg = {}; - struct tipc_server *s = con->server; - struct sockaddr_tipc addr; - struct kvec iov; - void *buf; - int ret; - - buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); - if (!buf) { - ret = -ENOMEM; - goto out_close; - } - - iov.iov_base = buf; - iov.iov_len = s->max_rcvbuf_size; - msg.msg_name = &addr; - ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, - MSG_DONTWAIT); - if (ret <= 0) { - kmem_cache_free(s->rcvbuf_cache, buf); - goto out_close; - } - - s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); - - kmem_cache_free(s->rcvbuf_cache, buf); - - return 0; - -out_close: - if (ret != -EWOULDBLOCK) - tipc_close_conn(con); - else if (ret == 0) - /* Don't return success if we really got EOF */ - ret = -EAGAIN; - - return ret; -} - -static int tipc_accept_from_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct socket *sock = 
con->sock; - struct socket *newsock; - struct tipc_conn *newcon; - int ret; - - ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); - if (ret < 0) - return ret; - - newcon = tipc_alloc_conn(con->server); - if (IS_ERR(newcon)) { - ret = PTR_ERR(newcon); - sock_release(newsock); - return ret; - } - - newcon->rx_action = tipc_receive_from_sock; - tipc_register_callbacks(newsock, newcon); - - /* Notify that new connection is incoming */ - newcon->usr_data = s->tipc_conn_new(newcon->conid); - - /* Wake up receive process in case of 'SYN+' message */ - newsock->sk->sk_data_ready(newsock->sk, 0); - return ret; -} - -static struct socket *tipc_create_listen_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct socket *sock = NULL; - int ret; - - ret = tipc_sock_create_local(s->type, &sock); - if (ret < 0) - return NULL; - ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, - (char *)&s->imp, sizeof(s->imp)); - if (ret < 0) - goto create_err; - ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); - if (ret < 0) - goto create_err; - - switch (s->type) { - case SOCK_STREAM: - case SOCK_SEQPACKET: - con->rx_action = tipc_accept_from_sock; - - ret = kernel_listen(sock, 0); - if (ret < 0) - goto create_err; - break; - case SOCK_DGRAM: - case SOCK_RDM: - con->rx_action = tipc_receive_from_sock; - break; - default: - pr_err("Unknown socket type %d\n", s->type); - goto create_err; - } - return sock; - -create_err: - sock_release(sock); - con->sock = NULL; - return NULL; -} - -static int tipc_open_listening_sock(struct tipc_server *s) -{ - struct socket *sock; - struct tipc_conn *con; - - con = tipc_alloc_conn(s); - if (IS_ERR(con)) - return PTR_ERR(con); - - sock = tipc_create_listen_sock(con); - if (!sock) - return -EINVAL; - - tipc_register_callbacks(sock, con); - return 0; -} - -static struct outqueue_entry *tipc_alloc_entry(void *data, int len) -{ - struct outqueue_entry *entry; - void *buf; - - entry = 
kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); - if (!entry) - return NULL; - - buf = kmalloc(len, GFP_ATOMIC); - if (!buf) { - kfree(entry); - return NULL; - } - - memcpy(buf, data, len); - entry->iov.iov_base = buf; - entry->iov.iov_len = len; - - return entry; -} - -static void tipc_free_entry(struct outqueue_entry *e) -{ - kfree(e->iov.iov_base); - kfree(e); -} - -static void tipc_clean_outqueues(struct tipc_conn *con) -{ - struct outqueue_entry *e, *safe; - - spin_lock_bh(&con->outqueue_lock); - list_for_each_entry_safe(e, safe, &con->outqueue, list) { - list_del(&e->list); - tipc_free_entry(e); - } - spin_unlock_bh(&con->outqueue_lock); -} - -int tipc_conn_sendmsg(struct tipc_server *s, int conid, - struct sockaddr_tipc *addr, void *data, size_t len) -{ - struct outqueue_entry *e; - struct tipc_conn *con; - - con = tipc_conn_lookup(s, conid); - if (!con) - return -EINVAL; - - e = tipc_alloc_entry(data, len); - if (!e) { - conn_put(con); - return -ENOMEM; - } - - if (addr) - memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); - - spin_lock_bh(&con->outqueue_lock); - list_add_tail(&e->list, &con->outqueue); - spin_unlock_bh(&con->outqueue_lock); - - if (test_bit(CF_CONNECTED, &con->flags)) - if (!queue_work(s->send_wq, &con->swork)) - conn_put(con); - - return 0; -} - -void tipc_conn_terminate(struct tipc_server *s, int conid) -{ - struct tipc_conn *con; - - con = tipc_conn_lookup(s, conid); - if (con) { - tipc_close_conn(con); - conn_put(con); - } -} - -static void tipc_send_to_sock(struct tipc_conn *con) -{ - int count = 0; - struct tipc_server *s = con->server; - struct outqueue_entry *e; - struct msghdr msg; - int ret; - - spin_lock_bh(&con->outqueue_lock); - while (1) { - e = list_entry(con->outqueue.next, struct outqueue_entry, - list); - if ((struct list_head *) e == &con->outqueue) - break; - spin_unlock_bh(&con->outqueue_lock); - - memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; - - if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) 
{ - msg.msg_name = &e->dest; - msg.msg_namelen = sizeof(struct sockaddr_tipc); - } - ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, - e->iov.iov_len); - if (ret == -EWOULDBLOCK || ret == 0) { - cond_resched(); - goto out; - } else if (ret < 0) { - goto send_err; - } - - /* Don't starve users filling buffers */ - if (++count >= MAX_SEND_MSG_COUNT) { - cond_resched(); - count = 0; - } - - spin_lock_bh(&con->outqueue_lock); - list_del(&e->list); - tipc_free_entry(e); - } - spin_unlock_bh(&con->outqueue_lock); -out: - return; - -send_err: - tipc_close_conn(con); -} - -static void tipc_recv_work(struct work_struct *work) -{ - struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); - int count = 0; - - while (test_bit(CF_CONNECTED, &con->flags)) { - if (con->rx_action(con)) - break; - - /* Don't flood Rx machine */ - if (++count >= MAX_RECV_MSG_COUNT) { - cond_resched(); - count = 0; - } - } - conn_put(con); -} - -static void tipc_send_work(struct work_struct *work) -{ - struct tipc_conn *con = container_of(work, struct tipc_conn, swork); - - if (test_bit(CF_CONNECTED, &con->flags)) - tipc_send_to_sock(con); - - conn_put(con); -} - -static void tipc_work_stop(struct tipc_server *s) -{ - destroy_workqueue(s->rcv_wq); - destroy_workqueue(s->send_wq); -} - -static int tipc_work_start(struct tipc_server *s) -{ - s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); - if (!s->rcv_wq) { - pr_err("can't start tipc receive workqueue\n"); - return -ENOMEM; - } - - s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); - if (!s->send_wq) { - pr_err("can't start tipc send workqueue\n"); - destroy_workqueue(s->rcv_wq); - return -ENOMEM; - } - - return 0; -} - -int tipc_server_start(struct tipc_server *s) -{ - int ret; - - spin_lock_init(&s->idr_lock); - idr_init(&s->conn_idr); - s->idr_in_use = 0; - - s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!s->rcvbuf_cache) - return -ENOMEM; - - ret = 
tipc_work_start(s); - if (ret < 0) { - kmem_cache_destroy(s->rcvbuf_cache); - return ret; - } - s->enabled = 1; - - return tipc_open_listening_sock(s); -} - -void tipc_server_stop(struct tipc_server *s) -{ - struct tipc_conn *con; - int total = 0; - int id; - - if (!s->enabled) - return; - - s->enabled = 0; - spin_lock_bh(&s->idr_lock); - for (id = 0; total < s->idr_in_use; id++) { - con = idr_find(&s->conn_idr, id); - if (con) { - total++; - spin_unlock_bh(&s->idr_lock); - tipc_close_conn(con); - spin_lock_bh(&s->idr_lock); - } - } - spin_unlock_bh(&s->idr_lock); - - tipc_work_stop(s); - kmem_cache_destroy(s->rcvbuf_cache); - idr_destroy(&s->conn_idr); -} diff --git a/net/tipc/server.h b/net/tipc/server.h deleted file mode 100644 index 98b23f20bc0f..000000000000 --- a/net/tipc/server.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * net/tipc/server.h: Include file for TIPC server code - * - * Copyright (c) 2012-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_SERVER_H -#define _TIPC_SERVER_H - -#include "core.h" - -#define TIPC_SERVER_NAME_LEN 32 - -/** - * struct tipc_server - TIPC server structure - * @conn_idr: identifier set of connection - * @idr_lock: protect the connection identifier set - * @idr_in_use: amount of allocated identifier entry - * @rcvbuf_cache: memory cache of server receive buffer - * @rcv_wq: receive workqueue - * @send_wq: send workqueue - * @max_rcvbuf_size: maximum permitted receive message length - * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_shutdown: callback will be called when connection is shut down - * @tipc_conn_recvmsg: callback will be called when message arrives - * @saddr: TIPC server address - * @name: server name - * @imp: message importance - * @type: socket type - * @enabled: identify whether server is launched or not - */ -struct tipc_server { - struct idr conn_idr; - spinlock_t idr_lock; - int idr_in_use; - struct kmem_cache *rcvbuf_cache; - struct workqueue_struct *rcv_wq; - struct workqueue_struct *send_wq; - int max_rcvbuf_size; - void *(*tipc_conn_new) (int conid); - void (*tipc_conn_shutdown) (int conid, void 
*usr_data); - void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); - struct sockaddr_tipc *saddr; - const char name[TIPC_SERVER_NAME_LEN]; - int imp; - int type; - int enabled; -}; - -int tipc_conn_sendmsg(struct tipc_server *s, int conid, - struct sockaddr_tipc *addr, void *data, size_t len); - -/** - * tipc_conn_terminate - terminate connection with server - * - * Note: Must call it in process context since it might sleep - */ -void tipc_conn_terminate(struct tipc_server *s, int conid); - -int tipc_server_start(struct tipc_server *s); - -void tipc_server_stop(struct tipc_server *s); - -#endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ce8249c76827..817b07d95a91 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,8 +1,9 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012 Ericsson AB + * Copyright (c) 2001-2007, 2012-2019, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,115 +35,414 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "core.h" -#include "port.h" - -#include <linux/export.h> -#include <net/sock.h> +#include <linux/rhashtable.h> +#include <linux/sched/signal.h> +#include <trace/events/sock.h> -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ +#include "core.h" +#include "name_table.h" +#include "node.h" +#include "link.h" +#include "name_distr.h" +#include "socket.h" +#include "bcast.h" +#include "netlink.h" +#include "group.h" +#include "trace.h" + +#define NAGLE_START_INIT 4 +#define NAGLE_START_MAX 1024 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 +#define TIPC_ACK_RATE 4 /* ACK at 1/4 of rcv window size */ + +enum { + TIPC_LISTEN = TCP_LISTEN, + TIPC_ESTABLISHED = TCP_ESTABLISHED, + TIPC_OPEN = TCP_CLOSE, + TIPC_DISCONNECTING = TCP_CLOSE_WAIT, + TIPC_CONNECTING = TCP_SYN_SENT, +}; -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +struct sockaddr_pair { + struct sockaddr_tipc sock; + struct sockaddr_tipc member; +}; +/** + * struct tipc_sock - TIPC socket structure + * @sk: socket - interacts with 'port' and with user via the socket API + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @maxnagle: maximum size of msg which can be subject to nagle + * @portid: unique port identity in TIPC socket hash table + * @phdr: preformatted message header used when sending messages + * @cong_links: list of congested links + * @publications: list of publications for port + * @pub_count: total # of publications port has made during its lifetime + * @conn_timeout: the time we can wait for an unresponded setup request + * @probe_unacked: probe has not received ack yet + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @cong_link_cnt: number of congested links + * @snt_unacked: # messages 
sent by socket, and not yet acked by peer + * @snd_win: send window size + * @peer_caps: peer capabilities mask + * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @rcv_win: receive window size + * @peer: 'connected' peer for dgram/rdm + * @node: hash table node + * @mc_method: cookie for use between socket and broadcast layer + * @rcu: rcu struct for tipc_sock + * @group: TIPC communications group + * @oneway: message count in one direction (FIXME) + * @nagle_start: current nagle value + * @snd_backlog: send backlog count + * @msg_acc: messages accepted; used in managing backlog and nagle + * @pkt_cnt: TIPC socket packet count + * @expect_ack: whether this TIPC socket is expecting an ack + * @nodelay: setsockopt() TIPC_NODELAY setting + * @group_is_open: TIPC socket group is fully open (FIXME) + * @published: true if port has one or more associated names + * @conn_addrtype: address type used when establishing connection + */ struct tipc_sock { struct sock sk; - struct tipc_port *p; - struct tipc_portid peer_name; - unsigned int conn_timeout; + u32 max_pkt; + u32 maxnagle; + u32 portid; + struct tipc_msg phdr; + struct list_head cong_links; + struct list_head publications; + u32 pub_count; + atomic_t dupl_rcvcnt; + u16 conn_timeout; + bool probe_unacked; + u16 cong_link_cnt; + u16 snt_unacked; + u16 snd_win; + u16 peer_caps; + u16 rcv_unacked; + u16 rcv_win; + struct sockaddr_tipc peer; + struct rhash_head node; + struct tipc_mc_method mc_method; + struct rcu_head rcu; + struct tipc_group *group; + u32 oneway; + u32 nagle_start; + u16 snd_backlog; + u16 msg_acc; + u16 pkt_cnt; + bool expect_ack; + bool nodelay; + bool group_is_open; + bool published; + u8 conn_addrtype; }; -#define tipc_sk(sk) ((struct tipc_sock *)(sk)) -#define tipc_sk_port(sk) (tipc_sk(sk)->p) - -#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \ - (sock->state == SS_DISCONNECTING)) - -static int backlog_rcv(struct sock *sk, struct sk_buff 
*skb); -static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); -static void wakeupdispatch(struct tipc_port *tport); -static void tipc_data_ready(struct sock *sk, int len); +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); -static int release(struct socket *sock); -static int accept(struct socket *sock, struct socket *new_sock, int flags); +static void tipc_sock_destruct(struct sock *sk); +static int tipc_release(struct socket *sock); +static void tipc_sk_timeout(struct timer_list *t); +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); +static int tipc_sk_leave(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); +static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); +static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; - static struct proto tipc_proto; -static struct proto tipc_proto_kern; +static const struct rhashtable_params tsk_rht_params; + +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return msg_prevnode(&tsk->phdr); +} -static int sockets_enabled; +static u32 tsk_peer_node(struct tipc_sock *tsk) +{ + return msg_destnode(&tsk->phdr); +} -/* - * Revised TIPC socket locking policy: - * - * Most socket operations take the standard socket lock when they start - * and hold it until they finish (or until they need to sleep). 
Acquiring - * this lock grants the owner exclusive access to the fields of the socket - * data structures, with the exception of the backlog queue. A few socket - * operations can be done without taking the socket lock because they only - * read socket information that never changes during the life of the socket. - * - * Socket operations may acquire the lock for the associated TIPC port if they - * need to perform an operation on the port. If any routine needs to acquire - * both the socket lock and the port lock it must take the socket lock first - * to avoid the risk of deadlock. - * - * The dispatcher handling incoming messages cannot grab the socket lock in - * the standard fashion, since invoked it runs at the BH level and cannot block. - * Instead, it checks to see if the socket lock is currently owned by someone, - * and either handles the message itself or adds it to the socket's backlog - * queue; in the latter case the queued message is processed once the process - * owning the socket lock releases it. - * - * NOTE: Releasing the socket lock while an operation is sleeping overcomes - * the problem of a blocked socket operation preventing any other operations - * from occurring. However, applications must be careful if they have - * multiple threads trying to send (or receive) on the same socket, as these - * operations might interfere with each other. For example, doing a connect - * and a receive at the same time might allow the receive to consume the - * ACK message meant for the connect. While additional work could be done - * to try and overcome this, it doesn't seem to be worthwhile at the present. - * - * NOTE: Releasing the socket lock while an operation is sleeping also ensures - * that another operation that must be performed in a non-blocking manner is - * not delayed for very long because the lock has already been taken. 
- * - * NOTE: This code assumes that certain fields of a port/socket pair are - * constant over its lifetime; such fields can be examined without taking - * the socket lock and/or port lock, and do not need to be re-read even - * after resuming processing after waiting. These fields include: - * - socket type - * - pointer to socket sk structure (aka tipc_sock structure) - * - pointer to port structure - * - port reference +static u32 tsk_peer_port(struct tipc_sock *tsk) +{ + return msg_destport(&tsk->phdr); +} + +static bool tsk_unreliable(struct tipc_sock *tsk) +{ + return msg_src_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) +{ + msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); +} + +static bool tsk_unreturnable(struct tipc_sock *tsk) +{ + return msg_dest_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) +{ + msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0); +} + +static int tsk_importance(struct tipc_sock *tsk) +{ + return msg_importance(&tsk->phdr); +} + +static struct tipc_sock *tipc_sk(const struct sock *sk) +{ + return container_of(sk, struct tipc_sock, sk); +} + +int tsk_set_importance(struct sock *sk, int imp) +{ + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp); + return 0; +} + +static bool tsk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->snt_unacked > tsk->snd_win; +} + +static u16 tsk_blocks(int len) +{ + return ((len / FLOWCTL_BLK_SZ) + 1); +} + +/* tsk_adv_blocks(): translate a buffer size in bytes to number of + * advertisable blocks, taking into account the ratio truesize(len)/len + * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ */ +static u16 tsk_adv_blocks(int len) +{ + return len / FLOWCTL_BLK_SZ / 4; +} + +/* tsk_inc(): increment counter for sent or received data + * - If block based flow control is not supported by peer we + * fall 
back to message based ditto, incrementing the counter + */ +static u16 tsk_inc(struct tipc_sock *tsk, int msglen) +{ + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return ((msglen / FLOWCTL_BLK_SZ) + 1); + return 1; +} + +/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle + */ +static void tsk_set_nagle(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + + tsk->maxnagle = 0; + if (sk->sk_type != SOCK_STREAM) + return; + if (tsk->nodelay) + return; + if (!(tsk->peer_caps & TIPC_NAGLE)) + return; + /* Limit node local buffer size to avoid receive queue overflow */ + if (tsk->max_pkt == MAX_MSG_SIZE) + tsk->maxnagle = 1500; + else + tsk->maxnagle = tsk->max_pkt; +} /** - * advance_rx_queue - discard first buffer in socket receive queue + * tsk_advance_rx_queue - discard first buffer in socket receive queue + * @sk: network socket * * Caller must hold socket lock */ -static void advance_rx_queue(struct sock *sk) +static void tsk_advance_rx_queue(struct sock *sk) { + trace_tipc_sk_advance_rx(sk, NULL, TIPC_DUMP_SK_RCVQ, " "); kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); } +/* tipc_sk_respond() : send response message back to sender + */ +static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) +{ + u32 selector; + u32 dnode; + u32 onode = tipc_own_addr(sock_net(sk)); + + if (!tipc_msg_reverse(onode, &skb, err)) + return; + + trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@sk_respond!"); + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); +} + /** - * reject_rx_queue - reject all buffers in socket receive queue + * tsk_rej_rx_queue - reject all buffers in socket receive queue + * @sk: network socket + * @error: response error code * * Caller must hold socket lock */ -static void reject_rx_queue(struct sock *sk) +static void tsk_rej_rx_queue(struct sock *sk, int error) { - struct sk_buff *buf; + struct sk_buff *skb; + + while ((skb 
= __skb_dequeue(&sk->sk_receive_queue))) + tipc_sk_respond(sk, skb, error); +} + +static bool tipc_sk_connected(const struct sock *sk) +{ + return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED; +} + +/* tipc_sk_type_connectionless - check if the socket is a datagram socket + * @sk: socket + * + * Returns true if connectionless, false otherwise + */ +static bool tipc_sk_type_connectionless(struct sock *sk) +{ + return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; +} + +/* tsk_peer_msg - verify if message was sent by connected port's peer + * + * Handles cases where the node's network address has changed from + * the default of <0.0.0> to its configured setting. + */ +static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) +{ + struct sock *sk = &tsk->sk; + u32 self = tipc_own_addr(sock_net(sk)); + u32 peer_port = tsk_peer_port(tsk); + u32 orig_node, peer_node; + + if (unlikely(!tipc_sk_connected(sk))) + return false; + + if (unlikely(msg_origport(msg) != peer_port)) + return false; - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + orig_node = msg_orignode(msg); + peer_node = tsk_peer_node(tsk); + + if (likely(orig_node == peer_node)) + return true; + + if (!orig_node && peer_node == self) + return true; + + if (!peer_node && orig_node == self) + return true; + + return false; } +/* tipc_set_sk_state - set the sk_state of the socket + * @sk: socket + * + * Caller must hold socket lock + * + * Returns 0 on success, errno otherwise + */ +static int tipc_set_sk_state(struct sock *sk, int state) +{ + int oldsk_state = sk->sk_state; + int res = -EINVAL; + + switch (state) { + case TIPC_OPEN: + res = 0; + break; + case TIPC_LISTEN: + case TIPC_CONNECTING: + if (oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_ESTABLISHED: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_DISCONNECTING: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == 
TIPC_ESTABLISHED) + res = 0; + break; + } + + if (!res) + sk->sk_state = state; + + return res; +} + +static int tipc_sk_sock_err(struct socket *sock, long *timeout) +{ + struct sock *sk = sock->sk; + int err = sock_error(sk); + int typ = sock->type; + + if (err) + return err; + if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) { + if (sk->sk_state == TIPC_DISCONNECTING) + return -EPIPE; + else if (!tipc_sk_connected(sk)) + return -ENOTCONN; + } + if (!*timeout) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeout); + + return 0; +} + +#define tipc_wait_for_cond(sock_, timeo_, condition_) \ +({ \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + struct sock *sk_; \ + int rc_; \ + \ + while ((rc_ = !(condition_))) { \ + /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \ + smp_rmb(); \ + sk_ = (sock_)->sk; \ + rc_ = tipc_sk_sock_err((sock_), timeo_); \ + if (rc_) \ + break; \ + add_wait_queue(sk_sleep(sk_), &wait_); \ + release_sock(sk_); \ + *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ + sched_annotate_sleep(); \ + lock_sock(sk_); \ + remove_wait_queue(sk_sleep(sk_), &wait_); \ + } \ + rc_; \ +}) + /** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -153,15 +453,15 @@ static void reject_rx_queue(struct sock *sk) * This routine creates additional data structures used by the TIPC socket, * initializes them, and links them together. 
* - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int tipc_sk_create(struct net *net, struct socket *sock, + int protocol, int kern) { const struct proto_ops *ops; - socket_state state; struct sock *sk; - struct tipc_port *tp_ptr; + struct tipc_sock *tsk; + struct tipc_msg *msg; /* Validate arguments */ if (unlikely(protocol != 0)) @@ -170,135 +470,143 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, switch (sock->type) { case SOCK_STREAM: ops = &stream_ops; - state = SS_UNCONNECTED; break; case SOCK_SEQPACKET: ops = &packet_ops; - state = SS_UNCONNECTED; break; case SOCK_DGRAM: case SOCK_RDM: ops = &msg_ops; - state = SS_READY; break; default: return -EPROTOTYPE; } /* Allocate socket's protocol area */ - if (!kern) - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); - else - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); - + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); if (sk == NULL) return -ENOMEM; - /* Allocate TIPC port for socket to use */ - tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); - if (unlikely(!tp_ptr)) { - sk_free(sk); - return -ENOMEM; - } + tsk = tipc_sk(sk); + tsk->max_pkt = MAX_PKT_DEFAULT; + tsk->maxnagle = 0; + tsk->nagle_start = NAGLE_START_INIT; + INIT_LIST_HEAD(&tsk->publications); + INIT_LIST_HEAD(&tsk->cong_links); + msg = &tsk->phdr; /* Finish initializing socket data structures */ sock->ops = ops; - sock->state = state; - sock_init_data(sock, sk); - sk->sk_backlog_rcv = backlog_rcv; - sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + tipc_set_sk_state(sk, TIPC_OPEN); + if (tipc_sk_insert(tsk)) { + sk_free(sk); + pr_warn("Socket create failed; port number exhausted\n"); + return -EINVAL; + } + + /* Ensure tsk is visible before we read own_addr. 
*/ + smp_mb(); + + tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE, + TIPC_NAMED_MSG, NAMED_H_SIZE, 0); + + msg_set_origport(msg, tsk->portid); + timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); + sk->sk_shutdown = 0; + sk->sk_backlog_rcv = tipc_sk_backlog_rcv; + sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]); sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; - tipc_sk(sk)->p = tp_ptr; - tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; + sk->sk_destruct = tipc_sock_destruct; + tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->group_is_open = true; + atomic_set(&tsk->dupl_rcvcnt, 0); - spin_unlock_bh(tp_ptr->lock); + /* Start out with safe limits until we receive an advertised window */ + tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); + tsk->rcv_win = tsk->snd_win; - if (sock->state == SS_READY) { - tipc_set_portunreturnable(tp_ptr->ref, 1); + if (tipc_sk_type_connectionless(sk)) { + tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) - tipc_set_portunreliable(tp_ptr->ref, 1); + tsk_set_unreliable(tsk, true); } - + __skb_queue_head_init(&tsk->mc_method.deferredq); + trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); return 0; } -/** - * tipc_sock_create_local - create TIPC socket from inside TIPC module - * @type: socket type - SOCK_RDM or SOCK_SEQPACKET - * - * We cannot use sock_creat_kern here because it bumps module user count. - * Since socket owner and creator is the same module we must make sure - * that module count remains zero for module local sockets, otherwise - * we cannot do rmmod. 
- * - * Returns 0 on success, errno otherwise - */ -int tipc_sock_create_local(int type, struct socket **res) +static void tipc_sk_callback(struct rcu_head *head) { - int rc; - struct sock *sk; - - rc = sock_create_lite(AF_TIPC, type, 0, res); - if (rc < 0) { - pr_err("Failed to create kernel socket\n"); - return rc; - } - tipc_sk_create(&init_net, *res, 0, 1); - - sk = (*res)->sk; - - return 0; -} + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); -/** - * tipc_sock_release_local - release socket created by tipc_sock_create_local - * @sock: the socket to be released. - * - * Module reference count is not incremented when such sockets are created, - * so we must keep it from being decremented when they are released. - */ -void tipc_sock_release_local(struct socket *sock) -{ - release(sock); - sock->ops = NULL; - sock_release(sock); + sock_put(&tsk->sk); } -/** - * tipc_sock_accept_local - accept a connection on a socket created - * with tipc_sock_create_local. Use this function to avoid that - * module reference count is inadvertently incremented. - * - * @sock: the accepting socket - * @newsock: reference to the new socket to be created - * @flags: socket flags - */ - -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags) +/* Caller should hold socket lock for the socket. 
*/ +static void __tipc_shutdown(struct socket *sock, int error) { struct sock *sk = sock->sk; - int ret; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); + u32 dnode = tsk_peer_node(tsk); + struct sk_buff *skb; + + /* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */ + tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && + !tsk_conn_cong(tsk))); + + /* Push out delayed messages if in Nagle mode */ + tipc_sk_push_backlog(tsk, false); + /* Remove pending SYN */ + __skb_queue_purge(&sk->sk_write_queue); + + /* Remove partially received buffer if any */ + skb = skb_peek(&sk->sk_receive_queue); + if (skb && TIPC_SKB_CB(skb)->bytes_read) { + __skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); + } - ret = sock_create_lite(sk->sk_family, sk->sk_type, - sk->sk_protocol, newsock); - if (ret < 0) - return ret; + /* Reject all unreceived messages if connectionless */ + if (tipc_sk_type_connectionless(sk)) { + tsk_rej_rx_queue(sk, error); + return; + } - ret = accept(sock, *newsock, flags); - if (ret < 0) { - sock_release(*newsock); - return ret; + switch (sk->sk_state) { + case TIPC_CONNECTING: + case TIPC_ESTABLISHED: + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + /* Send a FIN+/- to its peer */ + skb = __skb_dequeue(&sk->sk_receive_queue); + if (skb) { + __skb_queue_purge(&sk->sk_receive_queue); + tipc_sk_respond(sk, skb, error); + break; + } + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, error); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + break; + case TIPC_LISTEN: + /* Reject all SYN messages */ + tsk_rej_rx_queue(sk, error); + break; + default: + __skb_queue_purge(&sk->sk_receive_queue); + break; } - (*newsock)->ops = sock->ops; - return ret; } /** - * release - destroy a TIPC socket + * 
tipc_release - destroy a TIPC socket * @sock: socket to destroy * * This routine cleans up any messages that are still queued on the socket. @@ -311,14 +619,12 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, * are returned or discarded according to the "destination droppable" setting * specified for the message by the sender. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int release(struct socket *sock) +static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct tipc_port *tport; - struct sk_buff *buf; - int res; + struct tipc_sock *tsk; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -327,138 +633,148 @@ static int release(struct socket *sock) if (sk == NULL) return 0; - tport = tipc_sk_port(sk); + tsk = tipc_sk(sk); lock_sock(sk); - /* - * Reject all unreceived messages, except on an active connection - * (which disconnects locally & sends a 'FIN+' to peer) - */ - while (sock->state != SS_DISCONNECTING) { - buf = __skb_dequeue(&sk->sk_receive_queue); - if (buf == NULL) - break; - if (TIPC_SKB_CB(buf)->handle != 0) - kfree_skb(buf); - else { - if ((sock->state == SS_CONNECTING) || - (sock->state == SS_CONNECTED)) { - sock->state = SS_DISCONNECTING; - tipc_disconnect(tport->ref); - } - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - } - } - - /* - * Delete TIPC port; this ensures no more messages are queued - * (also disconnects an active connection & sends a 'FIN-' to peer) - */ - res = tipc_deleteport(tport->ref); - - /* Discard any remaining (connection-based) messages in receive queue */ - __skb_queue_purge(&sk->sk_receive_queue); + trace_tipc_sk_release(sk, NULL, TIPC_DUMP_ALL, " "); + __tipc_shutdown(sock, TIPC_ERR_NO_PORT); + sk->sk_shutdown = SHUTDOWN_MASK; + tipc_sk_leave(tsk); + tipc_sk_withdraw(tsk, NULL); + __skb_queue_purge(&tsk->mc_method.deferredq); + sk_stop_timer(sk, &sk->sk_timer); + tipc_sk_remove(tsk); + 
sock_orphan(sk); /* Reject any messages that accumulated in backlog queue */ - sock->state = SS_DISCONNECTING; release_sock(sk); - - sock_put(sk); + tipc_dest_list_purge(&tsk->cong_links); + tsk->cong_link_cnt = 0; + call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; - return res; + return 0; } /** - * bind - associate or disassocate TIPC name(s) with a socket + * __tipc_bind - associate or disassociate TIPC name(s) with a socket * @sock: socket structure - * @uaddr: socket address describing name(s) and desired operation - * @uaddr_len: size of socket address data structure + * @skaddr: socket address describing name(s) and desired operation + * @alen: size of socket address data structure * - * Name and name sequence binding is indicated using a positive scope value; + * Name and name sequence binding are indicated using a positive scope value; * a negative scope value unbinds the specified name. Specifying no name * (i.e. a socket address length of 0) unbinds all names from the socket. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise * * NOTE: This routine doesn't need to take the socket lock since it doesn't * access any non-constant socket information. 
*/ -static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) +static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + struct tipc_sock *tsk = tipc_sk(sock->sk); + bool unbind = false; - if (unlikely(!uaddr_len)) - return tipc_withdraw(portref, 0, NULL); - - if (uaddr_len < sizeof(struct sockaddr_tipc)) - return -EINVAL; - if (addr->family != AF_TIPC) - return -EAFNOSUPPORT; + if (unlikely(!alen)) + return tipc_sk_withdraw(tsk, NULL); - if (addr->addrtype == TIPC_ADDR_NAME) - addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) - return -EAFNOSUPPORT; + if (ua->addrtype == TIPC_SERVICE_ADDR) { + ua->addrtype = TIPC_SERVICE_RANGE; + ua->sr.upper = ua->sr.lower; + } + if (ua->scope < 0) { + unbind = true; + ua->scope = -ua->scope; + } + /* Users may still use deprecated TIPC_ZONE_SCOPE */ + if (ua->scope != TIPC_NODE_SCOPE) + ua->scope = TIPC_CLUSTER_SCOPE; - if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && - (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) + if (tsk->group) return -EACCES; - return (addr->scope > 0) ? 
- tipc_publish(portref, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq); + if (unbind) + return tipc_sk_withdraw(tsk, ua); + return tipc_sk_publish(tsk, ua); +} + +int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen) +{ + int res; + + lock_sock(sock->sk); + res = __tipc_bind(sock, skaddr, alen); + release_sock(sock->sk); + return res; +} + +static int tipc_bind(struct socket *sock, struct sockaddr_unsized *skaddr, int alen) +{ + struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr; + u32 atype = ua->addrtype; + + if (alen) { + if (!tipc_uaddr_valid(ua, alen)) + return -EINVAL; + if (atype == TIPC_SOCKET_ADDR) + return -EAFNOSUPPORT; + if (ua->sr.type < TIPC_RESERVED_TYPES) { + pr_warn_once("Can't bind to reserved service type %u\n", + ua->sr.type); + return -EACCES; + } + } + return tipc_sk_bind(sock, (struct sockaddr *)skaddr, alen); } /** - * get_name - get port ID of socket or peer socket + * tipc_getname - get port ID of socket or peer socket * @sock: socket structure * @uaddr: area for returned socket address - * @uaddr_len: area for returned length of socket address * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise * * NOTE: This routine doesn't need to take the socket lock since it only * accesses socket information that is unchanging (or which changes in * a completely predictable manner). 
*/ -static int get_name(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) +static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, + int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsock = tipc_sk(sock->sk); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); memset(addr, 0, sizeof(*addr)); if (peer) { - if ((sock->state != SS_CONNECTED) && - ((peer != 2) || (sock->state != SS_DISCONNECTING))) + if ((!tipc_sk_connected(sk)) && + ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; - addr->addr.id.ref = tsock->peer_name.ref; - addr->addr.id.node = tsock->peer_name.node; + addr->addr.id.ref = tsk_peer_port(tsk); + addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsock->p->ref; - addr->addr.id.node = tipc_own_addr; + addr->addr.id.ref = tsk->portid; + addr->addr.id.node = tipc_own_addr(sock_net(sk)); } - *uaddr_len = sizeof(*addr); - addr->addrtype = TIPC_ADDR_ID; + addr->addrtype = TIPC_SOCKET_ADDR; addr->family = AF_TIPC; addr->scope = 0; addr->addr.name.domain = 0; - return 0; + return sizeof(*addr); } /** - * poll - read and possibly block on pollmask + * tipc_poll - read and possibly block on pollmask * @file: file structure associated with the socket * @sock: socket for which to calculate the poll bits * @wait: ??? * - * Returns pollmask value + * Return: pollmask value * * COMMENTARY: * It appears that the usual socket locking mechanisms are not useful here @@ -466,728 +782,1324 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, * exits. TCP and other protocols seem to rely on higher level poll routines * to handle any preventable race conditions, so TIPC will do the same ... 
* - * TIPC sets the returned events as follows: - * - * socket state flags set - * ------------ --------- - * unconnected no read flags - * POLLOUT if port is not congested - * - * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue - * no write flags - * - * connected POLLIN/POLLRDNORM if data in rx queue - * POLLOUT if port is not congested - * - * disconnecting POLLIN/POLLRDNORM/POLLHUP - * no write flags - * - * listening POLLIN if SYN in rx queue - * no write flags - * - * ready POLLIN/POLLRDNORM if data in rx queue - * [connectionless] POLLOUT (since port cannot be congested) - * * IMPORTANT: The fact that a read or write operation is indicated does NOT * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static unsigned int poll(struct file *file, struct socket *sock, - poll_table *wait) +static __poll_t tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; - u32 mask = 0; - - sock_poll_wait(file, sk_sleep(sk), wait); - - switch ((int)sock->state) { - case SS_UNCONNECTED: - if (!tipc_sk_port(sk)->congested) - mask |= POLLOUT; + struct tipc_sock *tsk = tipc_sk(sk); + __poll_t revents = 0; + + sock_poll_wait(file, sock, wait); + trace_tipc_sk_poll(sk, NULL, TIPC_DUMP_ALL, " "); + + if (sk->sk_shutdown & RCV_SHUTDOWN) + revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; + if (sk->sk_shutdown == SHUTDOWN_MASK) + revents |= EPOLLHUP; + + switch (sk->sk_state) { + case TIPC_ESTABLISHED: + if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) + revents |= EPOLLOUT; + fallthrough; + case TIPC_LISTEN: + case TIPC_CONNECTING: + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + revents |= EPOLLIN | EPOLLRDNORM; break; - case SS_READY: - case SS_CONNECTED: - if (!tipc_sk_port(sk)->congested) - mask |= POLLOUT; - /* fall thru' */ - case SS_CONNECTING: - case SS_LISTENING: - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); + case TIPC_OPEN: + if 
(tsk->group_is_open && !tsk->cong_link_cnt) + revents |= EPOLLOUT; + if (!tipc_sk_type_connectionless(sk)) + break; + if (skb_queue_empty_lockless(&sk->sk_receive_queue)) + break; + revents |= EPOLLIN | EPOLLRDNORM; break; - case SS_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); + case TIPC_DISCONNECTING: + revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP; break; } + return revents; +} + +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @ua: destination address struct + * @msg: message to send + * @dlen: length of data to send + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Return: the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua, + struct msghdr *msg, size_t dlen, long timeout) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + int mtu = tipc_bcast_get_mtu(net); + struct sk_buff_head pkts; + struct tipc_nlist dsts; + int rc; + + if (tsk->group) + return -EACCES; + + /* Block or return if any destination link is congested */ + rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Lookup destination nodes */ + tipc_nlist_init(&dsts, tipc_own_addr(net)); + tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts); + if (!dsts.local && !dsts.remote) + return -EHOSTUNREACH; + + /* Build message header */ + msg_set_type(hdr, TIPC_MCAST_MSG); + msg_set_hdr_sz(hdr, MCAST_H_SIZE); + msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_nametype(hdr, ua->sr.type); + msg_set_namelower(hdr, ua->sr.lower); + msg_set_nameupper(hdr, ua->sr.upper); + + /* Build message as chain of buffers */ + __skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts); + + /* Send message if build was 
successful */ + if (unlikely(rc == dlen)) { + trace_tipc_sk_sendmcast(sk, skb_peek(&pkts), + TIPC_DUMP_SK_SNDQ, " "); + rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts, + &tsk->cong_link_cnt); + } - return mask; + tipc_nlist_purge(&dsts); + + return rc ? rc : dlen; } /** - * dest_name_check - verify user is permitted to send to specified port name - * @dest: destination address - * @m: descriptor for message to be sent + * tipc_send_group_msg - send a message to a member in the group + * @net: network namespace + * @tsk: tipc socket + * @m: message to send + * @mb: group member + * @dnode: destination node + * @dport: destination port + * @dlen: total length of message data + */ +static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, + struct msghdr *m, struct tipc_member *mb, + u32 dnode, u32 dport, int dlen) +{ + u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); + struct tipc_mc_method *method = &tsk->mc_method; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct sk_buff_head pkts; + int mtu, rc; + + /* Complete message header */ + msg_set_type(hdr, TIPC_GRP_UCAST_MSG); + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, dport); + msg_set_destnode(hdr, dnode); + msg_set_grp_bc_seqno(hdr, bc_snd_nxt); + + /* Build message as chain of buffers */ + __skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tipc_dest_push(&tsk->cong_links, dnode, 0); + tsk->cong_link_cnt++; + } + + /* Update send window */ + tipc_group_update_member(mb, blks); + + /* A broadcast sent within next EXPIRE period must follow same path */ + method->rcast = true; + method->mandatory = true; + return dlen; +} + +/** + * tipc_send_group_unicast - send message to a member in the group 
+ * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup * - * Prevents restricted configuration commands from being issued by - * unauthorized users. + * Called from function tipc_sendmsg(), which has done all sanity checks + * Return: the number of bytes sent on success, or errno + */ +static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + struct tipc_member *mb = NULL; + u32 node, port; + int rc; + + node = ua->sk.node; + port = ua->sk.ref; + if (!port && !node) + return -EHOSTUNREACH; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(&tsk->cong_links, node, 0) && + tsk->group && + !tipc_group_cong(tsk->group, node, port, blks, + &mb)); + if (unlikely(rc)) + return rc; + + if (unlikely(!mb)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen); + + return rc ? 
rc : dlen; +} + +/** + * tipc_send_group_anycast - send message to any member with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup * - * Returns 0 if permission is granted, otherwise errno + * Called from function tipc_sendmsg(), which has done all sanity checks + * Return: the number of bytes sent on success, or errno */ -static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) +static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) { - struct tipc_cfg_msg_hdr hdr; + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct list_head *cong_links = &tsk->cong_links; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_member *first = NULL; + struct tipc_member *mbr = NULL; + struct net *net = sock_net(sk); + u32 node, port, exclude; + LIST_HEAD(dsts); + int lookups = 0; + int dstcnt, rc; + bool cong; + + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); + + while (++lookups < 4) { + exclude = tipc_group_exclude(tsk->group); + + first = NULL; + + /* Look for a non-congested destination member, if any */ + while (1) { + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, + exclude, false)) + return -EHOSTUNREACH; + tipc_dest_pop(&dsts, &node, &port); + cong = tipc_group_cong(tsk->group, node, port, blks, + &mbr); + if (!cong) + break; + if (mbr == first) + break; + if (!first) + first = mbr; + } - if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES)) - return 0; - if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) - return 0; - if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) - return -EACCES; + /* Start over if destination was not in member list */ + if (unlikely(!mbr)) + continue; - if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr))) - 
return -EMSGSIZE; - if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) - return -EFAULT; - if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) - return -EACCES; + if (likely(!cong && !tipc_dest_find(cong_links, node, 0))) + break; - return 0; + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(cong_links, node, 0) && + tsk->group && + !tipc_group_cong(tsk->group, node, port, + blks, &mbr)); + if (unlikely(rc)) + return rc; + + /* Send, unless destination disappeared while waiting */ + if (likely(mbr)) + break; + } + + if (unlikely(lookups >= 4)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen); + + return rc ? rc : dlen; } /** - * send_msg - send message in connectionless manner - * @iocb: if NULL, indicates that socket lock is already held + * tipc_send_group_bcast - send message to all members in communication group * @sock: socket structure * @m: message to send - * @total_len: length of message + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup * - * Message must have an destination specified explicitly. - * Used for SOCK_RDM and SOCK_DGRAM messages, - * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. - * (Note: 'SYN+' is prohibited on SOCK_STREAM.) 
+ * Called from function tipc_sendmsg(), which has done all sanity checks + * Return: the number of bytes sent on success, or errno + */ +static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_nlist *dsts; + struct tipc_mc_method *method = &tsk->mc_method; + bool ack = method->mandatory && method->rcast; + int blks = tsk_blocks(MCAST_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + int mtu = tipc_bcast_get_mtu(net); + struct sk_buff_head pkts; + int rc = -EHOSTUNREACH; + + /* Block or return if any destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tsk->cong_link_cnt && tsk->group && + !tipc_group_bc_cong(tsk->group, blks)); + if (unlikely(rc)) + return rc; + + dsts = tipc_group_dests(tsk->group); + if (!dsts->local && !dsts->remote) + return -EHOSTUNREACH; + + /* Complete message header */ + if (ua) { + msg_set_type(hdr, TIPC_GRP_MCAST_MSG); + msg_set_nameinst(hdr, ua->sa.instance); + } else { + msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + msg_set_nameinst(hdr, 0); + } + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group)); + + /* Avoid getting stuck with repeated forced replicasts */ + msg_set_grp_bc_ack_req(hdr, ack); + + /* Build message as chain of buffers */ + __skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Update broadcast sequence number and send windows */ + tipc_group_update_bc_members(tsk->group, blks, ack); + + /* Broadcast link is now free to choose method for next broadcast */ + 
method->mandatory = false; + method->expires = jiffies; + + return dlen; +} + +/** + * tipc_send_group_mcast - send message to all members with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup * - * Returns the number of bytes sent on success, or errno otherwise + * Called from function tipc_sendmsg(), which has done all sanity checks + * Return: the number of bytes sent on success, or errno */ -static int send_msg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) { + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; - int needs_conn; - long timeout_val; - int res = -EINVAL; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + u32 dstcnt, exclude; + LIST_HEAD(dsts); + + ua->sa.type = msg_nametype(hdr); + ua->scope = msg_lookup_scope(hdr); + exclude = tipc_group_exclude(grp); + + if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true)) + return -EHOSTUNREACH; + + if (dstcnt == 1) { + tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref); + return tipc_send_group_unicast(sock, m, dlen, timeout); + } - if (unlikely(!dest)) - return -EDESTADDRREQ; - if (unlikely((m->msg_namelen < sizeof(*dest)) || - (dest->family != AF_TIPC))) - return -EINVAL; - if (total_len > TIPC_MAX_USER_MSG_SIZE) - return -EMSGSIZE; + tipc_dest_list_purge(&dsts); + return tipc_send_group_bcast(sock, m, dlen, timeout); +} - if (iocb) - lock_sock(sk); +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @net: the associated network namespace + * @arrvq: queue with arriving messages, to be 
cloned after destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur + */ +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq) +{ + u32 self = tipc_own_addr(net); + struct sk_buff *skb, *_skb; + u32 portid, onode; + struct sk_buff_head tmpq; + struct list_head dports; + struct tipc_msg *hdr; + struct tipc_uaddr ua; + int user, mtyp, hlen; + + __skb_queue_head_init(&tmpq); + INIT_LIST_HEAD(&dports); + ua.addrtype = TIPC_SERVICE_RANGE; + + /* tipc_skb_peek() increments the head skb's reference counter */ + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + onode = msg_orignode(hdr); + ua.sr.type = msg_nametype(hdr); + ua.sr.lower = msg_namelower(hdr); + ua.sr.upper = msg_nameupper(hdr); + if (onode == self) + ua.scope = TIPC_ANY_SCOPE; + else + ua.scope = TIPC_CLUSTER_SCOPE; - needs_conn = (sock->state != SS_READY); - if (unlikely(needs_conn)) { - if (sock->state == SS_LISTENING) { - res = -EPIPE; - goto exit; + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + __skb_dequeue(arrvq); + __skb_queue_tail(inputq, skb); + } + kfree_skb(skb); + spin_unlock_bh(&inputq->lock); + continue; } - if (sock->state != SS_UNCONNECTED) { - res = -EISCONN; - goto exit; + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + ua.sr.lower = 0; + ua.sr.upper = ~0; + ua.scope = msg_lookup_scope(hdr); } - if (tport->published) { - res = -EOPNOTSUPP; - goto exit; + + /* Create destination port list: */ + tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports); + + /* Clone message per destination */ + while (tipc_dest_pop(&dports, NULL, &portid)) { + _skb = 
__pskb_copy(skb, hlen, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); + continue; + } + pr_warn("Failed to clone mcast rcv buffer\n"); } - if (dest->addrtype == TIPC_ADDR_NAME) { - tport->conn_type = dest->addr.name.name.type; - tport->conn_instance = dest->addr.name.name.instance; + /* Append clones to inputq only if skb is still head of arrvq */ + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + /* Decrement the skb's refcnt */ + kfree_skb(__skb_dequeue(arrvq)); } - - /* Abort any pending connection attempts (very unlikely) */ - reject_rx_queue(sk); + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + kfree_skb(skb); } + tipc_sk_rcv(net, inputq); +} - timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); +/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue + * when socket is in Nagle mode + */ +static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack) +{ + struct sk_buff_head *txq = &tsk->sk.sk_write_queue; + struct sk_buff *skb = skb_peek_tail(txq); + struct net *net = sock_net(&tsk->sk); + u32 dnode = tsk_peer_node(tsk); + int rc; - do { - if (dest->addrtype == TIPC_ADDR_NAME) { - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_send2name(tport->ref, - &dest->addr.name.name, - dest->addr.name.domain, - m->msg_iovlen, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_ID) { - res = tipc_send2port(tport->ref, - &dest->addr.id, - m->msg_iovlen, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_MCAST) { - if (needs_conn) { - res = -EOPNOTSUPP; - break; + if (nagle_ack) { + tsk->pkt_cnt += skb_queue_len(txq); + if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) { + tsk->oneway = 0; + if (tsk->nagle_start < NAGLE_START_MAX) + tsk->nagle_start *= 2; + tsk->expect_ack = false; + pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n", + 
tsk->portid, tsk->msg_acc, tsk->pkt_cnt, + tsk->nagle_start); + } else { + tsk->nagle_start = NAGLE_START_INIT; + if (skb) { + msg_set_ack_required(buf_msg(skb)); + tsk->expect_ack = true; + } else { + tsk->expect_ack = false; } - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_multicast(tport->ref, - &dest->addr.nameseq, - m->msg_iovlen, - m->msg_iov, - total_len); - } - if (likely(res != -ELINKCONG)) { - if (needs_conn && (res >= 0)) - sock->state = SS_CONNECTING; - break; - } - if (timeout_val <= 0L) { - res = timeout_val ? timeout_val : -EWOULDBLOCK; - break; } - release_sock(sk); - timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), - !tport->congested, timeout_val); - lock_sock(sk); - } while (1); + tsk->msg_acc = 0; + tsk->pkt_cnt = 0; + } + + if (!skb || tsk->cong_link_cnt) + return; + + /* Do not send SYN again after congestion */ + if (msg_is_syn(buf_msg(skb))) + return; + + if (tsk->msg_acc) + tsk->pkt_cnt += skb_queue_len(txq); + tsk->snt_unacked += tsk->snd_backlog; + tsk->snd_backlog = 0; + rc = tipc_node_xmit(net, txq, dnode, tsk->portid); + if (rc == -ELINKCONG) + tsk->cong_link_cnt = 1; +} +/** + * tipc_sk_conn_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @skb: pointer to message buffer. 
+ * @inputq: buffer list containing the buffers + * @xmitq: output message area + */ +static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u32 onode = tsk_own_node(tsk); + struct sock *sk = &tsk->sk; + int mtyp = msg_type(hdr); + bool was_cong; + + /* Ignore if connection cannot be validated: */ + if (!tsk_peer_msg(tsk, hdr)) { + trace_tipc_sk_drop_msg(sk, skb, TIPC_DUMP_NONE, "@proto_rcv!"); + goto exit; + } + + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + + /* State change is ignored if socket already awake, + * - convert msg to abort msg and add to inqueue + */ + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_CONN_MSG); + msg_set_size(hdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + __skb_queue_tail(inputq, skb); + return; + } + + tsk->probe_unacked = false; + + if (mtyp == CONN_PROBE) { + msg_set_type(hdr, CONN_PROBE_REPLY); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); + return; + } else if (mtyp == CONN_ACK) { + was_cong = tsk_conn_cong(tsk); + tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr)); + tsk->snt_unacked -= msg_conn_ack(hdr); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + tsk->snd_win = msg_adv_win(hdr); + if (was_cong && !tsk_conn_cong(tsk)) + sk->sk_write_space(sk); + } else if (mtyp != CONN_PROBE_REPLY) { + pr_warn("Received unknown CONN_PROTO msg\n"); + } exit: - if (iocb) - release_sock(sk); - return res; + kfree_skb(skb); } /** - * send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held + * tipc_sendmsg - send message in connectionless manner * @sock: socket structure * @m: message to send - * @total_len: length of message + * @dsz: amount of user data to be 
sent * - * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. + * Message must have an destination specified explicitly. + * Used for SOCK_RDM and SOCK_DGRAM messages, + * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. + * (Note: 'SYN+' is prohibited on SOCK_STREAM.) * - * Returns the number of bytes sent on success, or errno otherwise + * Return: the number of bytes sent on success, or errno otherwise */ -static int send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_sendmsg(struct socket *sock, + struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; - long timeout_val; - int res; + int ret; + + lock_sock(sk); + ret = __tipc_sendmsg(sock, m, dsz); + release_sock(sk); - /* Handle implied connection establishment */ - if (unlikely(dest)) - return send_msg(iocb, sock, m, total_len); + return ret; +} - if (total_len > TIPC_MAX_USER_MSG_SIZE) +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name; + long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + struct list_head *clinks = &tsk->cong_links; + bool syn = !tipc_sk_type_connectionless(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_socket_addr skaddr; + struct sk_buff_head pkts; + int atype, mtu, rc; + + if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; - if (iocb) - lock_sock(sk); + if (ua) { + if (!tipc_uaddr_valid(ua, m->msg_namelen)) + return -EINVAL; + atype = ua->addrtype; + } - timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + /* If socket belongs to a communication group follow other paths */ + if (grp) { + if (!ua) + return 
tipc_send_group_bcast(sock, m, dlen, timeout); + if (atype == TIPC_SERVICE_ADDR) + return tipc_send_group_anycast(sock, m, dlen, timeout); + if (atype == TIPC_SOCKET_ADDR) + return tipc_send_group_unicast(sock, m, dlen, timeout); + if (atype == TIPC_SERVICE_RANGE) + return tipc_send_group_mcast(sock, m, dlen, timeout); + return -EINVAL; + } - do { - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) - res = -EPIPE; - else - res = -ENOTCONN; - break; - } + if (!ua) { + ua = (struct tipc_uaddr *)&tsk->peer; + if (!syn && ua->family != AF_TIPC) + return -EDESTADDRREQ; + atype = ua->addrtype; + } - res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov, - total_len); - if (likely(res != -ELINKCONG)) - break; - if (timeout_val <= 0L) { - res = timeout_val ? timeout_val : -EWOULDBLOCK; - break; + if (unlikely(syn)) { + if (sk->sk_state == TIPC_LISTEN) + return -EPIPE; + if (sk->sk_state != TIPC_OPEN) + return -EISCONN; + if (tsk->published) + return -EOPNOTSUPP; + if (atype == TIPC_SERVICE_ADDR) + tsk->conn_addrtype = atype; + msg_set_syn(hdr, 1); + } + + memset(&skaddr, 0, sizeof(skaddr)); + + /* Determine destination */ + if (atype == TIPC_SERVICE_RANGE) { + return tipc_sendmcast(sock, ua, m, dlen, timeout); + } else if (atype == TIPC_SERVICE_ADDR) { + skaddr.node = ua->lookup_node; + ua->scope = tipc_node2scope(skaddr.node); + if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr)) + return -EHOSTUNREACH; + } else if (atype == TIPC_SOCKET_ADDR) { + skaddr = ua->sk; + } else { + return -EINVAL; + } + + /* Block or return if destination link is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(clinks, skaddr.node, 0)); + if (unlikely(rc)) + return rc; + + /* Finally build message header */ + msg_set_destnode(hdr, skaddr.node); + msg_set_destport(hdr, skaddr.ref); + if (atype == TIPC_SERVICE_ADDR) { + msg_set_type(hdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(hdr, NAMED_H_SIZE); + msg_set_nametype(hdr, ua->sa.type); + 
msg_set_nameinst(hdr, ua->sa.instance); + msg_set_lookup_scope(hdr, ua->scope); + } else { /* TIPC_SOCKET_ADDR */ + msg_set_type(hdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(hdr, 0); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + } + + /* Add message body */ + __skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) { + __skb_queue_purge(&pkts); + return -ENOMEM; + } + + /* Send message */ + trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " "); + rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tipc_dest_push(clinks, skaddr.node, 0); + tsk->cong_link_cnt++; + rc = 0; + } + + if (unlikely(syn && !rc)) { + tipc_set_sk_state(sk, TIPC_CONNECTING); + if (dlen && timeout) { + timeout = msecs_to_jiffies(timeout); + tipc_wait_for_connect(sock, &timeout); } - release_sock(sk); - timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), - (!tport->congested || !tport->connected), timeout_val); - lock_sock(sk); - } while (1); + } - if (iocb) - release_sock(sk); - return res; + return rc ? rc : dlen; } /** - * send_stream - send stream-oriented data - * @iocb: (unused) + * tipc_sendstream - send stream-oriented data * @sock: socket structure * @m: data to send - * @total_len: total length of data to be sent + * @dsz: total length of data to be transmitted * * Used for SOCK_STREAM data. 
* - * Returns the number of bytes sent on success (or partial success), + * Return: the number of bytes sent on success (or partial success), * or errno if no data sent */ -static int send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - struct msghdr my_msg; - struct iovec my_iov; - struct iovec *curr_iov; - int curr_iovlen; - char __user *curr_start; - u32 hdr_size; - int curr_left; - int bytes_to_send; - int bytes_sent; - int res; + int ret; lock_sock(sk); + ret = __tipc_sendstream(sock, m, dsz); + release_sock(sk); - /* Handle special cases where there is no connection */ - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) { - res = send_packet(NULL, sock, m, total_len); - goto exit; - } else if (sock->state == SS_DISCONNECTING) { - res = -EPIPE; - goto exit; - } else { - res = -ENOTCONN; - goto exit; - } - } + return ret; +} - if (unlikely(m->msg_name)) { - res = -EISCONN; - goto exit; - } +static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + struct sk_buff_head *txq = &sk->sk_write_queue; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + struct sk_buff *skb; + u32 dnode = tsk_peer_node(tsk); + int maxnagle = tsk->maxnagle; + int maxpkt = tsk->max_pkt; + int send, sent = 0; + int blocks, rc = 0; + + if (unlikely(dlen > INT_MAX)) + return -EMSGSIZE; - if (total_len > (unsigned int)INT_MAX) { - res = -EMSGSIZE; - goto exit; + /* Handle implicit connection setup */ + if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { + rc = __tipc_sendmsg(sock, m, dlen); + if (dlen && dlen == rc) { + 
tsk->peer_caps = tipc_node_get_capabilities(net, dnode); + tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } + return rc; } - /* - * Send each iovec entry using one or more messages - * - * Note: This algorithm is good for the most likely case - * (i.e. one large iovec entry), but could be improved to pass sets - * of small iovec entries into send_packet(). - */ - curr_iov = m->msg_iov; - curr_iovlen = m->msg_iovlen; - my_msg.msg_iov = &my_iov; - my_msg.msg_iovlen = 1; - my_msg.msg_flags = m->msg_flags; - my_msg.msg_name = NULL; - bytes_sent = 0; - - hdr_size = msg_hdr_sz(&tport->phdr); - - while (curr_iovlen--) { - curr_start = curr_iov->iov_base; - curr_left = curr_iov->iov_len; - - while (curr_left) { - bytes_to_send = tport->max_pkt - hdr_size; - if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) - bytes_to_send = TIPC_MAX_USER_MSG_SIZE; - if (curr_left < bytes_to_send) - bytes_to_send = curr_left; - my_iov.iov_base = curr_start; - my_iov.iov_len = bytes_to_send; - res = send_packet(NULL, sock, &my_msg, bytes_to_send); - if (res < 0) { - if (bytes_sent) - res = bytes_sent; - goto exit; + do { + rc = tipc_wait_for_cond(sock, &timeout, + (!tsk->cong_link_cnt && + !tsk_conn_cong(tsk) && + tipc_sk_connected(sk))); + if (unlikely(rc)) + break; + send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); + blocks = tsk->snd_backlog; + if (tsk->oneway++ >= tsk->nagle_start && maxnagle && + send <= maxnagle) { + rc = tipc_msg_append(hdr, m, send, maxnagle, txq); + if (unlikely(rc < 0)) + break; + blocks += rc; + tsk->msg_acc++; + if (blocks <= 64 && tsk->expect_ack) { + tsk->snd_backlog = blocks; + sent += send; + break; + } else if (blocks > 64) { + tsk->pkt_cnt += skb_queue_len(txq); + } else { + skb = skb_peek_tail(txq); + if (skb) { + msg_set_ack_required(buf_msg(skb)); + tsk->expect_ack = true; + } else { + tsk->expect_ack = false; + } + tsk->msg_acc = 0; + tsk->pkt_cnt = 0; } - curr_left -= bytes_to_send; - curr_start += bytes_to_send; - bytes_sent += 
bytes_to_send; + } else { + rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq); + if (unlikely(rc != send)) + break; + blocks += tsk_inc(tsk, send + MIN_H_SIZE); + } + trace_tipc_sk_sendstream(sk, skb_peek(txq), + TIPC_DUMP_SK_SNDQ, " "); + rc = tipc_node_xmit(net, txq, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tsk->cong_link_cnt = 1; + rc = 0; } + if (likely(!rc)) { + tsk->snt_unacked += blocks; + tsk->snd_backlog = 0; + sent += send; + } + } while (sent < dlen && !rc); - curr_iov++; - } - res = bytes_sent; -exit: - release_sock(sk); - return res; + return sent ? sent : rc; } /** - * auto_connect - complete connection setup to a remote port + * tipc_send_packet - send a connection-oriented message * @sock: socket structure - * @msg: peer's response message + * @m: message to send + * @dsz: length of data to be transmitted * - * Returns 0 on success, errno otherwise + * Used for SOCK_SEQPACKET messages. + * + * Return: the number of bytes sent on success, or errno otherwise */ -static int auto_connect(struct socket *sock, struct tipc_msg *msg) +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { - struct tipc_sock *tsock = tipc_sk(sock->sk); - struct tipc_port *p_ptr; + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; - tsock->peer_name.ref = msg_origport(msg); - tsock->peer_name.node = msg_orignode(msg); - p_ptr = tipc_port_deref(tsock->p->ref); - if (!p_ptr) - return -EINVAL; + return tipc_sendstream(sock, m, dsz); +} - __tipc_connect(tsock->p->ref, p_ptr, &tsock->peer_name); +/* tipc_sk_finish_conn - complete the setup of a connection + */ +static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, + u32 peer_node) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_msg *msg = &tsk->phdr; + + msg_set_syn(msg, 0); + msg_set_destnode(msg, peer_node); + msg_set_destport(msg, peer_port); + msg_set_type(msg, TIPC_CONN_MSG); + msg_set_lookup_scope(msg, 0); + 
msg_set_hdr_sz(msg, SHORT_H_SIZE); + + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); + tipc_set_sk_state(sk, TIPC_ESTABLISHED); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true); + tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); + tsk_set_nagle(tsk); + __skb_queue_purge(&sk->sk_write_queue); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + return; - if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&p_ptr->phdr, (u32)msg_importance(msg)); - sock->state = SS_CONNECTED; - return 0; + /* Fall back to message based flow control */ + tsk->rcv_win = FLOWCTL_MSG_WIN; + tsk->snd_win = FLOWCTL_MSG_WIN; } /** - * set_orig_addr - capture sender's address for received message + * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @msg: received message header + * @skb: received message * * Note: Address is not captured if not requested by receiver. 
*/ -static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) +static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name; + DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name); + struct tipc_msg *hdr = buf_msg(skb); - if (addr) { - addr->family = AF_TIPC; - addr->addrtype = TIPC_ADDR_ID; - memset(&addr->addr, 0, sizeof(addr->addr)); - addr->addr.id.ref = msg_origport(msg); - addr->addr.id.node = msg_orignode(msg); - addr->addr.name.domain = 0; /* could leave uninitialized */ - addr->scope = 0; /* could leave uninitialized */ - m->msg_namelen = sizeof(struct sockaddr_tipc); - } + if (!srcaddr) + return; + + srcaddr->sock.family = AF_TIPC; + srcaddr->sock.addrtype = TIPC_SOCKET_ADDR; + srcaddr->sock.scope = 0; + srcaddr->sock.addr.id.ref = msg_origport(hdr); + srcaddr->sock.addr.id.node = msg_orignode(hdr); + srcaddr->sock.addr.name.domain = 0; + m->msg_namelen = sizeof(struct sockaddr_tipc); + + if (!msg_in_group(hdr)) + return; + + /* Group message users may also want to know sending member's id */ + srcaddr->member.family = AF_TIPC; + srcaddr->member.addrtype = TIPC_SERVICE_ADDR; + srcaddr->member.scope = 0; + srcaddr->member.addr.name.name.type = msg_nametype(hdr); + srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; + srcaddr->member.addr.name.domain = 0; + m->msg_namelen = sizeof(*srcaddr); } /** - * anc_data_recv - optionally capture ancillary data for received message + * tipc_sk_anc_data_recv - optionally capture ancillary data for received message * @m: descriptor for message info - * @msg: received message header - * @tport: TIPC port associated with message + * @skb: received message buffer + * @tsk: TIPC port associated with message * * Note: Ancillary data is not captured if not requested by receiver. 
* - * Returns 0 if successful, otherwise errno + * Return: 0 if successful, otherwise errno */ -static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) +static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, + struct tipc_sock *tsk) { - u32 anc_data[3]; - u32 err; - u32 dest_type; - int has_name; - int res; + struct tipc_msg *hdr; + u32 data[3] = {0,}; + bool has_addr; + int dlen, rc; if (likely(m->msg_controllen == 0)) return 0; - /* Optionally capture errored message object(s) */ - err = msg ? msg_errcode(msg) : 0; - if (unlikely(err)) { - anc_data[0] = err; - anc_data[1] = msg_data_sz(msg); - res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data); - if (res) - return res; - if (anc_data[1]) { - res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], - msg_data(msg)); - if (res) - return res; - } + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + + /* Capture errored message object, if any */ + if (msg_errcode(hdr)) { + if (skb_linearize(skb)) + return -ENOMEM; + hdr = buf_msg(skb); + data[0] = msg_errcode(hdr); + data[1] = dlen; + rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data); + if (rc || !dlen) + return rc; + rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr)); + if (rc) + return rc; } - /* Optionally capture message destination object */ - dest_type = msg ? 
msg_type(msg) : TIPC_DIRECT_MSG; - switch (dest_type) { + /* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */ + switch (msg_type(hdr)) { case TIPC_NAMED_MSG: - has_name = 1; - anc_data[0] = msg_nametype(msg); - anc_data[1] = msg_namelower(msg); - anc_data[2] = msg_namelower(msg); + has_addr = true; + data[0] = msg_nametype(hdr); + data[1] = msg_namelower(hdr); + data[2] = data[1]; break; case TIPC_MCAST_MSG: - has_name = 1; - anc_data[0] = msg_nametype(msg); - anc_data[1] = msg_namelower(msg); - anc_data[2] = msg_nameupper(msg); + has_addr = true; + data[0] = msg_nametype(hdr); + data[1] = msg_namelower(hdr); + data[2] = msg_nameupper(hdr); break; case TIPC_CONN_MSG: - has_name = (tport->conn_type != 0); - anc_data[0] = tport->conn_type; - anc_data[1] = tport->conn_instance; - anc_data[2] = tport->conn_instance; + has_addr = !!tsk->conn_addrtype; + data[0] = msg_nametype(&tsk->phdr); + data[1] = msg_nameinst(&tsk->phdr); + data[2] = data[1]; break; default: - has_name = 0; + has_addr = false; } - if (has_name) { - res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data); - if (res) - return res; + if (!has_addr) + return 0; + return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data); +} + +static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct sk_buff *skb = NULL; + struct tipc_msg *msg; + u32 peer_port = tsk_peer_port(tsk); + u32 dnode = tsk_peer_node(tsk); + + if (!tipc_sk_connected(sk)) + return NULL; + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); + if (!skb) + return NULL; + msg = buf_msg(skb); + msg_set_conn_ack(msg, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + + /* Adjust to and advertize the correct window limit */ + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) { + tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); + msg_set_adv_win(msg, tsk->rcv_win); } + return skb; +} - return 0; +static void tipc_sk_send_ack(struct 
tipc_sock *tsk) +{ + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (!skb) + return; + + tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk), + msg_link_selector(buf_msg(skb))); +} + +static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + long timeo = *timeop; + int err = sock_error(sk); + + if (err) + return err; + + for (;;) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = -ENOTCONN; + break; + } + add_wait_queue(sk_sleep(sk), &wait); + release_sock(sk); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + sched_annotate_sleep(); + lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EAGAIN; + if (!timeo) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + + err = sock_error(sk); + if (err) + break; + } + *timeop = timeo; + return err; } /** - * recv_msg - receive packet-oriented message - * @iocb: (unused) + * tipc_recvmsg - receive packet-oriented message + * @sock: network socket * @m: descriptor for message info - * @buf_len: total size of user buffer area + * @buflen: length of user buffer area * @flags: receive flags * * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. * If the complete message doesn't fit in user area, truncate it. 
* - * Returns size of returned message data, errno otherwise + * Return: size of returned message data, errno otherwise */ -static int recv_msg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, + size_t buflen, int flags) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - struct sk_buff *buf; - struct tipc_msg *msg; + bool connected = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); + int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; + struct sk_buff_head xmitq; + struct tipc_msg *hdr; + struct sk_buff *skb; + bool grp_evt; long timeout; - unsigned int sz; - u32 err; - int res; /* Catch invalid receive requests */ - if (unlikely(!buf_len)) + if (unlikely(!buflen)) return -EINVAL; lock_sock(sk); - - if (unlikely(sock->state == SS_UNCONNECTED)) { - res = -ENOTCONN; + if (unlikely(connected && sk->sk_state == TIPC_OPEN)) { + rc = -ENOTCONN; goto exit; } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; + /* Step rcv queue to first msg with data or error; wait if necessary */ + do { + rc = tipc_wait_for_rcvmsg(sock, &timeout); + if (unlikely(rc)) + goto exit; + skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + hlen = msg_hdr_sz(hdr); + err = msg_errcode(hdr); + grp_evt = msg_is_grp_evt(hdr); + if (likely(dlen || err)) + break; + tsk_advance_rx_queue(sk); + } while (1); - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -restart: + /* Collect msg meta data, including error code and rejected data */ + tipc_sk_set_orig_addr(m, skb); + rc = tipc_sk_anc_data_recv(m, skb, tsk); + if (unlikely(rc)) + goto exit; + hdr = buf_msg(skb); + + /* Capture data if non-error msg, otherwise just set return value */ + if (likely(!err)) { + int offset = skb_cb->bytes_read; - /* 
Look for a message in receive queue; wait if necessary */ - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { - res = -ENOTCONN; + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) goto exit; + if (unlikely(offset + copy < dlen)) { + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; } - if (timeout <= 0L) { - res = timeout ? timeout : -EWOULDBLOCK; + } else { + copy = 0; + rc = 0; + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { + rc = -ECONNRESET; goto exit; } - release_sock(sk); - timeout = wait_event_interruptible_timeout(*sk_sleep(sk), - tipc_rx_ready(sock), - timeout); - lock_sock(sk); } - /* Look at first message in receive queue */ - buf = skb_peek(&sk->sk_receive_queue); - msg = buf_msg(buf); - sz = msg_data_sz(msg); - err = msg_errcode(msg); - - /* Discard an empty non-errored message & try again */ - if ((!sz) && (!err)) { - advance_rx_queue(sk); - goto restart; + /* Mark message as group event if applicable */ + if (unlikely(grp_evt)) { + if (msg_grp_evt(hdr) == TIPC_WITHDRAWN) + m->msg_flags |= MSG_EOR; + m->msg_flags |= MSG_OOB; + copy = 0; } - /* Capture sender's address (optional) */ - set_orig_addr(m, msg); - - /* Capture ancillary data (optional) */ - res = anc_data_recv(m, msg, tport); - if (res) + /* Caption of data or error code/rejected data was successful */ + if (unlikely(flags & MSG_PEEK)) goto exit; - /* Capture message data (if valid) & compute return value (always) */ - if (!err) { - if (unlikely(buf_len < sz)) { - sz = buf_len; - m->msg_flags |= MSG_TRUNC; - } - res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg), - m->msg_iov, sz); - if (res) - goto exit; - res = sz; - } else { - if ((sock->state == SS_READY) || - ((err 
== TIPC_CONN_SHUTDOWN) || m->msg_control)) - res = 0; - else - res = -ECONNRESET; + /* Send group flow control advertisement when applicable */ + if (tsk->group && msg_in_group(hdr) && !grp_evt) { + __skb_queue_head_init(&xmitq); + tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), + msg_orignode(hdr), msg_origport(hdr), + &xmitq); + tipc_node_distr_xmit(sock_net(sk), &xmitq); } - /* Consume received message (optional) */ - if (likely(!(flags & MSG_PEEK))) { - if ((sock->state != SS_READY) && - (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) - tipc_acknowledge(tport->ref, tport->conn_unacked); - advance_rx_queue(sk); - } + if (skb_cb->bytes_read) + goto exit; + + tsk_advance_rx_queue(sk); + + if (likely(!connected)) + goto exit; + + /* Send connection flow control advertisement when applicable */ + tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) + tipc_sk_send_ack(tsk); exit: release_sock(sk); - return res; + return rc ? rc : copy; } /** - * recv_stream - receive stream-oriented data - * @iocb: (unused) + * tipc_recvstream - receive stream-oriented data + * @sock: network socket * @m: descriptor for message info - * @buf_len: total size of user buffer area + * @buflen: total size of user buffer area * @flags: receive flags * * Used for SOCK_STREAM messages only. If not enough data is available * will optionally wait for more; never truncates data. 
* - * Returns size of returned message data, errno otherwise + * Return: size of returned message data, errno otherwise */ -static int recv_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvstream(struct socket *sock, struct msghdr *m, + size_t buflen, int flags) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - struct sk_buff *buf; - struct tipc_msg *msg; + struct tipc_sock *tsk = tipc_sk(sk); + struct sk_buff *skb; + struct tipc_msg *hdr; + struct tipc_skb_cb *skb_cb; + bool peek = flags & MSG_PEEK; + int offset, required, copy, copied = 0; + int hlen, dlen, err, rc; long timeout; - unsigned int sz; - int sz_to_copy, target, needed; - int sz_copied = 0; - u32 err; - int res = 0; /* Catch invalid receive attempts */ - if (unlikely(!buf_len)) + if (unlikely(!buflen)) return -EINVAL; lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED))) { - res = -ENOTCONN; + if (unlikely(sk->sk_state == TIPC_OPEN)) { + rc = -ENOTCONN; goto exit; } - - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); + required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -restart: - /* Look for a message in receive queue; wait if necessary */ - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { - res = -ENOTCONN; - goto exit; - } - if (timeout <= 0L) { - res = timeout ? 
timeout : -EWOULDBLOCK; - goto exit; + do { + /* Look at first msg in receive queue; wait if necessary */ + rc = tipc_wait_for_rcvmsg(sock, &timeout); + if (unlikely(rc)) + break; + skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + hlen = msg_hdr_sz(hdr); + err = msg_errcode(hdr); + + /* Discard any empty non-errored (SYN-) message */ + if (unlikely(!dlen && !err)) { + tsk_advance_rx_queue(sk); + continue; } - release_sock(sk); - timeout = wait_event_interruptible_timeout(*sk_sleep(sk), - tipc_rx_ready(sock), - timeout); - lock_sock(sk); - } - - /* Look at first message in receive queue */ - buf = skb_peek(&sk->sk_receive_queue); - msg = buf_msg(buf); - sz = msg_data_sz(msg); - err = msg_errcode(msg); - - /* Discard an empty non-errored message & try again */ - if ((!sz) && (!err)) { - advance_rx_queue(sk); - goto restart; - } - - /* Optionally capture sender's address & ancillary data of first msg */ - if (sz_copied == 0) { - set_orig_addr(m, msg); - res = anc_data_recv(m, msg, tport); - if (res) - goto exit; - } - - /* Capture message data (if valid) & compute return value (always) */ - if (!err) { - u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); - - sz -= offset; - needed = (buf_len - sz_copied); - sz_to_copy = (sz <= needed) ? sz : needed; - - res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset, - m->msg_iov, sz_to_copy); - if (res) - goto exit; - sz_copied += sz_to_copy; + /* Collect msg meta data, incl. 
error code and rejected data */ + if (!copied) { + tipc_sk_set_orig_addr(m, skb); + rc = tipc_sk_anc_data_recv(m, skb, tsk); + if (rc) + break; + hdr = buf_msg(skb); + } - if (sz_to_copy < sz) { - if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = - (void *)(unsigned long)(offset + sz_to_copy); - goto exit; + /* Copy data if msg ok, otherwise return error/partial data */ + if (likely(!err)) { + offset = skb_cb->bytes_read; + copy = min_t(int, dlen - offset, buflen - copied); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + break; + copied += copy; + offset += copy; + if (unlikely(offset < dlen)) { + if (!peek) + skb_cb->bytes_read = offset; + break; + } + } else { + rc = 0; + if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control) + rc = -ECONNRESET; + if (copied || rc) + break; } - } else { - if (sz_copied != 0) - goto exit; /* can't add error msg to valid data */ - if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control) - res = 0; - else - res = -ECONNRESET; - } + if (unlikely(peek)) + break; - /* Consume received message (optional) */ - if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) - tipc_acknowledge(tport->ref, tport->conn_unacked); - advance_rx_queue(sk); - } + tsk_advance_rx_queue(sk); + + /* Send connection flow control advertisement when applicable */ + tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) + tipc_sk_send_ack(tsk); - /* Loop around if more data is required */ - if ((sz_copied < buf_len) && /* didn't get all requested data */ - (!skb_queue_empty(&sk->sk_receive_queue) || - (sz_copied < target)) && /* and more is ready or required */ - (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ - (!err)) /* and haven't reached a FIN */ - goto restart; + /* Exit if all requested data or FIN/error received */ + if (copied == buflen || err) + break; + } while (!skb_queue_empty(&sk->sk_receive_queue) || copied < 
required); exit: release_sock(sk); - return sz_copied ? sz_copied : res; + return copied ? copied : rc; } /** @@ -1200,292 +2112,507 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | - POLLWRNORM | POLLWRBAND); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); rcu_read_unlock(); } /** * tipc_data_ready - wake up threads to indicate messages have been received * @sk: socket - * @len: the length of messages */ -static void tipc_data_ready(struct sock *sk, int len) +static void tipc_data_ready(struct sock *sk) { struct socket_wq *wq; + trace_sk_data_ready(sk); + rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | - POLLRDNORM | POLLRDBAND); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | + EPOLLRDNORM | EPOLLRDBAND); rcu_read_unlock(); } -/** - * filter_connect - Handle all incoming messages for a connection-based socket - * @tsock: TIPC socket - * @msg: message - * - * Returns TIPC error status code and socket error status code - * once it encounters some errors - */ -static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) +static void tipc_sock_destruct(struct sock *sk) { - struct socket *sock = tsock->sk.sk_socket; - struct tipc_msg *msg = buf_msg(*buf); - struct sock *sk = &tsock->sk; - u32 retval = TIPC_ERR_NO_PORT; - int res; - - if (msg_mcast(msg)) - return retval; + __skb_queue_purge(&sk->sk_receive_queue); +} - switch ((int)sock->state) { - case SS_CONNECTED: - /* Accept only connection-based messages sent by peer */ - if (msg_connected(msg) && tipc_port_peer_msg(tsock->p, msg)) { - if (unlikely(msg_errcode(msg))) { - sock->state = SS_DISCONNECTING; - __tipc_disconnect(tsock->p); - } - retval = TIPC_OK; - } +static void 
tipc_sk_proto_rcv(struct sock *sk, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_group *grp = tsk->group; + bool wakeup = false; + + switch (msg_user(hdr)) { + case CONN_MANAGER: + tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); + return; + case SOCK_WAKEUP: + tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); + /* coupled with smp_rmb() in tipc_wait_for_cond() */ + smp_wmb(); + tsk->cong_link_cnt--; + wakeup = true; + tipc_sk_push_backlog(tsk, false); break; - case SS_CONNECTING: - /* Accept only ACK or NACK message */ - if (unlikely(msg_errcode(msg))) { - sock->state = SS_DISCONNECTING; - sk->sk_err = -ECONNREFUSED; - retval = TIPC_OK; - break; + case GROUP_PROTOCOL: + tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); + break; + case TOP_SRV: + tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, + hdr, inputq, xmitq); + break; + default: + break; + } + + if (wakeup) + sk->sk_write_space(sk); + + kfree_skb(skb); +} + +/** + * tipc_sk_filter_connect - check incoming message for a connection-based socket + * @tsk: TIPC socket + * @skb: pointer to message buffer. 
+ * @xmitq: for Nagle ACK if any + * Return: true if message should be added to receive queue, false otherwise + */ +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_msg *hdr = buf_msg(skb); + bool con_msg = msg_connected(hdr); + u32 pport = tsk_peer_port(tsk); + u32 pnode = tsk_peer_node(tsk); + u32 oport = msg_origport(hdr); + u32 onode = msg_orignode(hdr); + int err = msg_errcode(hdr); + unsigned long delay; + + if (unlikely(msg_mcast(hdr))) + return false; + tsk->oneway = 0; + + switch (sk->sk_state) { + case TIPC_CONNECTING: + /* Setup ACK */ + if (likely(con_msg)) { + if (err) + break; + tipc_sk_finish_conn(tsk, oport, onode); + msg_set_importance(&tsk->phdr, msg_importance(hdr)); + /* ACK+ message with data is added to receive queue */ + if (msg_data_sz(hdr)) + return true; + /* Empty ACK-, - wake up sleeping connect() and drop */ + sk->sk_state_change(sk); + msg_set_dest_droppable(hdr, 1); + return false; } + /* Ignore connectionless message if not from listening socket */ + if (oport != pport || onode != pnode) + return false; - if (unlikely(!msg_connected(msg))) + /* Rejected SYN */ + if (err != TIPC_ERR_OVERLOAD) break; - res = auto_connect(sock, msg); - if (res) { - sock->state = SS_DISCONNECTING; - sk->sk_err = res; - retval = TIPC_OK; + /* Prepare for new setup attempt if we have a SYN clone */ + if (skb_queue_empty(&sk->sk_write_queue)) break; - } - - /* If an incoming message is an 'ACK-', it should be - * discarded here because it doesn't contain useful - * data. In addition, we should try to wake up - * connect() routine if sleeping. 
- */ - if (msg_data_sz(msg) == 0) { - kfree_skb(*buf); - *buf = NULL; - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); - } - retval = TIPC_OK; - break; - case SS_LISTENING: - case SS_UNCONNECTED: + get_random_bytes(&delay, 2); + delay %= (tsk->conn_timeout / 4); + delay = msecs_to_jiffies(delay + 100); + sk_reset_timer(sk, &sk->sk_timer, jiffies + delay); + return false; + case TIPC_OPEN: + case TIPC_DISCONNECTING: + return false; + case TIPC_LISTEN: /* Accept only SYN message */ - if (!msg_connected(msg) && !(msg_errcode(msg))) - retval = TIPC_OK; - break; - case SS_DISCONNECTING: - break; + if (!msg_is_syn(hdr) && + tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT) + return false; + if (!con_msg && !err) + return true; + return false; + case TIPC_ESTABLISHED: + if (!skb_queue_empty(&sk->sk_write_queue)) + tipc_sk_push_backlog(tsk, false); + /* Accept only connection-based messages sent by peer */ + if (likely(con_msg && !err && pport == oport && + pnode == onode)) { + if (msg_ack_required(hdr)) { + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (skb) { + msg_set_nagle_ack(buf_msg(skb)); + __skb_queue_tail(xmitq, skb); + } + } + return true; + } + if (!tsk_peer_msg(tsk, hdr)) + return false; + if (!err) + return true; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, pnode, tsk->portid); + sk->sk_state_change(sk); + return true; default: - pr_err("Unknown socket state %u\n", sock->state); + pr_err("Unknown sk_state %u\n", sk->sk_state); } - return retval; + /* Abort connection setup attempt */ + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; } /** * rcvbuf_limit - get proper overload limit of socket receive queue * @sk: socket - * @buf: message + * @skb: message * - * For all connection oriented messages, irrespective of importance, - * the default overload value (i.e. 67MB) is set as limit. 
+ * For connection oriented messages, irrespective of importance, + * default queue limit is 2 MB. * - * For all connectionless messages, by default new queue limits are - * as belows: + * For connectionless messages, queue limits are based on message + * importance as follows: * - * TIPC_LOW_IMPORTANCE (4 MB) - * TIPC_MEDIUM_IMPORTANCE (8 MB) - * TIPC_HIGH_IMPORTANCE (16 MB) - * TIPC_CRITICAL_IMPORTANCE (32 MB) + * TIPC_LOW_IMPORTANCE (2 MB) + * TIPC_MEDIUM_IMPORTANCE (4 MB) + * TIPC_HIGH_IMPORTANCE (8 MB) + * TIPC_CRITICAL_IMPORTANCE (16 MB) * - * Returns overload limit according to corresponding message importance + * Return: overload limit according to corresponding message importance */ -static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - unsigned int limit; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); - if (msg_connected(msg)) - limit = sysctl_tipc_rmem[2]; - else - limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << - msg_importance(msg); - return limit; + if (unlikely(msg_in_group(hdr))) + return READ_ONCE(sk->sk_rcvbuf); + + if (unlikely(!msg_connected(hdr))) + return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr); + + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return READ_ONCE(sk->sk_rcvbuf); + + return FLOWCTL_MSG_LIM; } /** - * filter_rcv - validate incoming message + * tipc_sk_filter_rcv - validate incoming message * @sk: socket - * @buf: message + * @skb: pointer to message. + * @xmitq: output message area (FIXME) * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * - * Called with socket lock already taken; port lock may also be taken. 
- * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * Called with socket lock already taken */ -static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) +static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { - struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(buf); - unsigned int limit = rcvbuf_limit(sk, buf); - u32 res = TIPC_OK; - - /* Reject message if it is wrong sort of message for socket */ - if (msg_type(msg) > TIPC_DIRECT_MSG) - return TIPC_ERR_NO_PORT; + bool sk_conn = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = buf_msg(skb); + struct net *net = sock_net(sk); + struct sk_buff_head inputq; + int mtyp = msg_type(hdr); + int limit, err = TIPC_OK; + + trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); + TIPC_SKB_CB(skb)->bytes_read = 0; + __skb_queue_head_init(&inputq); + __skb_queue_tail(&inputq, skb); + + if (unlikely(!msg_isdata(hdr))) + tipc_sk_proto_rcv(sk, &inputq, xmitq); + + if (unlikely(grp)) + tipc_group_filter_msg(grp, &inputq, xmitq); + + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) + tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq); + + /* Validate and add to receive buffer if there is space */ + while ((skb = __skb_dequeue(&inputq))) { + hdr = buf_msg(skb); + limit = rcvbuf_limit(sk, skb); + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) || + (!sk_conn && msg_connected(hdr)) || + (!grp && msg_in_group(hdr))) + err = TIPC_ERR_NO_PORT; + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { + trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, + "err_overload2!"); + sk_drops_inc(sk); + err = TIPC_ERR_OVERLOAD; + } - if (sock->state == SS_READY) { - if (msg_connected(msg)) - return TIPC_ERR_NO_PORT; - } else { - res = filter_connect(tipc_sk(sk), &buf); - if (res != TIPC_OK || buf == NULL) - return res; + if (unlikely(err)) { + if 
(tipc_msg_reverse(tipc_own_addr(net), &skb, err)) { + trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, + "@filter_rcv!"); + __skb_queue_tail(xmitq, skb); + } + err = TIPC_OK; + continue; + } + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + trace_tipc_sk_overlimit2(sk, skb, TIPC_DUMP_ALL, + "rcvq >90% allocated!"); + sk->sk_data_ready(sk); } - - /* Reject message if there isn't room to queue it */ - if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) - return TIPC_ERR_OVERLOAD; - - /* Enqueue message */ - TIPC_SKB_CB(buf)->handle = 0; - __skb_queue_tail(&sk->sk_receive_queue, buf); - skb_set_owner_r(buf, sk); - - sk->sk_data_ready(sk, 0); - return TIPC_OK; } /** - * backlog_rcv - handle incoming message from backlog queue + * tipc_sk_backlog_rcv - handle incoming message from backlog queue * @sk: socket - * @buf: message + * @skb: message * - * Caller must hold socket lock, but not port lock. - * - * Returns 0 + * Caller must hold socket lock */ -static int backlog_rcv(struct sock *sk, struct sk_buff *buf) +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - u32 res; + unsigned int before = sk_rmem_alloc_get(sk); + struct sk_buff_head xmitq; + unsigned int added; - res = filter_rcv(sk, buf); - if (res) - tipc_reject_msg(buf, res); + __skb_queue_head_init(&xmitq); + + tipc_sk_filter_rcv(sk, skb, &xmitq); + added = sk_rmem_alloc_get(sk) - before; + atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); + + /* Send pending response/rejected messages, if any */ + tipc_node_distr_xmit(sock_net(sk), &xmitq); return 0; } /** - * dispatch - handle incoming message - * @tport: TIPC port that received message - * @buf: message - * - * Called with port lock already taken. 
+ * tipc_sk_enqueue - extract all buffers with destination 'dport' from + * inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @xmitq: output queue * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * Caller must hold socket lock */ -static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) +static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff_head *xmitq) { - struct sock *sk = tport->sk; - u32 res; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); + struct sk_buff *skb; + unsigned int lim; + atomic_t *dcnt; + u32 onode; + + while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return; + + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return; + + /* Add message directly to receive queue if possible */ + if (!sock_owned_by_user(sk)) { + tipc_sk_filter_rcv(sk, skb, xmitq); + continue; + } - /* - * Process message if socket is unlocked; otherwise add to backlog queue - * - * This code is based on sk_receive_skb(), but must be distinct from it - * since a TIPC-specific filter/reject mechanism is utilized - */ - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) { - res = filter_rcv(sk, buf); - } else { - if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) - res = TIPC_ERR_OVERLOAD; - else - res = TIPC_OK; - } - bh_unlock_sock(sk); + /* Try backlog, compensating for double-counted bytes */ + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (!sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) { + trace_tipc_sk_overlimit1(sk, skb, TIPC_DUMP_ALL, + "bklg & rcvq >90% allocated!"); + continue; + } - return res; + trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!"); + /* 
Overload => reject message back to sender */ + onode = tipc_own_addr(sock_net(sk)); + sk_drops_inc(sk); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) { + trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL, + "@sk_enqueue!"); + __skb_queue_tail(xmitq, skb); + } + break; + } } /** - * wakeupdispatch - wake up port after congestion - * @tport: port to wakeup - * - * Called with port lock already taken. + * tipc_sk_rcv - handle a chain of incoming buffers + * @net: the associated network namespace + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue */ -static void wakeupdispatch(struct tipc_port *tport) +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) +{ + struct sk_buff_head xmitq; + u32 dnode, dport = 0; + int err; + struct tipc_sock *tsk; + struct sock *sk; + struct sk_buff *skb; + + __skb_queue_head_init(&xmitq); + while (skb_queue_len(inputq)) { + dport = tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + + if (likely(tsk)) { + sk = &tsk->sk; + if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { + tipc_sk_enqueue(inputq, sk, dport, &xmitq); + spin_unlock_bh(&sk->sk_lock.slock); + } + /* Send pending response/rejected messages, if any */ + tipc_node_distr_xmit(sock_net(sk), &xmitq); + sock_put(sk); + continue; + } + /* No destination socket => dequeue skb if still there */ + skb = tipc_skb_dequeue(inputq, dport); + if (!skb) + return; + + /* Try secondary lookup if unresolved named message */ + err = TIPC_ERR_NO_PORT; + if (tipc_msg_lookup_dest(net, skb, &err)) + goto xmit; + + /* Prepare for message rejection */ + if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + continue; + + trace_tipc_sk_rej_msg(NULL, skb, TIPC_DUMP_NONE, "@sk_rcv!"); +xmit: + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } +} + +static int tipc_wait_for_connect(struct socket *sock, long 
*timeo_p) { - struct sock *sk = tport->sk; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk; + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (!*timeo_p) + return -ETIMEDOUT; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + if (sk->sk_state == TIPC_DISCONNECTING) + break; + + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk), + &wait); + remove_wait_queue(sk_sleep(sk), &wait); + } while (!done); + return 0; +} - sk->sk_write_space(sk); +static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) +{ + if (addr->family != AF_TIPC) + return false; + if (addr->addrtype == TIPC_SERVICE_RANGE) + return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); + return (addr->addrtype == TIPC_SERVICE_ADDR || + addr->addrtype == TIPC_SOCKET_ADDR); } /** - * connect - establish a connection to another TIPC port + * tipc_connect - establish a connection to another TIPC port * @sock: socket structure * @dest: socket address for destination port * @destlen: size of socket address data structure * @flags: file-related flags associated with socket * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int connect(struct socket *sock, struct sockaddr *dest, int destlen, - int flags) +static int tipc_connect(struct socket *sock, struct sockaddr_unsized *dest, + int destlen, int flags) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; - unsigned int timeout; - int res; + long timeout = (flags & O_NONBLOCK) ? 
0 : tsk->conn_timeout; + int previous; + int res = 0; + + if (destlen != sizeof(struct sockaddr_tipc)) + return -EINVAL; lock_sock(sk); - /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */ - if (sock->state == SS_READY) { - res = -EOPNOTSUPP; + if (tsk->group) { + res = -EINVAL; goto exit; } - /* - * Reject connection attempt using multicast address - * - * Note: send_msg() validates the rest of the address fields, - * so there's no need to do it here - */ - if (dst->addrtype == TIPC_ADDR_MCAST) { + if (dst->family == AF_UNSPEC) { + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); + if (!tipc_sk_type_connectionless(sk)) + res = -EINVAL; + goto exit; + } + if (!tipc_sockaddr_is_sane(dst)) { + res = -EINVAL; + goto exit; + } + /* DGRAM/RDM connect(), just save the destaddr */ + if (tipc_sk_type_connectionless(sk)) { + memcpy(&tsk->peer, dest, destlen); + goto exit; + } else if (dst->addrtype == TIPC_SERVICE_RANGE) { res = -EINVAL; goto exit; } - timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; + previous = sk->sk_state; - switch (sock->state) { - case SS_UNCONNECTED: + switch (sk->sk_state) { + case TIPC_OPEN: /* Send a 'SYN-' to destination */ m.msg_name = dest; m.msg_namelen = destlen; + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); /* If connect is in non-blocking case, set MSG_DONTWAIT to * indicate send_msg() is never blocked. @@ -1493,131 +2620,128 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, if (!timeout) m.msg_flags = MSG_DONTWAIT; - res = send_msg(NULL, sock, &m, 0); + res = __tipc_sendmsg(sock, &m, 0); if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; - /* Just entered SS_CONNECTING state; the only + /* Just entered TIPC_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. 
*/ res = -EINPROGRESS; + fallthrough; + case TIPC_CONNECTING: + if (!timeout) { + if (previous == TIPC_CONNECTING) + res = -EALREADY; + goto exit; + } + timeout = msecs_to_jiffies(timeout); + /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ + res = tipc_wait_for_connect(sock, &timeout); break; - case SS_CONNECTING: - res = -EALREADY; - break; - case SS_CONNECTED: + case TIPC_ESTABLISHED: res = -EISCONN; break; default: res = -EINVAL; - goto exit; - } - - if (sock->state == SS_CONNECTING) { - if (!timeout) - goto exit; - - /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ - release_sock(sk); - res = wait_event_interruptible_timeout(*sk_sleep(sk), - sock->state != SS_CONNECTING, - timeout ? (long)msecs_to_jiffies(timeout) - : MAX_SCHEDULE_TIMEOUT); - lock_sock(sk); - if (res <= 0) { - if (res == 0) - res = -ETIMEDOUT; - else - ; /* leave "res" unchanged */ - goto exit; - } } - if (unlikely(sock->state == SS_DISCONNECTING)) - res = sock_error(sk); - else - res = 0; - exit: release_sock(sk); return res; } /** - * listen - allow socket to listen for incoming connections + * tipc_listen - allow socket to listen for incoming connections * @sock: socket structure * @len: (unused) * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int listen(struct socket *sock, int len) +static int tipc_listen(struct socket *sock, int len) { struct sock *sk = sock->sk; int res; lock_sock(sk); - - if (sock->state != SS_UNCONNECTED) - res = -EINVAL; - else { - sock->state = SS_LISTENING; - res = 0; - } - + res = tipc_set_sk_state(sk, TIPC_LISTEN); release_sock(sk); + return res; } +static int tipc_wait_for_accept(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int err; + + /* True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. 
+ * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. + */ + for (;;) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); + release_sock(sk); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EAGAIN; + if (!timeo) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + } + return err; +} + /** - * accept - wait for connection request + * tipc_accept - wait for connection request * @sock: listening socket - * @newsock: new socket that is to be connected - * @flags: file-related flags associated with socket + * @new_sock: new socket that is to be connected + * @arg: arguments for accept * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int accept(struct socket *sock, struct socket *new_sock, int flags) +static int tipc_accept(struct socket *sock, struct socket *new_sock, + struct proto_accept_arg *arg) { struct sock *new_sk, *sk = sock->sk; - struct sk_buff *buf; struct tipc_sock *new_tsock; - struct tipc_port *new_tport; + struct msghdr m = {NULL,}; struct tipc_msg *msg; - u32 new_ref; - + struct sk_buff *buf; + long timeo; int res; lock_sock(sk); - if (sock->state != SS_LISTENING) { + if (sk->sk_state != TIPC_LISTEN) { res = -EINVAL; goto exit; } - - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (flags & O_NONBLOCK) { - res = -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue))); - lock_sock(sk); - if (res) - goto exit; - } + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); + res = tipc_wait_for_accept(sock, timeo); + if (res) + goto exit; buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + 
res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); - new_tport = new_tsock->p; - new_ref = new_tport->ref; msg = buf_msg(buf); /* we lock on new_sk; but lockdep sees the lock on sk */ @@ -1627,55 +2751,49 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) * Reject any stray messages received by new socket * before the socket lock was taken (very, very unlikely) */ - reject_rx_queue(new_sk); + tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT); /* Connect new socket to it's peer */ - new_tsock->peer_name.ref = msg_origport(msg); - new_tsock->peer_name.node = msg_orignode(msg); - tipc_connect(new_ref, &new_tsock->peer_name); - new_sock->state = SS_CONNECTED; + tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); - tipc_set_portimportance(new_ref, msg_importance(msg)); + tsk_set_importance(new_sk, msg_importance(msg)); if (msg_named(msg)) { - new_tport->conn_type = msg_nametype(msg); - new_tport->conn_instance = msg_nameinst(msg); + new_tsock->conn_addrtype = TIPC_SERVICE_ADDR; + msg_set_nametype(&new_tsock->phdr, msg_nametype(msg)); + msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg)); } /* - * Respond to 'SYN-' by discarding it & returning 'ACK'-. - * Respond to 'SYN+' by queuing it on new socket. + * Respond to 'SYN-' by discarding it & returning 'ACK'. + * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. 
*/ if (!msg_data_sz(msg)) { - struct msghdr m = {NULL,}; - - advance_rx_queue(sk); - send_packet(NULL, new_sock, &m, 0); + tsk_advance_rx_queue(sk); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0); + __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); - exit: release_sock(sk); return res; } /** - * shutdown - shutdown socket connection + * tipc_shutdown - shutdown socket connection * @sock: socket structure * @how: direction to close (must be SHUT_RDWR) * * Terminates connection (if necessary), then purges socket's receive queue. * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int shutdown(struct socket *sock, int how) +static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - struct sk_buff *buf; int res; if (how != SHUT_RDWR) @@ -1683,48 +2801,326 @@ static int shutdown(struct socket *sock, int how) lock_sock(sk); - switch (sock->state) { - case SS_CONNECTING: - case SS_CONNECTED: - -restart: - /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ - buf = __skb_dequeue(&sk->sk_receive_queue); - if (buf) { - if (TIPC_SKB_CB(buf)->handle != 0) { - kfree_skb(buf); - goto restart; - } - tipc_disconnect(tport->ref); - tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); - } else { - tipc_shutdown(tport->ref); - } - - sock->state = SS_DISCONNECTING; - - /* fall through */ - - case SS_DISCONNECTING: + trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); + __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); + sk->sk_shutdown = SHUTDOWN_MASK; + if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ __skb_queue_purge(&sk->sk_receive_queue); - /* Wake up anyone sleeping in poll */ - sk->sk_state_change(sk); res = 0; - break; - - default: + } else { res = -ENOTCONN; } + /* Wake up anyone sleeping in 
poll. */ + sk->sk_state_change(sk); release_sock(sk); return res; } +static void tipc_sk_check_probing_state(struct sock *sk, + struct sk_buff_head *list) +{ + struct tipc_sock *tsk = tipc_sk(sk); + u32 pnode = tsk_peer_node(tsk); + u32 pport = tsk_peer_port(tsk); + u32 self = tsk_own_node(tsk); + u32 oport = tsk->portid; + struct sk_buff *skb; + + if (tsk->probe_unacked) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNABORTED; + tipc_node_remove_conn(sock_net(sk), pnode, pport); + sk->sk_state_change(sk); + return; + } + /* Prepare new probe */ + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, + pnode, self, pport, oport, TIPC_OK); + if (skb) + __skb_queue_tail(list, skb); + tsk->probe_unacked = true; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); +} + +static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) +{ + struct tipc_sock *tsk = tipc_sk(sk); + + /* Try again later if dest link is congested */ + if (tsk->cong_link_cnt) { + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); + return; + } + /* Prepare SYN for retransmit */ + tipc_msg_skb_clone(&sk->sk_write_queue, list); +} + +static void tipc_sk_timeout(struct timer_list *t) +{ + struct sock *sk = timer_container_of(sk, t, sk_timer); + struct tipc_sock *tsk = tipc_sk(sk); + u32 pnode = tsk_peer_node(tsk); + struct sk_buff_head list; + int rc = 0; + + __skb_queue_head_init(&list); + bh_lock_sock(sk); + + /* Try again later if socket is busy */ + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); + bh_unlock_sock(sk); + sock_put(sk); + return; + } + + if (sk->sk_state == TIPC_ESTABLISHED) + tipc_sk_check_probing_state(sk, &list); + else if (sk->sk_state == TIPC_CONNECTING) + tipc_sk_retry_connect(sk, &list); + + bh_unlock_sock(sk); + + if (!skb_queue_empty(&list)) + rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid); + + /* SYN messages may cause link congestion */ + if (rc 
== -ELINKCONG) { + tipc_dest_push(&tsk->cong_links, pnode, 0); + tsk->cong_link_cnt = 1; + } + sock_put(sk); +} + +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_socket_addr skaddr; + struct publication *p; + u32 key; + + if (tipc_sk_connected(sk)) + return -EINVAL; + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) + return -EADDRINUSE; + skaddr.ref = tsk->portid; + skaddr.node = tipc_own_addr(net); + p = tipc_nametbl_publish(net, ua, &skaddr, key); + if (unlikely(!p)) + return -EINVAL; + + list_add(&p->binding_sock, &tsk->publications); + tsk->pub_count++; + tsk->published = true; + return 0; +} + +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) +{ + struct net *net = sock_net(&tsk->sk); + struct publication *safe, *p; + struct tipc_uaddr _ua; + int rc = -EINVAL; + + list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) { + if (!ua) { + tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope, + p->sr.type, p->sr.lower, p->sr.upper); + tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key); + continue; + } + /* Unbind specific publication */ + if (p->scope != ua->scope) + continue; + if (p->sr.type != ua->sr.type) + continue; + if (p->sr.lower != ua->sr.lower) + continue; + if (p->sr.upper != ua->sr.upper) + break; + tipc_nametbl_withdraw(net, ua, &p->sk, p->key); + rc = 0; + break; + } + if (list_empty(&tsk->publications)) { + tsk->published = 0; + rc = 0; + } + return rc; +} + +/* tipc_sk_reinit: set non-zero address in all existing sockets + * when we go from standalone to network mode. 
+ */ +void tipc_sk_reinit(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct rhashtable_iter iter; + struct tipc_sock *tsk; + struct tipc_msg *msg; + + rhashtable_walk_enter(&tn->sk_rht, &iter); + + do { + rhashtable_walk_start(&iter); + + while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { + sock_hold(&tsk->sk); + rhashtable_walk_stop(&iter); + lock_sock(&tsk->sk); + msg = &tsk->phdr; + msg_set_prevnode(msg, tipc_own_addr(net)); + msg_set_orignode(msg, tipc_own_addr(net)); + release_sock(&tsk->sk); + rhashtable_walk_start(&iter); + sock_put(&tsk->sk); + } + + rhashtable_walk_stop(&iter); + } while (tsk == ERR_PTR(-EAGAIN)); + + rhashtable_walk_exit(&iter); +} + +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_sock *tsk; + + rcu_read_lock(); + tsk = rhashtable_lookup(&tn->sk_rht, &portid, tsk_rht_params); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); + + return tsk; +} + +static int tipc_sk_insert(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = get_random_u32_below(remaining) + TIPC_MIN_PORT; + + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, + tsk_rht_params)) + return 0; + sock_put(&tsk->sk); + } + + return -1; +} + +static void tipc_sk_remove(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); + + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) + __sock_put(sk); +} + +static const struct rhashtable_params tsk_rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, 
node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, +}; + +int tipc_sk_rht_init(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return rhashtable_init(&tn->sk_rht, &tsk_rht_params); +} + +void tipc_sk_rht_destroy(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + /* Wait for socket readers to complete */ + synchronize_net(); + + rhashtable_destroy(&tn->sk_rht); +} + +static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_uaddr ua; + int rc; + + if (mreq->type < TIPC_RESERVED_TYPES) + return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; + if (mreq->scope != TIPC_NODE_SCOPE) + mreq->scope = TIPC_CLUSTER_SCOPE; + if (grp) + return -EACCES; + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); + if (!grp) + return -ENOMEM; + tsk->group = grp; + msg_set_lookup_scope(hdr, mreq->scope); + msg_set_nametype(hdr, mreq->type); + msg_set_dest_droppable(hdr, true); + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope, + mreq->type, mreq->instance, mreq->instance); + tipc_nametbl_build_group(net, grp, &ua); + rc = tipc_sk_publish(tsk, &ua); + if (rc) { + tipc_group_delete(net, grp); + tsk->group = NULL; + return rc; + } + /* Eliminate any risk that a broadcast overtakes sent JOINs */ + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); + return rc; +} + +static int tipc_sk_leave(struct tipc_sock *tsk) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_uaddr ua; + int scope; + + if (!grp) + return -EINVAL; + ua.addrtype = TIPC_SERVICE_RANGE; + tipc_group_self(grp, &ua.sr, &scope); + ua.scope = scope; + 
tipc_group_delete(net, grp); + tsk->group = NULL; + tipc_sk_withdraw(tsk, &ua); + return 0; +} + /** - * setsockopt - set socket option + * tipc_setsockopt - set socket option * @sock: socket structure * @lvl: option level * @opt: option identifier @@ -1734,44 +3130,79 @@ restart: * For stream sockets only, accepts and ignores all IPPROTO_TCP options * (to ease compatibility). * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, - unsigned int ol) +static int tipc_setsockopt(struct socket *sock, int lvl, int opt, + sockptr_t ov, unsigned int ol) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - u32 value; - int res; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group_req mreq; + u32 value = 0; + int res = 0; if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) return 0; if (lvl != SOL_TIPC) return -ENOPROTOOPT; - if (ol < sizeof(value)) - return -EINVAL; - res = get_user(value, (u32 __user *)ov); - if (res) - return res; + + switch (opt) { + case TIPC_IMPORTANCE: + case TIPC_SRC_DROPPABLE: + case TIPC_DEST_DROPPABLE: + case TIPC_CONN_TIMEOUT: + case TIPC_NODELAY: + if (ol < sizeof(value)) + return -EINVAL; + if (copy_from_sockptr(&value, ov, sizeof(u32))) + return -EFAULT; + break; + case TIPC_GROUP_JOIN: + if (ol < sizeof(mreq)) + return -EINVAL; + if (copy_from_sockptr(&mreq, ov, sizeof(mreq))) + return -EFAULT; + break; + default: + if (!sockptr_is_null(ov) || ol) + return -EINVAL; + } lock_sock(sk); switch (opt) { case TIPC_IMPORTANCE: - res = tipc_set_portimportance(tport->ref, value); + res = tsk_set_importance(sk, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) - res = tipc_set_portunreliable(tport->ref, value); + tsk_set_unreliable(tsk, value); else res = -ENOPROTOOPT; break; case TIPC_DEST_DROPPABLE: - res = tipc_set_portunreturnable(tport->ref, value); + tsk_set_unreturnable(tsk, 
value); break; case TIPC_CONN_TIMEOUT: tipc_sk(sk)->conn_timeout = value; - /* no need to set "res", since already 0 at this point */ + break; + case TIPC_MCAST_BROADCAST: + tsk->mc_method.rcast = false; + tsk->mc_method.mandatory = true; + break; + case TIPC_MCAST_REPLICAST: + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; + break; + case TIPC_GROUP_JOIN: + res = tipc_sk_join(tsk, &mreq); + break; + case TIPC_GROUP_LEAVE: + res = tipc_sk_leave(tsk); + break; + case TIPC_NODELAY: + tsk->nodelay = !!value; + tsk_set_nagle(tsk); break; default: res = -EINVAL; @@ -1783,7 +3214,7 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, } /** - * getsockopt - get socket option + * tipc_getsockopt - get socket option * @sock: socket structure * @lvl: option level * @opt: option identifier @@ -1793,14 +3224,15 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, * For stream sockets only, returns 0 length result for all IPPROTO_TCP options * (to ease compatibility). 
* - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, - int __user *ol) +static int tipc_getsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, int __user *ol) { struct sock *sk = sock->sk; - struct tipc_port *tport = tipc_sk_port(sk); - int len; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_service_range seq; + int len, scope; u32 value; int res; @@ -1816,16 +3248,16 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, switch (opt) { case TIPC_IMPORTANCE: - res = tipc_portimportance(tport->ref, &value); + value = tsk_importance(tsk); break; case TIPC_SRC_DROPPABLE: - res = tipc_portunreliable(tport->ref, &value); + value = tsk_unreliable(tsk); break; case TIPC_DEST_DROPPABLE: - res = tipc_portunreturnable(tport->ref, &value); + value = tsk_unreturnable(tsk); break; case TIPC_CONN_TIMEOUT: - value = tipc_sk(sk)->conn_timeout; + value = tsk->conn_timeout; /* no need to set "res", since already 0 at this point */ break; case TIPC_NODE_RECVQ_DEPTH: @@ -1834,6 +3266,15 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; + case TIPC_SOCK_RECVQ_USED: + value = sk_rmem_alloc_get(sk); + break; + case TIPC_GROUP_JOIN: + seq.type = 0; + if (tsk->group) + tipc_group_self(tsk->group, &seq, &scope); + value = seq.type; + break; default: res = -EINVAL; } @@ -1852,69 +3293,120 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, return put_user(sizeof(value), ol); } +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct net *net = sock_net(sock->sk); + struct tipc_sioc_nodeid_req nr = {0}; + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, 
sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(net, + lnr.bearer_id & 0xffff, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + case SIOCGETNODEID: + if (copy_from_user(&nr, argp, sizeof(nr))) + return -EFAULT; + if (!tipc_node_get_id(net, nr.peer, nr.node_id)) + return -EADDRNOTAVAIL; + if (copy_to_user(argp, &nr, sizeof(nr))) + return -EFAULT; + return 0; + default: + return -ENOIOCTLCMD; + } +} + +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_SOCKET_ADDR; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_SOCKET_ADDR; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .release = release, - .bind = bind, - .connect = connect, - .socketpair = sock_no_socketpair, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = tipc_socketpair, .accept = sock_no_accept, - .getname = get_name, - .poll = poll, - .ioctl = sock_no_ioctl, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, .listen = sock_no_listen, - .shutdown = shutdown, - .setsockopt = setsockopt, - .getsockopt = getsockopt, - .sendmsg = send_msg, - .recvmsg = recv_msg, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + 
.sendmsg = tipc_sendmsg, + .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops packet_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .release = release, - .bind = bind, - .connect = connect, - .socketpair = sock_no_socketpair, - .accept = accept, - .getname = get_name, - .poll = poll, - .ioctl = sock_no_ioctl, - .listen = listen, - .shutdown = shutdown, - .setsockopt = setsockopt, - .getsockopt = getsockopt, - .sendmsg = send_packet, - .recvmsg = recv_msg, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = tipc_socketpair, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_send_packet, + .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops stream_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .release = release, - .bind = bind, - .connect = connect, - .socketpair = sock_no_socketpair, - .accept = accept, - .getname = get_name, - .poll = poll, - .ioctl = sock_no_ioctl, - .listen = listen, - .shutdown = shutdown, - .setsockopt = setsockopt, - .getsockopt = getsockopt, - .sendmsg = send_stream, - .recvmsg = recv_stream, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = tipc_socketpair, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_sendstream, + .recvmsg = tipc_recvstream, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct net_proto_family tipc_family_ops = { @@ -1930,16 +3422,10 @@ static struct proto tipc_proto = { .sysctl_rmem = sysctl_tipc_rmem }; -static struct proto 
tipc_proto_kern = { - .name = "TIPC", - .obj_size = sizeof(struct tipc_sock), - .sysctl_rmem = sysctl_tipc_rmem -}; - /** * tipc_socket_init - initialize TIPC socket interface * - * Returns 0 on success, errno otherwise + * Return: 0 on success, errno otherwise */ int tipc_socket_init(void) { @@ -1957,8 +3443,6 @@ int tipc_socket_init(void) proto_unregister(&tipc_proto); goto out; } - - sockets_enabled = 1; out: return res; } @@ -1968,10 +3452,557 @@ int tipc_socket_init(void) */ void tipc_socket_stop(void) { - if (!sockets_enabled) - return; - - sockets_enabled = 0; sock_unregister(tipc_family_ops.family); proto_unregister(&tipc_proto); } + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) +{ + u32 peer_node, peer_port; + u32 conn_type, conn_instance; + struct nlattr *nest; + + peer_node = tsk_peer_node(tsk); + peer_port = tsk_peer_port(tsk); + conn_type = msg_nametype(&tsk->phdr); + conn_instance = msg_nameinst(&tsk->phdr); + nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) + goto msg_full; + + if (tsk->conn_addrtype != 0) { + if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance)) + goto msg_full; + } + nla_nest_end(skb, nest); + + return 0; + +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock + *tsk) +{ + struct net *net = sock_net(skb->sk); + struct sock *sk = &tsk->sk; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) || + nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net))) + return -EMSGSIZE; + + if (tipc_sk_connected(sk)) { + if (__tipc_nl_add_sk_con(skb, tsk)) + return 
-EMSGSIZE; + } else if (!list_empty(&tsk->publications)) { + if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) + return -EMSGSIZE; + } + return 0; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); + if (!attrs) + goto genlmsg_cancel; + + if (__tipc_nl_add_sk_info(skb, tsk)) + goto attr_msg_cancel; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)) +{ + struct rhashtable_iter *iter = (void *)cb->args[4]; + struct tipc_sock *tsk; + int err; + + rhashtable_walk_start(iter); + while ((tsk = rhashtable_walk_next(iter)) != NULL) { + if (IS_ERR(tsk)) { + if (PTR_ERR(tsk) == -EAGAIN) + continue; + break; + } + + sock_hold(&tsk->sk); + rhashtable_walk_stop(iter); + lock_sock(&tsk->sk); + err = skb_handler(skb, cb, tsk); + if (err) { + release_sock(&tsk->sk); + sock_put(&tsk->sk); + goto out; + } + release_sock(&tsk->sk); + rhashtable_walk_start(iter); + sock_put(&tsk->sk); + } + rhashtable_walk_stop(iter); +out: + return skb->len; +} +EXPORT_SYMBOL(tipc_nl_sk_walk); + +int tipc_dump_start(struct netlink_callback *cb) +{ + return __tipc_dump_start(cb, sock_net(cb->skb->sk)); +} +EXPORT_SYMBOL(tipc_dump_start); + +int __tipc_dump_start(struct netlink_callback *cb, struct net *net) +{ + /* tipc_nl_name_table_dump() uses cb->args[0...3]. 
*/ + struct rhashtable_iter *iter = (void *)cb->args[4]; + struct tipc_net *tn = tipc_net(net); + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[4] = (long)iter; + } + + rhashtable_walk_enter(&tn->sk_rht, iter); + return 0; +} + +int tipc_dump_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *hti = (void *)cb->args[4]; + + rhashtable_walk_exit(hti); + kfree(hti); + return 0; +} +EXPORT_SYMBOL(tipc_dump_done); + +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk, u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)) +{ + struct sock *sk = &tsk->sk; + struct nlattr *attrs; + struct nlattr *stat; + + /*filter response w.r.t sk_state*/ + if (!(sk_filter_state & (1 << sk->sk_state))) + return 0; + + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); + if (!attrs) + goto msg_cancel; + + if (__tipc_nl_add_sk_info(skb, tsk)) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) || + nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) || + nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || + nla_put_u32(skb, TIPC_NLA_SOCK_UID, + from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk), + sk_uid(sk))) || + nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, + tipc_diag_gen_cookie(sk), + TIPC_NLA_SOCK_PAD)) + goto attr_msg_cancel; + + stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT); + if (!stat) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ, + skb_queue_len(&sk->sk_receive_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, + skb_queue_len(&sk->sk_write_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, + sk_drops_read(sk))) + goto stat_msg_cancel; + + if (tsk->cong_link_cnt && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG)) + goto stat_msg_cancel; + + if (tsk_conn_cong(tsk) && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG)) + goto stat_msg_cancel; + + 
nla_nest_end(skb, stat); + + if (tsk->group) + if (tipc_group_fill_sock_diag(tsk->group, skb)) + goto stat_msg_cancel; + + nla_nest_end(skb, attrs); + + return 0; + +stat_msg_cancel: + nla_nest_cancel(skb, stat); +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +msg_cancel: + return -EMSGSIZE; +} +EXPORT_SYMBOL(tipc_sk_fill_sock_diag); + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk); +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct publication *publ) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL); + if (!attrs) + goto genlmsg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper)) + goto attr_msg_cancel; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. 
*/ +static int __tipc_nl_list_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk, u32 *last_publ) +{ + int err; + struct publication *p; + + if (*last_publ) { + list_for_each_entry(p, &tsk->publications, binding_sock) { + if (p->key == *last_publ) + break; + } + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + *last_publ = 0; + return -EPIPE; + } + } else { + p = list_first_entry(&tsk->publications, struct publication, + binding_sock); + } + + list_for_each_entry_from(p, &tsk->publications, binding_sock) { + err = __tipc_nl_add_sk_publ(skb, cb, p); + if (err) { + *last_publ = p->key; + return err; + } + } + *last_publ = 0; + + return 0; +} + +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + u32 tsk_portid = cb->args[0]; + u32 last_publ = cb->args[1]; + u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); + struct tipc_sock *tsk; + + if (!tsk_portid) { + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy, NULL); + if (err) + return err; + + if (!sock[TIPC_NLA_SOCK_REF]) + return -EINVAL; + + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + } + + if (done) + return 0; + + tsk = tipc_sk_lookup(net, tsk_portid); + if (!tsk) + return -EINVAL; + + lock_sock(&tsk->sk); + err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); + if (!err) + done = 1; + release_sock(&tsk->sk); + sock_put(&tsk->sk); + + cb->args[0] = tsk_portid; + cb->args[1] = last_publ; + cb->args[2] = done; + + 
return skb->len; +} + +/** + * tipc_sk_filtering - check if a socket should be traced + * @sk: the socket to be examined + * + * @sysctl_tipc_sk_filter is used as the socket tuple for filtering: + * (portid, sock type, name type, name lower, name upper) + * + * Return: true if the socket meets the socket tuple data + * (value 0 = 'any') or when there is no tuple set (all = 0), + * otherwise false + */ +bool tipc_sk_filtering(struct sock *sk) +{ + struct tipc_sock *tsk; + struct publication *p; + u32 _port, _sktype, _type, _lower, _upper; + u32 type = 0, lower = 0, upper = 0; + + if (!sk) + return true; + + tsk = tipc_sk(sk); + + _port = sysctl_tipc_sk_filter[0]; + _sktype = sysctl_tipc_sk_filter[1]; + _type = sysctl_tipc_sk_filter[2]; + _lower = sysctl_tipc_sk_filter[3]; + _upper = sysctl_tipc_sk_filter[4]; + + if (!_port && !_sktype && !_type && !_lower && !_upper) + return true; + + if (_port) + return (_port == tsk->portid); + + if (_sktype && _sktype != sk->sk_type) + return false; + + if (tsk->published) { + p = list_first_entry_or_null(&tsk->publications, + struct publication, binding_sock); + if (p) { + type = p->sr.type; + lower = p->sr.lower; + upper = p->sr.upper; + } + } + + if (!tipc_sk_type_connectionless(sk)) { + type = msg_nametype(&tsk->phdr); + lower = msg_nameinst(&tsk->phdr); + upper = lower; + } + + if ((_type && _type != type) || (_lower && _lower != lower) || + (_upper && _upper != upper)) + return false; + + return true; +} + +u32 tipc_sock_get_portid(struct sock *sk) +{ + return (sk) ? 
(tipc_sk(sk))->portid : 0; +} + +/** + * tipc_sk_overlimit1 - check if socket rx queue is about to be overloaded, + * both the rcv and backlog queues are considered + * @sk: tipc sk to be checked + * @skb: tipc msg to be checked + * + * Return: true if the socket rx queue allocation is > 90%, otherwise false + */ + +bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb) +{ + atomic_t *dcnt = &tipc_sk(sk)->dupl_rcvcnt; + unsigned int lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + unsigned int qsize = sk->sk_backlog.len + sk_rmem_alloc_get(sk); + + return (qsize > lim * 90 / 100); +} + +/** + * tipc_sk_overlimit2 - check if socket rx queue is about to be overloaded, + * only the rcv queue is considered + * @sk: tipc sk to be checked + * @skb: tipc msg to be checked + * + * Return: true if the socket rx queue allocation is > 90%, otherwise false + */ + +bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb) +{ + unsigned int lim = rcvbuf_limit(sk, skb); + unsigned int qsize = sk_rmem_alloc_get(sk); + + return (qsize > lim * 90 / 100); +} + +/** + * tipc_sk_dump - dump TIPC socket + * @sk: tipc sk to be dumped + * @dqueues: bitmask to decide if any socket queue to be dumped? + * - TIPC_DUMP_NONE: don't dump socket queues + * - TIPC_DUMP_SK_SNDQ: dump socket send queue + * - TIPC_DUMP_SK_RCVQ: dump socket rcv queue + * - TIPC_DUMP_SK_BKLGQ: dump socket backlog queue + * - TIPC_DUMP_ALL: dump all the socket queues above + * @buf: returned buffer of dump data in format + */ +int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf) +{ + int i = 0; + size_t sz = (dqueues) ? 
SK_LMAX : SK_LMIN; + u32 conn_type, conn_instance; + struct tipc_sock *tsk; + struct publication *p; + bool tsk_connected; + + if (!sk) { + i += scnprintf(buf, sz, "sk data: (null)\n"); + return i; + } + + tsk = tipc_sk(sk); + tsk_connected = !tipc_sk_type_connectionless(sk); + + i += scnprintf(buf, sz, "sk data: %u", sk->sk_type); + i += scnprintf(buf + i, sz - i, " %d", sk->sk_state); + i += scnprintf(buf + i, sz - i, " %x", tsk_own_node(tsk)); + i += scnprintf(buf + i, sz - i, " %u", tsk->portid); + i += scnprintf(buf + i, sz - i, " | %u", tsk_connected); + if (tsk_connected) { + i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk)); + i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk)); + conn_type = msg_nametype(&tsk->phdr); + conn_instance = msg_nameinst(&tsk->phdr); + i += scnprintf(buf + i, sz - i, " %u", conn_type); + i += scnprintf(buf + i, sz - i, " %u", conn_instance); + } + i += scnprintf(buf + i, sz - i, " | %u", tsk->published); + if (tsk->published) { + p = list_first_entry_or_null(&tsk->publications, + struct publication, binding_sock); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0); + i += scnprintf(buf + i, sz - i, " %u", (p) ? 
p->sr.upper : 0); + } + i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win); + i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win); + i += scnprintf(buf + i, sz - i, " %u", tsk->max_pkt); + i += scnprintf(buf + i, sz - i, " %x", tsk->peer_caps); + i += scnprintf(buf + i, sz - i, " %u", tsk->cong_link_cnt); + i += scnprintf(buf + i, sz - i, " %u", tsk->snt_unacked); + i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_unacked); + i += scnprintf(buf + i, sz - i, " %u", atomic_read(&tsk->dupl_rcvcnt)); + i += scnprintf(buf + i, sz - i, " %u", sk->sk_shutdown); + i += scnprintf(buf + i, sz - i, " | %d", sk_wmem_alloc_get(sk)); + i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf); + i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk)); + i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf); + i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len)); + + if (dqueues & TIPC_DUMP_SK_SNDQ) { + i += scnprintf(buf + i, sz - i, "sk_write_queue: "); + i += tipc_list_dump(&sk->sk_write_queue, false, buf + i); + } + + if (dqueues & TIPC_DUMP_SK_RCVQ) { + i += scnprintf(buf + i, sz - i, "sk_receive_queue: "); + i += tipc_list_dump(&sk->sk_receive_queue, false, buf + i); + } + + if (dqueues & TIPC_DUMP_SK_BKLGQ) { + i += scnprintf(buf + i, sz - i, "sk_backlog:\n head "); + i += tipc_skb_dump(sk->sk_backlog.head, false, buf + i); + if (sk->sk_backlog.tail != sk->sk_backlog.head) { + i += scnprintf(buf + i, sz - i, " tail "); + i += tipc_skb_dump(sk->sk_backlog.tail, false, + buf + i); + } + } + + return i; +} diff --git a/net/tipc/socket.h b/net/tipc/socket.h new file mode 100644 index 000000000000..02cdf166807d --- /dev/null +++ b/net/tipc/socket.h @@ -0,0 +1,80 @@ +/* net/tipc/socket.h: Include file for TIPC socket code + * + * Copyright (c) 2014-2016, Ericsson AB + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_SOCK_H +#define _TIPC_SOCK_H + +#include <net/sock.h> +#include <net/genetlink.h> + +/* Compatibility values for deprecated message based flow control */ +#define FLOWCTL_MSG_WIN 512 +#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE)) + +#define FLOWCTL_BLK_SZ 1024 + +/* Socket receive buffer sizes */ +#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512) +#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2) +#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16) + +struct tipc_sock; + +int tipc_socket_init(void); +void tipc_socket_stop(void); +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk, u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)); +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)); +int tipc_dump_start(struct netlink_callback *cb); +int __tipc_dump_start(struct netlink_callback *cb, struct net *net); +int tipc_dump_done(struct netlink_callback *cb); +u32 tipc_sock_get_portid(struct sock *sk); +bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb); +bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb); +int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen); +int tsk_set_importance(struct sock *sk, int imp); + +#endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index d38bb45d82e9..f8490d94e323 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -1,8 +1,9 @@ /* * 
net/tipc/subscr.c: TIPC network topology service * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2017, Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,354 +37,147 @@ #include "core.h" #include "name_table.h" -#include "port.h" #include "subscr.h" -/** - * struct tipc_subscriber - TIPC network topology subscriber - * @conid: connection identifier to server connecting to subscriber - * @lock: controll access to subscriber - * @subscription_list: list of subscription objects for this subscriber - */ -struct tipc_subscriber { - int conid; - spinlock_t lock; - struct list_head subscription_list; -}; - -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); -static void *subscr_named_msg_event(int conid); -static void subscr_conn_shutdown_event(int conid, void *usr_data); - -static atomic_t subscription_count = ATOMIC_INIT(0); - -static struct sockaddr_tipc topsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_TOP_SRV, - .addr.nameseq.lower = TIPC_TOP_SRV, - .addr.nameseq.upper = TIPC_TOP_SRV, - .scope = TIPC_NODE_SCOPE -}; - -static struct tipc_server topsrv __read_mostly = { - .saddr = &topsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_SEQPACKET, - .max_rcvbuf_size = sizeof(struct tipc_subscr), - .name = "topology_server", - .tipc_conn_recvmsg = subscr_conn_msg_event, - .tipc_conn_new = subscr_named_msg_event, - .tipc_conn_shutdown = subscr_conn_shutdown_event, -}; - -/** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ -static u32 htohl(u32 in, int swap) +static void tipc_sub_send_event(struct tipc_subscription *sub, + struct publication 
*p, + u32 event) { - return swap ? swab32(in) : in; -} + struct tipc_subscr *s = &sub->evt.s; + struct tipc_event *evt = &sub->evt; -static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node) -{ - struct tipc_subscriber *subscriber = sub->subscriber; - struct kvec msg_sect; - int ret; - - msg_sect.iov_base = (void *)&sub->evt; - msg_sect.iov_len = sizeof(struct tipc_event); - - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); - ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); - if (ret < 0) - pr_err("Sending subscription event failed, no memory\n"); + if (sub->inactive) + return; + tipc_evt_write(evt, event, event); + if (p) { + tipc_evt_write(evt, found_lower, p->sr.lower); + tipc_evt_write(evt, found_upper, p->sr.upper); + tipc_evt_write(evt, port.ref, p->sk.ref); + tipc_evt_write(evt, port.node, p->sk.node); + } else { + tipc_evt_write(evt, found_lower, s->seq.lower); + tipc_evt_write(evt, found_upper, s->seq.upper); + tipc_evt_write(evt, port.ref, 0); + tipc_evt_write(evt, port.node, 0); + } + tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt); } /** - * tipc_subscr_overlap - test for subscription overlap with the given values + * tipc_sub_check_overlap - test for subscription overlap with the given values + * @subscribed: the service range subscribed for + * @found: the service range we are checking for match * - * Returns 1 if there is overlap, otherwise 0. + * Returns true if there is overlap, otherwise false. 
*/ -int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper) +static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed, + struct tipc_service_range *found) { - if (found_lower < sub->seq.lower) - found_lower = sub->seq.lower; - if (found_upper > sub->seq.upper) - found_upper = sub->seq.upper; - if (found_lower > found_upper) - return 0; - return 1; + u32 found_lower = found->lower; + u32 found_upper = found->upper; + + if (found_lower < subscribed->lower) + found_lower = subscribed->lower; + if (found_upper > subscribed->upper) + found_upper = subscribed->upper; + return found_lower <= found_upper; } -/** - * tipc_subscr_report_overlap - issue event if there is subscription overlap - * - * Protected by nameseq.lock in name_table.c - */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) +void tipc_sub_report_overlap(struct tipc_subscription *sub, + struct publication *p, + u32 event, bool must) { - if (!tipc_subscr_overlap(sub, found_lower, found_upper)) + struct tipc_service_range *sr = &sub->s.seq; + u32 filter = sub->s.filter; + + if (!tipc_sub_check_overlap(sr, &p->sr)) return; - if (!must && !(sub->filter & TIPC_SUB_PORTS)) + if (!must && !(filter & TIPC_SUB_PORTS)) return; - - subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); -} - -static void subscr_timeout(struct tipc_subscription *sub) -{ - struct tipc_subscriber *subscriber = sub->subscriber; - - /* The spin lock per subscriber is used to protect its members */ - spin_lock_bh(&subscriber->lock); - - /* Validate if the connection related to the subscriber is - * closed (in case subscriber is terminating) - */ - if (subscriber->conid == 0) { - spin_unlock_bh(&subscriber->lock); + if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE) return; - } - - /* Validate timeout (in case subscription is being cancelled) */ - if 
(sub->timeout == TIPC_WAIT_FOREVER) { - spin_unlock_bh(&subscriber->lock); + if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE) return; - } - - /* Unlink subscription from name table */ - tipc_nametbl_unsubscribe(sub); - - /* Unlink subscription from subscriber */ - list_del(&sub->subscription_list); - - spin_unlock_bh(&subscriber->lock); - - /* Notify subscriber of timeout */ - subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); - - /* Now destroy subscription */ - k_term_timer(&sub->timer); - kfree(sub); - atomic_dec(&subscription_count); + spin_lock(&sub->lock); + tipc_sub_send_event(sub, p, event); + spin_unlock(&sub->lock); } -/** - * subscr_del - delete a subscription within a subscription list - * - * Called with subscriber lock held. - */ -static void subscr_del(struct tipc_subscription *sub) +static void tipc_sub_timeout(struct timer_list *t) { - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscription_list); - kfree(sub); - atomic_dec(&subscription_count); + struct tipc_subscription *sub = timer_container_of(sub, t, timer); + + spin_lock(&sub->lock); + tipc_sub_send_event(sub, NULL, TIPC_SUBSCR_TIMEOUT); + sub->inactive = true; + spin_unlock(&sub->lock); } -/** - * subscr_terminate - terminate communication with a subscriber - * - * Note: Must call it in process context since it might sleep. 
- */ -static void subscr_terminate(struct tipc_subscriber *subscriber) +static void tipc_sub_kref_release(struct kref *kref) { - tipc_conn_terminate(&topsrv, subscriber->conid); + kfree(container_of(kref, struct tipc_subscription, kref)); } -static void subscr_release(struct tipc_subscriber *subscriber) +void tipc_sub_put(struct tipc_subscription *subscription) { - struct tipc_subscription *sub; - struct tipc_subscription *sub_temp; - - spin_lock_bh(&subscriber->lock); - - /* Invalidate subscriber reference */ - subscriber->conid = 0; - - /* Destroy any existing subscriptions for subscriber */ - list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, - subscription_list) { - if (sub->timeout != TIPC_WAIT_FOREVER) { - spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); - spin_lock_bh(&subscriber->lock); - } - subscr_del(sub); - } - spin_unlock_bh(&subscriber->lock); - - /* Now destroy subscriber */ - kfree(subscriber); + kref_put(&subscription->kref, tipc_sub_kref_release); } -/** - * subscr_cancel - handle subscription cancellation request - * - * Called with subscriber lock held. Routine must temporarily release lock - * to enable the subscription timeout routine to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * - * Note that fields of 's' use subscriber's endianness! 
- */ -static void subscr_cancel(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) +void tipc_sub_get(struct tipc_subscription *subscription) { - struct tipc_subscription *sub; - struct tipc_subscription *sub_temp; - int found = 0; - - /* Find first matching subscription, exit if not found */ - list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, - subscription_list) { - if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; - break; - } - } - if (!found) - return; - - /* Cancel subscription timer (if used), then delete subscription */ - if (sub->timeout != TIPC_WAIT_FOREVER) { - sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); - spin_lock_bh(&subscriber->lock); - } - subscr_del(sub); + kref_get(&subscription->kref); } -/** - * subscr_subscribe - create subscription for subscriber - * - * Called with subscriber lock held. - */ -static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid) { + u32 lower = tipc_sub_read(s, seq.lower); + u32 upper = tipc_sub_read(s, seq.upper); + u32 filter = tipc_sub_read(s, filter); struct tipc_subscription *sub; - int swap; - - /* Determine subscriber's endianness */ - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); + u32 timeout; - /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - subscr_cancel(s, subscriber); - return NULL; - } - - /* Refuse subscription if global limit exceeded */ - if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { - pr_warn("Subscription rejected, limit reached (%u)\n", - TIPC_MAX_SUBSCRIPTIONS); - subscr_terminate(subscriber); + if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) || + lower > upper) 
{ + pr_warn("Subscription rejected, illegal request\n"); return NULL; } - - /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); - subscr_terminate(subscriber); return NULL; } - - /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, swap); - sub->seq.lower = htohl(s->seq.lower, swap); - sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); - sub->filter = htohl(s->filter, swap); - if ((!(sub->filter & TIPC_SUB_PORTS) == - !(sub->filter & TIPC_SUB_SERVICE)) || - (sub->seq.lower > sub->seq.upper)) { - pr_warn("Subscription rejected, illegal request\n"); + INIT_LIST_HEAD(&sub->service_list); + INIT_LIST_HEAD(&sub->sub_list); + sub->net = net; + sub->conid = conid; + sub->inactive = false; + memcpy(&sub->evt.s, s, sizeof(*s)); + sub->s.seq.type = tipc_sub_read(s, seq.type); + sub->s.seq.lower = lower; + sub->s.seq.upper = upper; + sub->s.filter = filter; + sub->s.timeout = tipc_sub_read(s, timeout); + memcpy(sub->s.usr_handle, s->usr_handle, 8); + spin_lock_init(&sub->lock); + kref_init(&sub->kref); + if (!tipc_nametbl_subscribe(sub)) { kfree(sub); - subscr_terminate(subscriber); return NULL; } - INIT_LIST_HEAD(&sub->nameseq_list); - list_add(&sub->subscription_list, &subscriber->subscription_list); - sub->subscriber = subscriber; - sub->swap = swap; - memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&subscription_count); - if (sub->timeout != TIPC_WAIT_FOREVER) { - k_init_timer(&sub->timer, - (Handler)subscr_timeout, (unsigned long)sub); - k_start_timer(&sub->timer, sub->timeout); - } - + timer_setup(&sub->timer, tipc_sub_timeout, 0); + timeout = tipc_sub_read(&sub->evt.s, timeout); + if (timeout != TIPC_WAIT_FOREVER) + mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); return sub; } -/* Handle one termination request for the subscriber */ -static void subscr_conn_shutdown_event(int conid, void *usr_data) -{ - 
subscr_release((struct tipc_subscriber *)usr_data); -} - -/* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) -{ - struct tipc_subscriber *subscriber = usr_data; - struct tipc_subscription *sub; - - spin_lock_bh(&subscriber->lock); - sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); - if (sub) - tipc_nametbl_subscribe(sub); - spin_unlock_bh(&subscriber->lock); -} - - -/* Handle one request to establish a new subscriber */ -static void *subscr_named_msg_event(int conid) +void tipc_sub_unsubscribe(struct tipc_subscription *sub) { - struct tipc_subscriber *subscriber; - - /* Create subscriber object */ - subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); - if (subscriber == NULL) { - pr_warn("Subscriber rejected, no memory\n"); - return NULL; - } - INIT_LIST_HEAD(&subscriber->subscription_list); - subscriber->conid = conid; - spin_lock_init(&subscriber->lock); - - return (void *)subscriber; -} - -int tipc_subscr_start(void) -{ - return tipc_server_start(&topsrv); -} - -void tipc_subscr_stop(void) -{ - tipc_server_stop(&topsrv); + tipc_nametbl_unsubscribe(sub); + if (sub->evt.s.timeout != TIPC_WAIT_FOREVER) + timer_delete_sync(&sub->timer); + list_del(&sub->sub_list); + tipc_sub_put(sub); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 393e417bee3f..60b877531b66 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -1,8 +1,9 @@ /* * net/tipc/subscr.h: Include file for TIPC network topology service * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2017, Ericsson AB * Copyright (c) 2005-2007, 2012-2013, Wind River Systems + * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -37,45 +38,85 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H -#include "server.h" +#include "topsrv.h" +#define TIPC_MAX_SUBSCR 65535 +#define TIPC_MAX_PUBL 65535 + +struct publication; struct tipc_subscription; -struct tipc_subscriber; +struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object - * @subscriber: pointer to its subscriber - * @seq: name sequence associated with subscription - * @timeout: duration of subscription (in ms) - * @filter: event filtering to be done for subscription - * @timer: timer governing subscription duration (optional) - * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @subscription_list: adjacent subscriptions in subscriber's subscription list - * @server_ref: object reference of server port associated with subscription - * @swap: indicates if subscriber uses opposite endianness in its messages + * @s: host-endian copy of the user subscription * @evt: template for events generated by subscription + * @kref: reference count for this subscription + * @net: network namespace associated with subscription + * @timer: timer governing subscription duration (optional) + * @service_list: adjacent subscriptions in name sequence's subscription list + * @sub_list: adjacent subscriptions in subscriber's subscription list + * @conid: connection identifier of topology server + * @inactive: true if this subscription is inactive + * @lock: serialize up/down and timer events */ struct tipc_subscription { - struct tipc_subscriber *subscriber; - struct tipc_name_seq seq; - u32 timeout; - u32 filter; - struct timer_list timer; - struct list_head nameseq_list; - struct list_head subscription_list; - int swap; + struct tipc_subscr s; struct tipc_event evt; + struct kref kref; + struct net *net; + struct timer_list timer; + struct list_head service_list; + struct list_head sub_list; + int conid; + bool inactive; + 
spinlock_t lock; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper); +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid); +void tipc_sub_unsubscribe(struct tipc_subscription *sub); +void tipc_sub_report_overlap(struct tipc_subscription *sub, + struct publication *p, + u32 event, bool must); + +int __net_init tipc_topsrv_init_net(struct net *net); +void __net_exit tipc_topsrv_exit_net(struct net *net); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, int must); +void tipc_sub_put(struct tipc_subscription *subscription); +void tipc_sub_get(struct tipc_subscription *subscription); -int tipc_subscr_start(void); +#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL) -void tipc_subscr_stop(void); +/* tipc_sub_read - return field_ of struct sub_ in host endian format + */ +#define tipc_sub_read(sub_, field_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = (sub__)->field_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (swap_ ? swab32(val__) : val__); \ + }) + +/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format + */ +#define tipc_sub_write(sub_, field_, val_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = val_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (sub__)->field_ = swap_ ? swab32(val__) : val__; \ + }) + +/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format + */ +#define tipc_evt_write(evt_, field_, val_) \ + ({ \ + struct tipc_event *evt__ = evt_; \ + u32 val__ = val_; \ + int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \ + (evt__)->field_ = swap_ ? 
swab32(val__) : val__; \ + }) #endif diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index f3fef93325a8..30d2e06e3d8c 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -34,7 +34,9 @@ */ #include "core.h" - +#include "trace.h" +#include "crypto.h" +#include "bcast.h" #include <linux/sysctl.h> static struct ctl_table_header *tipc_ctl_hdr; @@ -45,9 +47,50 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, + { + .procname = "named_timeout", + .data = &sysctl_tipc_named_timeout, + .maxlen = sizeof(sysctl_tipc_named_timeout), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, + { + .procname = "sk_filter", + .data = &sysctl_tipc_sk_filter, + .maxlen = sizeof(sysctl_tipc_sk_filter), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, +#ifdef CONFIG_TIPC_CRYPTO + { + .procname = "max_tfms", + .data = &sysctl_tipc_max_tfms, + .maxlen = sizeof(sysctl_tipc_max_tfms), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, + { + .procname = "key_exchange_enabled", + .data = &sysctl_tipc_key_exchange_enabled, + .maxlen = sizeof(sysctl_tipc_key_exchange_enabled), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { + .procname = "bc_retruni", + .data = &sysctl_tipc_bc_retruni, + .maxlen = sizeof(sysctl_tipc_bc_retruni), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, }, - {} }; int tipc_register_sysctl(void) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c new file mode 100644 index 000000000000..aad7f96b6009 --- /dev/null +++ b/net/tipc/topsrv.c @@ -0,0 +1,733 @@ +/* + * net/tipc/topsrv.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017-2018, Ericsson AB + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "subscr.h" +#include "topsrv.h" +#include "core.h" +#include "socket.h" +#include "addr.h" +#include "msg.h" +#include "bearer.h" +#include <net/sock.h> +#include <linux/module.h> +#include <trace/events/sock.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_topsrv - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @net: network namespace instance + * @awork: accept work item + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @listener: topsrv listener socket + * @name: server name + */ +struct tipc_topsrv { + struct idr conn_idr; + spinlock_t idr_lock; /* for idr list */ + int idr_in_use; + struct net *net; + struct work_struct awork; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + struct socket *listener; + char name[TIPC_SERVER_NAME_LEN]; +}; + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @sub_list: list to all pertaining subscriptions + * @sub_lock: lock protecting the subscription list + * @rwork: receive work item + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: control access to the outqueue + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_topsrv *server; + struct list_head sub_list; + spinlock_t sub_lock; /* for subscription list */ + struct work_struct rwork; + struct list_head outqueue; + spinlock_t outqueue_lock; /* for outqueue */ + struct work_struct swork; +}; + +/* An 
entry waiting to be sent */ +struct outqueue_entry { + bool inactive; + struct tipc_event evt; + struct list_head list; +}; + +static void tipc_conn_recv_work(struct work_struct *work); +static void tipc_conn_send_work(struct work_struct *work); +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt); +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s); + +static bool connected(struct tipc_conn *con) +{ + return con && test_bit(CF_CONNECTED, &con->flags); +} + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_topsrv *s = con->server; + struct outqueue_entry *e, *safe; + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + if (con->sock) + sock_release(con->sock); + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static void tipc_conn_close(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + bool disconnect = false; + + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + + if (disconnect) { + sk->sk_user_data = NULL; + tipc_conn_delete_sub(con, NULL); + } + write_unlock_bh(&sk->sk_callback_lock); + + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; + + /* Don't flush pending works, just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); +} + +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(*con),
GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + INIT_LIST_HEAD(&con->sub_list); + spin_lock_init(&con->outqueue_lock); + spin_lock_init(&con->sub_lock); + INIT_WORK(&con->swork, tipc_conn_send_work); + INIT_WORK(&con->rwork, tipc_conn_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + con->sock = sock; + conn_get(con); + spin_unlock_bh(&s->idr_lock); + + return con; +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (!connected(con) || !kref_get_unless_zero(&con->kref)) + con = NULL; + spin_unlock_bh(&s->idr_lock); + return con; +} + +/* tipc_conn_delete_sub - delete a specific or all subscriptions + * for a given subscriber + */ +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(con->server->net); + struct list_head *sub_list = &con->sub_list; + struct tipc_subscription *sub, *tmp; + + spin_lock_bh(&con->sub_lock); + list_for_each_entry_safe(sub, tmp, sub_list, sub_list) { + if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { + tipc_sub_unsubscribe(sub); + atomic_dec(&tn->subscription_count); + if (s) + break; + } + } + spin_unlock_bh(&con->sub_lock); +} + +static void tipc_conn_send_to_sock(struct tipc_conn *con) +{ + struct list_head *queue = &con->outqueue; + struct tipc_topsrv *srv = con->server; + struct outqueue_entry *e; + struct tipc_event *evt; + struct msghdr msg; + struct kvec iov; + int count = 0; + int ret; + + spin_lock_bh(&con->outqueue_lock); + + while (!list_empty(queue)) { + e = list_first_entry(queue, struct outqueue_entry, list); + evt = &e->evt; 
+ spin_unlock_bh(&con->outqueue_lock); + + if (e->inactive) + tipc_conn_delete_sub(con, &evt->s); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + iov.iov_base = evt; + iov.iov_len = sizeof(*evt); + msg.msg_name = NULL; + + if (con->sock) { + ret = kernel_sendmsg(con->sock, &msg, &iov, + 1, sizeof(*evt)); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + return; + } else if (ret < 0) { + return tipc_conn_close(con); + } + } else { + tipc_topsrv_kern_evt(srv->net, evt); + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +static void tipc_conn_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (connected(con)) + tipc_conn_send_to_sock(con); + + conn_put(con); +} + +/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance + * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock() + */ +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(srv, conid); + if (!con) + return; + + if (!connected(con)) + goto err; + + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (!e) + goto err; + e->inactive = (event == TIPC_SUBSCR_TIMEOUT); + memcpy(&e->evt, evt, sizeof(*evt)); + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (queue_work(srv->send_wq, &con->swork)) + return; +err: + conn_put(con); +} + +/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN + * Indicates that there now is more space in the send buffer + * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock() + */ +static
void tipc_conn_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, + struct tipc_conn *con, + struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(srv->net); + struct tipc_subscription *sub; + u32 s_filter = tipc_sub_read(s, filter); + + if (s_filter & TIPC_SUB_CANCEL) { + tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL); + tipc_conn_delete_sub(con, s); + return 0; + } + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) { + pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR); + return -1; + } + sub = tipc_sub_subscribe(srv->net, s, con->conid); + if (!sub) + return -1; + atomic_inc(&tn->subscription_count); + spin_lock_bh(&con->sub_lock); + list_add(&sub->sub_list, &con->sub_list); + spin_unlock_bh(&con->sub_lock); + return 0; +} + +static int tipc_conn_rcv_from_sock(struct tipc_conn *con) +{ + struct tipc_topsrv *srv = con->server; + struct sock *sk = con->sock->sk; + struct msghdr msg = {}; + struct tipc_subscr s; + struct kvec iov; + int ret; + + iov.iov_base = &s; + iov.iov_len = sizeof(s); + msg.msg_name = NULL; + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); + if (ret == -EWOULDBLOCK) + return -EWOULDBLOCK; + if (ret == sizeof(s)) { + read_lock_bh(&sk->sk_callback_lock); + /* RACE: the connection can be closed in the meantime */ + if (likely(connected(con))) + ret = tipc_conn_rcv_sub(srv, con, &s); + read_unlock_bh(&sk->sk_callback_lock); + if (!ret) + return 0; + } + + tipc_conn_close(con); + return ret; +} + +static void tipc_conn_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while 
(connected(con)) { + if (tipc_conn_rcv_from_sock(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +/* tipc_conn_data_ready - interrupt callback indicating the socket has data + * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock() + */ +static void tipc_conn_data_ready(struct sock *sk) +{ + struct tipc_conn *con; + + trace_sk_data_ready(sk); + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_topsrv_accept(struct work_struct *work) +{ + struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); + struct socket *newsock, *lsock; + struct tipc_conn *con; + struct sock *newsk; + int ret; + + spin_lock_bh(&srv->idr_lock); + if (!srv->listener) { + spin_unlock_bh(&srv->idr_lock); + return; + } + lsock = srv->listener; + spin_unlock_bh(&srv->idr_lock); + + while (1) { + ret = kernel_accept(lsock, &newsock, O_NONBLOCK); + if (ret < 0) + return; + con = tipc_conn_alloc(srv, newsock); + if (IS_ERR(con)) { + ret = PTR_ERR(con); + sock_release(newsock); + return; + } + /* Register callbacks */ + newsk = newsock->sk; + write_lock_bh(&newsk->sk_callback_lock); + newsk->sk_data_ready = tipc_conn_data_ready; + newsk->sk_write_space = tipc_conn_write_space; + newsk->sk_user_data = con; + write_unlock_bh(&newsk->sk_callback_lock); + + /* Wake up receive process in case of 'SYN+' message */ + newsk->sk_data_ready(newsk); + conn_put(con); + } +} + +/* tipc_topsrv_listener_data_ready - interrupt callback with connection request + * The queued job is launched into tipc_topsrv_accept() + */ +static void tipc_topsrv_listener_data_ready(struct sock *sk) +{ + struct tipc_topsrv *srv; + + trace_sk_data_ready(sk); + + read_lock_bh(&sk->sk_callback_lock); + srv =
sk->sk_user_data; + if (srv) + queue_work(srv->rcv_wq, &srv->awork); + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) +{ + struct socket *lsock = NULL; + struct sockaddr_tipc saddr; + struct sock *sk; + int rc; + + rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock); + if (rc < 0) + return rc; + + srv->listener = lsock; + sk = lsock->sk; + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = tipc_topsrv_listener_data_ready; + sk->sk_user_data = srv; + write_unlock_bh(&sk->sk_callback_lock); + + lock_sock(sk); + rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE); + release_sock(sk); + if (rc < 0) + goto err; + + saddr.family = AF_TIPC; + saddr.addrtype = TIPC_SERVICE_RANGE; + saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addr.nameseq.lower = TIPC_TOP_SRV; + saddr.addr.nameseq.upper = TIPC_TOP_SRV; + saddr.scope = TIPC_NODE_SCOPE; + + rc = tipc_sk_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); + if (rc < 0) + goto err; + rc = kernel_listen(lsock, 0); + if (rc < 0) + goto err; + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. 
+ */ + module_put(lsock->ops->owner); + module_put(sk->sk_prot_creator->owner); + + return 0; +err: + sock_release(lsock); + return -EINVAL; +} + +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) +{ + struct tipc_subscr sub; + struct tipc_conn *con; + int rc; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = filter; + *(u64 *)&sub.usr_handle = (u64)port; + + con = tipc_conn_alloc(tipc_topsrv(net), NULL); + if (IS_ERR(con)) + return false; + + *conid = con->conid; + rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); + if (rc) + conn_put(con); + + conn_put(con); + return !rc; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + + test_and_clear_bit(CF_CONNECTED, &con->flags); + tipc_conn_delete_sub(con, NULL); + conn_put(con); + conn_put(con); +} + +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_loopback_trace(net, &evtq); + tipc_sk_rcv(net, &evtq); +} + +static int tipc_topsrv_work_start(struct tipc_topsrv *s) +{ + s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_ordered_workqueue("tipc_send", 0); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +static void 
tipc_topsrv_work_stop(struct tipc_topsrv *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_topsrv_start(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + const char name[] = "topology_server"; + struct tipc_topsrv *srv; + int ret; + + srv = kzalloc(sizeof(*srv), GFP_ATOMIC); + if (!srv) + return -ENOMEM; + + srv->net = net; + INIT_WORK(&srv->awork, tipc_topsrv_accept); + + strscpy(srv->name, name, sizeof(srv->name)); + tn->topsrv = srv; + atomic_set(&tn->subscription_count, 0); + + spin_lock_init(&srv->idr_lock); + idr_init(&srv->conn_idr); + srv->idr_in_use = 0; + + ret = tipc_topsrv_work_start(srv); + if (ret < 0) + goto err_start; + + ret = tipc_topsrv_create_listener(srv); + if (ret < 0) + goto err_create; + + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); + return ret; +} + +static void tipc_topsrv_stop(struct net *net) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct socket *lsock = srv->listener; + struct tipc_conn *con; + int id; + + spin_lock_bh(&srv->idr_lock); + for (id = 0; srv->idr_in_use; id++) { + con = idr_find(&srv->conn_idr, id); + if (con) { + conn_get(con); + spin_unlock_bh(&srv->idr_lock); + tipc_conn_close(con); + conn_put(con); + spin_lock_bh(&srv->idr_lock); + } + } + __module_get(lsock->ops->owner); + __module_get(lsock->sk->sk_prot_creator->owner); + srv->listener = NULL; + spin_unlock_bh(&srv->idr_lock); + + tipc_topsrv_work_stop(srv); + sock_release(lsock); + idr_destroy(&srv->conn_idr); + kfree(srv); +} + +int __net_init tipc_topsrv_init_net(struct net *net) +{ + return tipc_topsrv_start(net); +} + +void __net_exit tipc_topsrv_exit_net(struct net *net) +{ + tipc_topsrv_stop(net); +} diff --git a/net/tipc/log.c b/net/tipc/topsrv.h index abef644f27d8..c7ea71293748 100644 --- a/net/tipc/log.c +++ b/net/tipc/topsrv.h @@ -1,8 +1,8 @@ /* - * net/tipc/log.c: TIPC print buffer routines for debugging + * net/tipc/topsrv.h: Include file for TIPC
server code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,22 +34,21 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + #include "core.h" -#include "config.h" -/** - * tipc_snprintf - append formatted output to print buffer - * @buf: pointer to print buffer - * @len: buffer length - * @fmt: formatted info to be printed - */ -int tipc_snprintf(char *buf, int len, const char *fmt, ...) -{ - int i; - va_list args; +#define TIPC_SERVER_NAME_LEN 32 +#define TIPC_SUB_CLUSTER_SCOPE 0x20 +#define TIPC_SUB_NODE_SCOPE 0x40 +#define TIPC_SUB_NO_STATUS 0x80 + +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt); + +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid); +void tipc_topsrv_kern_unsubscr(struct net *net, int conid); - va_start(args, fmt); - i = vscnprintf(buf, len, fmt, args); - va_end(args); - return i; -} +#endif diff --git a/net/tipc/trace.c b/net/tipc/trace.c new file mode 100644 index 000000000000..7d2931521e0e --- /dev/null +++ b/net/tipc/trace.c @@ -0,0 +1,206 @@ +/* + * net/tipc/trace.c: TIPC tracepoints code + * + * Copyright (c) 2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define CREATE_TRACE_POINTS +#include "trace.h" + +/* + * socket tuples for filtering in socket traces: + * (portid, sock type, name type, name lower, name upper) + */ +unsigned long sysctl_tipc_sk_filter[5] __read_mostly = {0, }; + +/** + * tipc_skb_dump - dump TIPC skb data + * @skb: skb to be dumped + * @more: dump more? + * - false: dump only tipc msg data + * - true: dump kernel-related skb data and tipc cb[] array as well + * @buf: returned buffer of dump data in format + */ +int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf) +{ + int i = 0; + size_t sz = (more) ?
SKB_LMAX : SKB_LMIN; + struct tipc_msg *hdr; + struct tipc_skb_cb *skbcb; + + if (!skb) { + i += scnprintf(buf, sz, "msg: (null)\n"); + return i; + } + + hdr = buf_msg(skb); + skbcb = TIPC_SKB_CB(skb); + + /* tipc msg data section */ + i += scnprintf(buf, sz, "msg: %u", msg_user(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_type(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_hdr_sz(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_data_sz(hdr)); + i += scnprintf(buf + i, sz - i, " %x", msg_orignode(hdr)); + i += scnprintf(buf + i, sz - i, " %x", msg_destnode(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_seqno(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_ack(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_bcast_ack(hdr)); + switch (msg_user(hdr)) { + case LINK_PROTOCOL: + i += scnprintf(buf + i, sz - i, " %c", msg_net_plane(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_probe(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_peer_stopping(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_session(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_next_sent(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_seq_gap(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_bc_snd_nxt(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_bc_gap(hdr)); + break; + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + case SOCK_WAKEUP: + i += scnprintf(buf + i, sz - i, " | %u", msg_origport(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_destport(hdr)); + switch (msg_type(hdr)) { + case TIPC_NAMED_MSG: + i += scnprintf(buf + i, sz - i, " %u", + msg_nametype(hdr)); + i += scnprintf(buf + i, sz - i, " %u", + msg_nameinst(hdr)); + break; + case TIPC_MCAST_MSG: + i += scnprintf(buf + i, sz - i, " %u", + msg_nametype(hdr)); + i += scnprintf(buf + i, sz - i, " %u", + msg_namelower(hdr)); + i += scnprintf(buf + i, sz - i, " %u", + msg_nameupper(hdr)); + 
break; + default: + break; + } + i += scnprintf(buf + i, sz - i, " | %u", + msg_src_droppable(hdr)); + i += scnprintf(buf + i, sz - i, " %u", + msg_dest_droppable(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_errcode(hdr)); + i += scnprintf(buf + i, sz - i, " %u", msg_reroute_cnt(hdr)); + break; + default: + /* need more? */ + break; + } + + i += scnprintf(buf + i, sz - i, "\n"); + if (!more) + return i; + + /* kernel-related skb data section */ + i += scnprintf(buf + i, sz - i, "skb: %s", + (skb->dev) ? skb->dev->name : "n/a"); + i += scnprintf(buf + i, sz - i, " %u", skb->len); + i += scnprintf(buf + i, sz - i, " %u", skb->data_len); + i += scnprintf(buf + i, sz - i, " %u", skb->hdr_len); + i += scnprintf(buf + i, sz - i, " %u", skb->truesize); + i += scnprintf(buf + i, sz - i, " %u", skb_cloned(skb)); + i += scnprintf(buf + i, sz - i, " %p", skb->sk); + i += scnprintf(buf + i, sz - i, " %u", skb_shinfo(skb)->nr_frags); + i += scnprintf(buf + i, sz - i, " %llx", + ktime_to_ms(skb_get_ktime(skb))); + i += scnprintf(buf + i, sz - i, " %llx\n", + ktime_to_ms(skb_hwtstamps(skb)->hwtstamp)); + + /* tipc skb cb[] data section */ + i += scnprintf(buf + i, sz - i, "cb[]: %u", skbcb->bytes_read); + i += scnprintf(buf + i, sz - i, " %u", skbcb->orig_member); + i += scnprintf(buf + i, sz - i, " %u", + jiffies_to_msecs(skbcb->nxt_retr)); + i += scnprintf(buf + i, sz - i, " %u", skbcb->validated); + i += scnprintf(buf + i, sz - i, " %u", skbcb->chain_imp); + i += scnprintf(buf + i, sz - i, " %u\n", skbcb->ackers); + + return i; +} + +/** + * tipc_list_dump - dump TIPC skb list/queue + * @list: list of skbs to be dumped + * @more: dump more? + * - false: dump only the head & tail skbs + * - true: dump the first & last 5 skbs + * @buf: returned buffer of dump data in format + */ +int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf) +{ + int i = 0; + size_t sz = (more) ? 
LIST_LMAX : LIST_LMIN; + u32 count, len; + struct sk_buff *hskb, *tskb, *skb, *tmp; + + if (!list) { + i += scnprintf(buf, sz, "(null)\n"); + return i; + } + + len = skb_queue_len(list); + i += scnprintf(buf, sz, "len = %d\n", len); + + if (!len) + return i; + + if (!more) { + hskb = skb_peek(list); + i += scnprintf(buf + i, sz - i, " head "); + i += tipc_skb_dump(hskb, false, buf + i); + if (len > 1) { + tskb = skb_peek_tail(list); + i += scnprintf(buf + i, sz - i, " tail "); + i += tipc_skb_dump(tskb, false, buf + i); + } + } else { + count = 0; + skb_queue_walk_safe(list, skb, tmp) { + count++; + if (count == 6) + i += scnprintf(buf + i, sz - i, " .\n .\n"); + if (count > 5 && count <= len - 5) + continue; + i += scnprintf(buf + i, sz - i, " #%d ", count); + i += tipc_skb_dump(skb, false, buf + i); + } + } + return i; +} diff --git a/net/tipc/trace.h b/net/tipc/trace.h new file mode 100644 index 000000000000..865142ed0ab4 --- /dev/null +++ b/net/tipc/trace.h @@ -0,0 +1,434 @@ +/* + * net/tipc/trace.h: TIPC tracepoints + * + * Copyright (c) 2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "ASIS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tipc + +#if !defined(_TIPC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TIPC_TRACE_H + +#include <linux/tracepoint.h> +#include "core.h" +#include "link.h" +#include "socket.h" +#include "node.h" + +#define SKB_LMIN (100) +#define SKB_LMAX (SKB_LMIN * 2) +#define LIST_LMIN (SKB_LMIN * 3) +#define LIST_LMAX (SKB_LMIN * 11) +#define SK_LMIN (SKB_LMIN * 2) +#define SK_LMAX (SKB_LMIN * 11) +#define LINK_LMIN (SKB_LMIN) +#define LINK_LMAX (SKB_LMIN * 16) +#define NODE_LMIN (SKB_LMIN) +#define NODE_LMAX (SKB_LMIN * 11) + +#ifndef __TIPC_TRACE_ENUM +#define __TIPC_TRACE_ENUM +enum { + TIPC_DUMP_NONE = 0, + + TIPC_DUMP_TRANSMQ = 1, + TIPC_DUMP_BACKLOGQ = (1 << 1), + TIPC_DUMP_DEFERDQ = (1 << 2), + TIPC_DUMP_INPUTQ = (1 << 3), + TIPC_DUMP_WAKEUP = (1 << 4), + + TIPC_DUMP_SK_SNDQ = (1 << 8), + TIPC_DUMP_SK_RCVQ = (1 << 9), + TIPC_DUMP_SK_BKLGQ = (1 << 10), + TIPC_DUMP_ALL = 0xffffu +}; +#endif + +/* Link & Node FSM states: */ +#define state_sym(val) \ + __print_symbolic(val, \ + {(0xe), "ESTABLISHED" },\ + {(0xe << 4), "ESTABLISHING" },\ + {(0x1 << 8), "RESET" },\ + {(0x2 << 12), "RESETTING" },\ + {(0xd << 16), "PEER_RESET" },\ + 
{(0xf << 20), "FAILINGOVER" },\ + {(0xc << 24), "SYNCHING" },\ + {(0xdd), "SELF_DOWN_PEER_DOWN" },\ + {(0xaa), "SELF_UP_PEER_UP" },\ + {(0xd1), "SELF_DOWN_PEER_LEAVING" },\ + {(0xac), "SELF_UP_PEER_COMING" },\ + {(0xca), "SELF_COMING_PEER_UP" },\ + {(0x1d), "SELF_LEAVING_PEER_DOWN" },\ + {(0xf0), "FAILINGOVER" },\ + {(0xcc), "SYNCHING" }) + +/* Link & Node FSM events: */ +#define evt_sym(val) \ + __print_symbolic(val, \ + {(0xec1ab1e), "ESTABLISH_EVT" },\ + {(0x9eed0e), "PEER_RESET_EVT" },\ + {(0xfa110e), "FAILURE_EVT" },\ + {(0x10ca1d0e), "RESET_EVT" },\ + {(0xfa110bee), "FAILOVER_BEGIN_EVT" },\ + {(0xfa110ede), "FAILOVER_END_EVT" },\ + {(0xc1ccbee), "SYNCH_BEGIN_EVT" },\ + {(0xc1ccede), "SYNCH_END_EVT" },\ + {(0xece), "SELF_ESTABL_CONTACT_EVT" },\ + {(0x1ce), "SELF_LOST_CONTACT_EVT" },\ + {(0x9ece), "PEER_ESTABL_CONTACT_EVT" },\ + {(0x91ce), "PEER_LOST_CONTACT_EVT" },\ + {(0xfbe), "FAILOVER_BEGIN_EVT" },\ + {(0xfee), "FAILOVER_END_EVT" },\ + {(0xcbe), "SYNCH_BEGIN_EVT" },\ + {(0xcee), "SYNCH_END_EVT" }) + +/* Bearer, net device events: */ +#define dev_evt_sym(val) \ + __print_symbolic(val, \ + {(NETDEV_CHANGE), "NETDEV_CHANGE" },\ + {(NETDEV_GOING_DOWN), "NETDEV_GOING_DOWN" },\ + {(NETDEV_UP), "NETDEV_UP" },\ + {(NETDEV_CHANGEMTU), "NETDEV_CHANGEMTU" },\ + {(NETDEV_CHANGEADDR), "NETDEV_CHANGEADDR" },\ + {(NETDEV_UNREGISTER), "NETDEV_UNREGISTER" },\ + {(NETDEV_CHANGENAME), "NETDEV_CHANGENAME" }) + +extern unsigned long sysctl_tipc_sk_filter[5] __read_mostly; + +int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf); +int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf); +int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf); +int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf); +int tipc_node_dump(struct tipc_node *n, bool more, char *buf); +bool tipc_sk_filtering(struct sock *sk); + +DECLARE_EVENT_CLASS(tipc_skb_class, + + TP_PROTO(struct sk_buff *skb, bool more, const char *header), + + TP_ARGS(skb, more, header), + + 
TP_STRUCT__entry( + __string(header, header) + __dynamic_array(char, buf, (more) ? SKB_LMAX : SKB_LMIN) + ), + + TP_fast_assign( + __assign_str(header); + tipc_skb_dump(skb, more, __get_str(buf)); + ), + + TP_printk("%s\n%s", __get_str(header), __get_str(buf)) +) + +#define DEFINE_SKB_EVENT(name) \ +DEFINE_EVENT(tipc_skb_class, name, \ + TP_PROTO(struct sk_buff *skb, bool more, const char *header), \ + TP_ARGS(skb, more, header)) +DEFINE_SKB_EVENT(tipc_skb_dump); +DEFINE_SKB_EVENT(tipc_proto_build); +DEFINE_SKB_EVENT(tipc_proto_rcv); + +DECLARE_EVENT_CLASS(tipc_list_class, + + TP_PROTO(struct sk_buff_head *list, bool more, const char *header), + + TP_ARGS(list, more, header), + + TP_STRUCT__entry( + __string(header, header) + __dynamic_array(char, buf, (more) ? LIST_LMAX : LIST_LMIN) + ), + + TP_fast_assign( + __assign_str(header); + tipc_list_dump(list, more, __get_str(buf)); + ), + + TP_printk("%s\n%s", __get_str(header), __get_str(buf)) +); + +#define DEFINE_LIST_EVENT(name) \ +DEFINE_EVENT(tipc_list_class, name, \ + TP_PROTO(struct sk_buff_head *list, bool more, const char *header), \ + TP_ARGS(list, more, header)) +DEFINE_LIST_EVENT(tipc_list_dump); + +DECLARE_EVENT_CLASS(tipc_sk_class, + + TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, + const char *header), + + TP_ARGS(sk, skb, dqueues, header), + + TP_STRUCT__entry( + __string(header, header) + __field(u32, portid) + __dynamic_array(char, buf, (dqueues) ? SK_LMAX : SK_LMIN) + __dynamic_array(char, skb_buf, (skb) ? 
SKB_LMIN : 1) + ), + + TP_fast_assign( + __assign_str(header); + __entry->portid = tipc_sock_get_portid(sk); + tipc_sk_dump(sk, dqueues, __get_str(buf)); + if (skb) + tipc_skb_dump(skb, false, __get_str(skb_buf)); + else + *(__get_str(skb_buf)) = '\0'; + ), + + TP_printk("<%u> %s\n%s%s", __entry->portid, __get_str(header), + __get_str(skb_buf), __get_str(buf)) +); + +#define DEFINE_SK_EVENT_FILTER(name) \ +DEFINE_EVENT_CONDITION(tipc_sk_class, name, \ + TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, \ + const char *header), \ + TP_ARGS(sk, skb, dqueues, header), \ + TP_CONDITION(tipc_sk_filtering(sk))) +DEFINE_SK_EVENT_FILTER(tipc_sk_dump); +DEFINE_SK_EVENT_FILTER(tipc_sk_create); +DEFINE_SK_EVENT_FILTER(tipc_sk_sendmcast); +DEFINE_SK_EVENT_FILTER(tipc_sk_sendmsg); +DEFINE_SK_EVENT_FILTER(tipc_sk_sendstream); +DEFINE_SK_EVENT_FILTER(tipc_sk_poll); +DEFINE_SK_EVENT_FILTER(tipc_sk_filter_rcv); +DEFINE_SK_EVENT_FILTER(tipc_sk_advance_rx); +DEFINE_SK_EVENT_FILTER(tipc_sk_rej_msg); +DEFINE_SK_EVENT_FILTER(tipc_sk_drop_msg); +DEFINE_SK_EVENT_FILTER(tipc_sk_release); +DEFINE_SK_EVENT_FILTER(tipc_sk_shutdown); + +#define DEFINE_SK_EVENT_FILTER_COND(name, cond) \ +DEFINE_EVENT_CONDITION(tipc_sk_class, name, \ + TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, \ + const char *header), \ + TP_ARGS(sk, skb, dqueues, header), \ + TP_CONDITION(tipc_sk_filtering(sk) && (cond))) +DEFINE_SK_EVENT_FILTER_COND(tipc_sk_overlimit1, tipc_sk_overlimit1(sk, skb)); +DEFINE_SK_EVENT_FILTER_COND(tipc_sk_overlimit2, tipc_sk_overlimit2(sk, skb)); + +DECLARE_EVENT_CLASS(tipc_link_class, + + TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), + + TP_ARGS(l, dqueues, header), + + TP_STRUCT__entry( + __string(header, header) + __array(char, name, TIPC_MAX_LINK_NAME) + __dynamic_array(char, buf, (dqueues) ? 
LINK_LMAX : LINK_LMIN) + ), + + TP_fast_assign( + __assign_str(header); + memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME); + tipc_link_dump(l, dqueues, __get_str(buf)); + ), + + TP_printk("<%s> %s\n%s", __entry->name, __get_str(header), + __get_str(buf)) +); + +#define DEFINE_LINK_EVENT(name) \ +DEFINE_EVENT(tipc_link_class, name, \ + TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \ + TP_ARGS(l, dqueues, header)) +DEFINE_LINK_EVENT(tipc_link_dump); +DEFINE_LINK_EVENT(tipc_link_conges); +DEFINE_LINK_EVENT(tipc_link_timeout); +DEFINE_LINK_EVENT(tipc_link_reset); + +#define DEFINE_LINK_EVENT_COND(name, cond) \ +DEFINE_EVENT_CONDITION(tipc_link_class, name, \ + TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \ + TP_ARGS(l, dqueues, header), \ + TP_CONDITION(cond)) +DEFINE_LINK_EVENT_COND(tipc_link_too_silent, tipc_link_too_silent(l)); + +DECLARE_EVENT_CLASS(tipc_link_transmq_class, + + TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), + + TP_ARGS(r, f, t, tq), + + TP_STRUCT__entry( + __array(char, name, TIPC_MAX_LINK_NAME) + __field(u16, from) + __field(u16, to) + __field(u32, len) + __field(u16, fseqno) + __field(u16, lseqno) + ), + + TP_fast_assign( + memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME); + __entry->from = f; + __entry->to = t; + __entry->len = skb_queue_len(tq); + __entry->fseqno = __entry->len ? + msg_seqno(buf_msg(skb_peek(tq))) : 0; + __entry->lseqno = __entry->len ? 
+ msg_seqno(buf_msg(skb_peek_tail(tq))) : 0; + ), + + TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n", + __entry->name, __entry->from, __entry->to, + __entry->len, __entry->fseqno, __entry->lseqno) +); + +DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans, + TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), + TP_ARGS(r, f, t, tq), + TP_CONDITION(less_eq(f, t)) +); + +DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack, + TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), + TP_ARGS(r, f, t, tq), + TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n", + __entry->name, __entry->from, __entry->to, + __entry->len, __entry->fseqno, __entry->lseqno) +); + +DECLARE_EVENT_CLASS(tipc_node_class, + + TP_PROTO(struct tipc_node *n, bool more, const char *header), + + TP_ARGS(n, more, header), + + TP_STRUCT__entry( + __string(header, header) + __field(u32, addr) + __dynamic_array(char, buf, (more) ? NODE_LMAX : NODE_LMIN) + ), + + TP_fast_assign( + __assign_str(header); + __entry->addr = tipc_node_get_addr(n); + tipc_node_dump(n, more, __get_str(buf)); + ), + + TP_printk("<%x> %s\n%s", __entry->addr, __get_str(header), + __get_str(buf)) +); + +#define DEFINE_NODE_EVENT(name) \ +DEFINE_EVENT(tipc_node_class, name, \ + TP_PROTO(struct tipc_node *n, bool more, const char *header), \ + TP_ARGS(n, more, header)) +DEFINE_NODE_EVENT(tipc_node_dump); +DEFINE_NODE_EVENT(tipc_node_create); +DEFINE_NODE_EVENT(tipc_node_delete); +DEFINE_NODE_EVENT(tipc_node_lost_contact); +DEFINE_NODE_EVENT(tipc_node_timeout); +DEFINE_NODE_EVENT(tipc_node_link_up); +DEFINE_NODE_EVENT(tipc_node_link_down); +DEFINE_NODE_EVENT(tipc_node_reset_links); +DEFINE_NODE_EVENT(tipc_node_check_state); + +DECLARE_EVENT_CLASS(tipc_fsm_class, + + TP_PROTO(const char *name, u32 os, u32 ns, int evt), + + TP_ARGS(name, os, ns, evt), + + TP_STRUCT__entry( + __string(name, name) + __field(u32, os) + __field(u32, ns) + __field(u32, evt) + ), + + 
TP_fast_assign( + __assign_str(name); + __entry->os = os; + __entry->ns = ns; + __entry->evt = evt; + ), + + TP_printk("<%s> %s--(%s)->%s\n", __get_str(name), + state_sym(__entry->os), evt_sym(__entry->evt), + state_sym(__entry->ns)) +); + +#define DEFINE_FSM_EVENT(fsm_name) \ +DEFINE_EVENT(tipc_fsm_class, fsm_name, \ + TP_PROTO(const char *name, u32 os, u32 ns, int evt), \ + TP_ARGS(name, os, ns, evt)) +DEFINE_FSM_EVENT(tipc_link_fsm); +DEFINE_FSM_EVENT(tipc_node_fsm); + +TRACE_EVENT(tipc_l2_device_event, + + TP_PROTO(struct net_device *dev, struct tipc_bearer *b, + unsigned long evt), + + TP_ARGS(dev, b, evt), + + TP_STRUCT__entry( + __string(dev_name, dev->name) + __string(b_name, b->name) + __field(unsigned long, evt) + __field(u8, b_up) + __field(u8, carrier) + __field(u8, oper) + ), + + TP_fast_assign( + __assign_str(dev_name); + __assign_str(b_name); + __entry->evt = evt; + __entry->b_up = test_bit(0, &b->up); + __entry->carrier = netif_carrier_ok(dev); + __entry->oper = netif_oper_up(dev); + ), + + TP_printk("%s on: <%s>/<%s> oper: %s carrier: %s bearer: %s\n", + dev_evt_sym(__entry->evt), __get_str(dev_name), + __get_str(b_name), (__entry->oper) ? "up" : "down", + (__entry->carrier) ? "ok" : "notok", + (__entry->b_up) ? "up" : "down") +); + +#endif /* _TIPC_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c new file mode 100644 index 000000000000..b85ab0fb3b8c --- /dev/null +++ b/net/tipc/udp_media.c @@ -0,0 +1,863 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/socket.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/inet.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/list.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/udp_tunnel.h> +#include <net/ipv6_stubs.h> +#include <linux/tipc_netlink.h> +#include "core.h" +#include "addr.h" +#include "net.h" +#include "bearer.h" +#include "netlink.h" +#include "msg.h" +#include "udp_media.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +#define UDP_MIN_HEADROOM 48 + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + * + * @proto: Ethernet protocol in use + * @port: port being used + * @ipv4: IPv4 address of neighbor + * @ipv6: IPv6 address of neighbor + */ +struct udp_media_addr { + __be16 proto; + __be16 port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/* struct udp_replicast - container for UDP remote addresses */ +struct udp_replicast { + struct udp_media_addr addr; + struct dst_cache dst_cache; + struct rcu_head rcu; + struct list_head list; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * @bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + * @rcast: associated udp_replicast container + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; + struct udp_replicast rcast; +}; + +static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) +{ + if (ntohs(addr->proto) == ETH_P_IP) + return ipv4_is_multicast(addr->ipv4.s_addr); +#if IS_ENABLED(CONFIG_IPV6) + else + return ipv6_addr_is_multicast(&addr->ipv6); +#endif + return 0; +} + +/* 
udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + + if (tipc_udp_is_mcast_addr(ua)) + addr->broadcast = TIPC_BROADCAST_SUPPORT; +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port)); + else { + pr_err("Invalid UDP media address\n"); + return 1; + } + + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, + struct udp_bearer *ub, struct udp_media_addr *src, + struct udp_media_addr *dst, struct dst_cache *cache) +{ + struct dst_entry *ndst; + int ttl, err; + + local_bh_disable(); + ndst = dst_cache_get(cache); + if (dst->proto == htons(ETH_P_IP)) { + 
struct rtable *rt = dst_rtable(ndst); + + if (!rt) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = skb->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + } + + ttl = ip4_dst_hoplimit(&rt->dst); + udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, src->port, + dst->port, false, true, 0); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (!ndst) { + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + ndst = ipv6_stub->ipv6_dst_lookup_flow(net, + ub->ubsock->sk, + &fl6, NULL); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + goto tx_error; + } + dst_cache_set_ip6(cache, ndst, &fl6.saddr); + } + ttl = ip6_dst_hoplimit(ndst); + udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false, 0); +#endif + } + local_bh_enable(); + return 0; + +tx_error: + local_bh_enable(); + kfree_skb(skb); + return err; +} + +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *addr) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; + struct udp_replicast *rcast; + struct udp_bearer *ub; + int err = 0; + + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto out; + } + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); + ub = rcu_dereference(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto out; + } + + if (addr->broadcast != TIPC_REPLICAST_SUPPORT) + return tipc_udp_xmit(net, skb, ub, src, dst, + &ub->rcast.dst_cache); + + /* Replicast, send an skb to each configured IP address */ + 
list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { + struct sk_buff *_skb; + + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + err = -ENOMEM; + goto out; + } + + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr, + &rcast->dst_cache); + if (err) + goto out; + } + err = 0; +out: + kfree_skb(skb); + return err; +} + +static bool tipc_udp_is_known_peer(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast, *tmp; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err_ratelimited("UDP bearer instance not found\n"); + return false; + } + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr))) + return true; + } + + return false; +} + +static int tipc_udp_rcast_add(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); + if (!rcast) + return -ENOMEM; + + if (dst_cache_init(&rcast->dst_cache, GFP_ATOMIC)) { + kfree(rcast); + return -ENOMEM; + } + + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); + + if (ntohs(addr->proto) == ETH_P_IP) + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); +#if IS_ENABLED(CONFIG_IPV6) + else if (ntohs(addr->proto) == ETH_P_IPV6) + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); +#endif + b->bcast_addr.broadcast = TIPC_REPLICAST_SUPPORT; + list_add_rcu(&rcast->list, &ub->rcast.list); + return 0; +} + +static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb) +{ + struct udp_media_addr src = {0}; + struct udp_media_addr *dst; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) + return 0; + + src.port = udp_hdr(skb)->source; + + if (ip_hdr(skb)->version == 4) { + struct iphdr *iphdr = ip_hdr(skb); + + src.proto = htons(ETH_P_IP); 
+ src.ipv4.s_addr = iphdr->saddr; + if (ipv4_is_multicast(iphdr->daddr)) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (ip_hdr(skb)->version == 6) { + struct ipv6hdr *iphdr = ipv6_hdr(skb); + + src.proto = htons(ETH_P_IPV6); + src.ipv6 = iphdr->saddr; + if (ipv6_addr_is_multicast(&iphdr->daddr)) + return 0; +#endif + } else { + return 0; + } + + if (likely(tipc_udp_is_known_peer(b, &src))) + return 0; + + return tipc_udp_rcast_add(b, &src); +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + struct tipc_msg *hdr; + int err; + + ub = rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + goto out; + } + skb_pull(skb, sizeof(struct udphdr)); + hdr = buf_msg(skb); + + b = rcu_dereference(ub->bearer); + if (!b) + goto out; + + if (b && test_bit(0, &b->up)) { + TIPC_SKB_CB(skb)->flags = 0; + tipc_rcv(sock_net(sk), skb, b); + return 0; + } + + if (unlikely(msg_user(hdr) == LINK_CONFIG)) { + err = tipc_udp_rcast_disc(b, skb); + if (err) + goto out; + } + +out: + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = ip_mc_join_group(sk, &mreqn); +#if IS_ENABLED(CONFIG_IPV6) + } else { + lock_sock(sk); + err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, + &remote->ipv6); + release_sock(sk); +#endif + } + return err; +} + +static int __tipc_nl_add_udp_addr(struct sk_buff *skb, + struct udp_media_addr *addr, int nla_t) +{ + if (ntohs(addr->proto) == ETH_P_IP) { + struct sockaddr_in ip4; + + memset(&ip4, 0, sizeof(ip4)); + ip4.sin_family = AF_INET; + ip4.sin_port = addr->port; + ip4.sin_addr.s_addr = addr->ipv4.s_addr; + if 
(nla_put(skb, nla_t, sizeof(ip4), &ip4)) + return -EMSGSIZE; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (ntohs(addr->proto) == ETH_P_IPV6) { + struct sockaddr_in6 ip6; + + memset(&ip6, 0, sizeof(ip6)); + ip6.sin6_family = AF_INET6; + ip6.sin6_port = addr->port; + memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); + if (nla_put(skb, nla_t, sizeof(ip6), &ip6)) + return -EMSGSIZE; +#endif + } + + return 0; +} + +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) +{ + u32 bid = cb->args[0]; + u32 skip_cnt = cb->args[1]; + u32 portid = NETLINK_CB(cb->skb).portid; + struct udp_replicast *rcast, *tmp; + struct tipc_bearer *b; + struct udp_bearer *ub; + void *hdr; + int err; + int i; + + if (!bid && !skip_cnt) { + struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; + struct net *net = sock_net(skb->sk); + struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; + char *bname; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, NULL); + if (err) + return err; + + if (!battrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, bname); + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + return -EINVAL; + } + bid = b->identity; + } else { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + rtnl_lock(); + b = rtnl_dereference(tn->bearer_list[bid]); + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + return -EINVAL; + } + } + + ub = rtnl_dereference(b->media_ptr); + if (!ub) { + rtnl_unlock(); + return -EINVAL; + } + + i = 0; + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (i < skip_cnt) + goto count; + + hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_BEARER_GET); + if (!hdr) 
+ goto done; + + err = __tipc_nl_add_udp_addr(skb, &rcast->addr, + TIPC_NLA_UDP_REMOTE); + if (err) { + genlmsg_cancel(skb, hdr); + goto done; + } + genlmsg_end(skb, hdr); +count: + i++; + } +done: + rtnl_unlock(); + cb->args[0] = bid; + cb->args[1] = i; + + return skb->len; +} + +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst; + struct udp_bearer *ub; + struct nlattr *nest; + + ub = rtnl_dereference(b->media_ptr); + if (!ub) + return -ENODEV; + + nest = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + if (!nest) + goto msg_full; + + if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL)) + goto msg_full; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE)) + goto msg_full; + + if (!list_empty(&ub->rcast.list)) { + if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP)) + goto msg_full; + } + + nla_nest_end(msg->skb, nest); + return 0; +msg_full: + nla_nest_cancel(msg->skb, nest); + return -EMSGSIZE; +} + +/** + * tipc_parse_udp_addr - build udp media address from netlink data + * @nla: netlink attribute containing sockaddr storage aligned address + * @addr: tipc media address to fill with address, port and protocol type + * @scope_id: IPv6 scope id pointer, not NULL indicates it's required + */ + +static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, + u32 *scope_id) +{ + struct sockaddr_storage sa; + + nla_memcpy(&sa, nla, sizeof(sa)); + if (sa.ss_family == AF_INET) { + struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa; + + addr->proto = htons(ETH_P_IP); + addr->port = ip4->sin_port; + addr->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sa.ss_family == AF_INET6) { + struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa; + + addr->proto = htons(ETH_P_IPV6); + 
addr->port = ip6->sin6_port; + memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); + + /* Scope ID is only interesting for local addresses */ + if (scope_id) { + int atype; + + atype = ipv6_addr_type(&ip6->sin6_addr); + if (__ipv6_addr_needs_scope_id(atype) && + !ip6->sin6_scope_id) { + return -EINVAL; + } + + *scope_id = ip6->sin6_scope_id ? : 0; + } + + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) +{ + int err; + struct udp_media_addr addr = {0}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct udp_media_addr *dst; + + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy, NULL)) + return -EINVAL; + + if (!opts[TIPC_NLA_UDP_REMOTE]) + return -EINVAL; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); + if (err) + return err; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) { + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); + return -EINVAL; + } + + if (tipc_udp_is_known_peer(b, &addr)) + return 0; + + return tipc_udp_rcast_add(b, &addr); +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr remote = {0}; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + u8 node_id[NODE_ID_LEN] = {0,}; + struct net_device *dev; + int rmcast = 0; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + INIT_LIST_HEAD(&ub->rcast.list); + + if 
(!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], tipc_nl_udp_policy, NULL)) + goto err; + + if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { + pr_err("Invalid UDP bearer configuration"); + err = -EINVAL; + goto err; + } + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, + &ub->ifindex); + if (err) + goto err; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); + if (err) + goto err; + + if (remote.proto != local.proto) { + err = -EINVAL; + goto err; + } + + /* Checking remote ip address */ + rmcast = tipc_udp_is_mcast_addr(&remote); + + /* Autoconfigure own node identity if needed */ + if (!tipc_own_id(net)) { + memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16); + tipc_net_init(net, node_id, 0); + } + if (!tipc_own_id(net)) { + pr_warn("Failed to set node id, please configure manually\n"); + err = -EINVAL; + goto err; + } + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (local.proto == htons(ETH_P_IP)) { + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET; + + /* Switch to use ANY to receive packets from group */ + if (rmcast) + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + else + udp_conf.local_ip.s_addr = local.ipv4.s_addr; + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr); + b->mtu = b->media->mtu; +#if IS_ENABLED(CONFIG_IPV6) + } else if (local.proto == htons(ETH_P_IPV6)) { + dev = ub->ifindex ? 
__dev_get_by_index(net, ub->ifindex) : NULL; + dev = ipv6_dev_find(net, &local.ipv6, dev); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + if (rmcast) + udp_conf.local_ip6 = in6addr_any; + else + udp_conf.local_ip6 = local.ipv6; + ub->ifindex = dev->ifindex; + b->encap_hlen = sizeof(struct ipv6hdr) + sizeof(struct udphdr); + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC); + if (err) + goto free; + + /* + * The bcast media address port is used for all peers and the ip + * is used if it's a multicast address. + */ + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); + if (rmcast) + err = enable_mcast(ub, &remote); + else + err = tipc_udp_rcast_add(b, &remote); + if (err) + goto free; + + return 0; + +free: + dst_cache_destroy(&ub->rcast.dst_cache); + udp_tunnel_sock_release(ub->ubsock); +err: + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; + struct tipc_net *tn; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + dst_cache_destroy(&rcast->dst_cache); + list_del_rcu(&rcast->list); + kfree_rcu(rcast, rcu); + } + + tn = tipc_net(sock_net(ub->ubsock->sk)); + + dst_cache_destroy(&ub->rcast.dst_cache); + udp_tunnel_sock_release(ub->ubsock); + + /* Note: could use a call_rcu() to avoid another synchronize_net() */ + synchronize_net(); + atomic_dec(&tn->wq_count); + 
kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rtnl_dereference(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_DEF_LINK_WIN, + .mtu = TIPC_DEF_LINK_UDP_MTU, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; diff --git a/net/tipc/ref.h b/net/tipc/udp_media.h index 5bc8e7ab84de..e7455cc73e16 100644 --- a/net/tipc/ref.h +++ b/net/tipc/udp_media.h @@ -1,8 +1,8 @@ /* - * net/tipc/ref.h: Include file for TIPC object registry code + * net/tipc/udp_media.h: Include file for UDP bearer media * - * Copyright (c) 1991-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,16 +34,27 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef _TIPC_REF_H -#define _TIPC_REF_H +#ifdef CONFIG_TIPC_MEDIA_UDP +#ifndef _TIPC_UDP_MEDIA_H +#define _TIPC_UDP_MEDIA_H -int tipc_ref_table_init(u32 requested_size, u32 start); -void tipc_ref_table_stop(void); +#include <linux/ip.h> +#include <linux/udp.h> -u32 tipc_ref_acquire(void *object, spinlock_t **lock); -void tipc_ref_discard(u32 ref); +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); -void *tipc_ref_lock(u32 ref); -void *tipc_ref_deref(u32 ref); +/* check if configured MTU is too low for tipc headers */ +static inline bool tipc_udp_mtu_bad(u32 mtu) +{ + if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) + + sizeof(struct udphdr))) + return false; + pr_warn("MTU too low for tipc bearer\n"); + return true; +} + +#endif #endif |
