summaryrefslogtreecommitdiff
path: root/net/tipc
diff options
context:
space:
mode:
Diffstat (limited to 'net/tipc')
-rw-r--r--net/tipc/Kconfig48
-rw-r--r--net/tipc/Makefile18
-rw-r--r--net/tipc/addr.c129
-rw-r--r--net/tipc/addr.h113
-rw-r--r--net/tipc/bcast.c1370
-rw-r--r--net/tipc/bcast.h122
-rw-r--r--net/tipc/bearer.c1473
-rw-r--r--net/tipc/bearer.h229
-rw-r--r--net/tipc/config.c448
-rw-r--r--net/tipc/config.h72
-rw-r--r--net/tipc/core.c274
-rw-r--r--net/tipc/core.h265
-rw-r--r--net/tipc/crypto.c2484
-rw-r--r--net/tipc/crypto.h200
-rw-r--r--net/tipc/diag.c118
-rw-r--r--net/tipc/discover.c553
-rw-r--r--net/tipc/discover.h16
-rw-r--r--net/tipc/eth_media.c358
-rw-r--r--net/tipc/group.c959
-rw-r--r--net/tipc/group.h77
-rw-r--r--net/tipc/handler.c130
-rw-r--r--net/tipc/ib_media.c342
-rw-r--r--net/tipc/link.c5031
-rw-r--r--net/tipc/link.h375
-rw-r--r--net/tipc/monitor.c875
-rw-r--r--net/tipc/monitor.h83
-rw-r--r--net/tipc/msg.c835
-rw-r--r--net/tipc/msg.h783
-rw-r--r--net/tipc/name_distr.c514
-rw-r--r--net/tipc/name_distr.h41
-rw-r--r--net/tipc/name_table.c1695
-rw-r--r--net/tipc/name_table.h151
-rw-r--r--net/tipc/net.c350
-rw-r--r--net/tipc/net.h15
-rw-r--r--net/tipc/netlink.c305
-rw-r--r--net/tipc/netlink.h (renamed from net/tipc/node_subscr.h)49
-rw-r--r--net/tipc/netlink_compat.c1376
-rw-r--r--net/tipc/node.c3240
-rw-r--r--net/tipc/node.h166
-rw-r--r--net/tipc/node_subscr.c95
-rw-r--r--net/tipc/port.c1066
-rw-r--r--net/tipc/port.h205
-rw-r--r--net/tipc/ref.c286
-rw-r--r--net/tipc/server.c596
-rw-r--r--net/tipc/server.h94
-rw-r--r--net/tipc/socket.c4345
-rw-r--r--net/tipc/socket.h80
-rw-r--r--net/tipc/subscr.c400
-rw-r--r--net/tipc/subscr.h95
-rw-r--r--net/tipc/sysctl.c49
-rw-r--r--net/tipc/topsrv.c733
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/log.c)37
-rw-r--r--net/tipc/trace.c206
-rw-r--r--net/tipc/trace.h434
-rw-r--r--net/tipc/udp_media.c863
-rw-r--r--net/tipc/udp_media.h (renamed from net/tipc/ref.h)33
56 files changed, 23952 insertions, 11347 deletions
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index c890848f9d56..bb0d71eb02a6 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# TIPC configuration
#
@@ -5,7 +6,8 @@
menuconfig TIPC
tristate "The TIPC Protocol"
depends on INET
- ---help---
+ depends on IPV6 || IPV6=n
+ help
The Transparent Inter Process Communication (TIPC) protocol is
specially designed for intra cluster communication. This protocol
originates from Ericsson where it has been used in carrier grade
@@ -16,25 +18,43 @@ menuconfig TIPC
This protocol support is also available as a module ( = code which
can be inserted in and removed from the running kernel whenever you
want). The module will be called tipc. If you want to compile it
- as a module, say M here and read <file:Documentation/kbuild/modules.txt>.
+ as a module, say M here and read <file:Documentation/kbuild/modules.rst>.
If in doubt, say N.
-config TIPC_PORTS
- int "Maximum number of ports in a node"
- depends on TIPC
- range 127 65535
- default "8191"
- help
- Specifies how many ports can be supported by a node.
- Can range from 127 to 65535 ports; default is 8191.
-
- Setting this to a smaller value saves some memory,
- setting it to higher allows for more ports.
-
config TIPC_MEDIA_IB
bool "InfiniBand media type support"
depends on TIPC && INFINIBAND_IPOIB
help
Saying Y here will enable support for running TIPC on
IP-over-InfiniBand devices.
+config TIPC_MEDIA_UDP
+ bool "IP/UDP media type support"
+ depends on TIPC
+ select NET_UDP_TUNNEL
+ default y
+ help
+ Saying Y here will enable support for running TIPC over IP/UDP.
+
+config TIPC_CRYPTO
+ bool "TIPC encryption support"
+ depends on TIPC
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_GCM
+ default y
+ help
+ Saying Y here will enable support for TIPC encryption.
+ All TIPC messages will be encrypted/decrypted by using the currently most
+ advanced algorithm: AEAD AES-GCM (like IPSec or TLS) before leaving/
+ entering the TIPC stack.
+ Key setting from user-space is performed via netlink by a user program
+ (e.g. the iproute2 'tipc' tool).
+
+config TIPC_DIAG
+ tristate "TIPC: socket monitoring interface"
+ depends on TIPC
+ default y
+ help
+ Support for TIPC socket monitoring interface used by ss tool.
+ If unsure, say Y.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index b282f7130d2b..18e1636aa036 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -1,14 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TIPC layer
#
obj-$(CONFIG_TIPC) := tipc.o
-tipc-y += addr.o bcast.o bearer.o config.o \
- core.o handler.o link.o discover.o msg.o \
- name_distr.o subscr.o name_table.o net.o \
- netlink.o node.o node_subscr.o port.o ref.o \
- socket.o log.o eth_media.o server.o
+tipc-y += addr.o bcast.o bearer.o \
+ core.o link.o discover.o msg.o \
+ name_distr.o subscr.o monitor.o name_table.o net.o \
+ netlink.o netlink_compat.o node.o socket.o eth_media.o \
+ topsrv.o group.o trace.o
+CFLAGS_trace.o += -I$(src)
+
+tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
+tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
+
+obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o
+tipc_diag-y += diag.o
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 357b74b26f9e..6f5c54cbf8d9 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -1,8 +1,9 @@
/*
* net/tipc/addr.c: TIPC address utility routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,70 +35,90 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "core.h"
#include "addr.h"
+#include "core.h"
-/**
- * tipc_addr_domain_valid - validates a network domain address
- *
- * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
- * where Z, C, and N are non-zero.
- *
- * Returns 1 if domain address is valid, otherwise 0
- */
-int tipc_addr_domain_valid(u32 addr)
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
{
- u32 n = tipc_node(addr);
- u32 c = tipc_cluster(addr);
- u32 z = tipc_zone(addr);
-
- if (n && (!z || !c))
- return 0;
- if (c && !z)
- return 0;
- return 1;
+ if (!domain || (domain == addr))
+ return true;
+ if (!legacy_format)
+ return false;
+ if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
+ return true;
+ if (domain == (addr & TIPC_ZONE_CLUSTER_MASK)) /* domain <Z.C.0> */
+ return true;
+ if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
+ return true;
+ return false;
}
-/**
- * tipc_addr_node_valid - validates a proposed network address for this node
- *
- * Accepts <Z.C.N>, where Z, C, and N are non-zero.
- *
- * Returns 1 if address can be used, otherwise 0
- */
-int tipc_addr_node_valid(u32 addr)
+void tipc_set_node_id(struct net *net, u8 *id)
{
- return tipc_addr_domain_valid(addr) && tipc_node(addr);
-}
+ struct tipc_net *tn = tipc_net(net);
-int tipc_in_scope(u32 domain, u32 addr)
-{
- if (!domain || (domain == addr))
- return 1;
- if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
- return 1;
- if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
- return 1;
- return 0;
+ memcpy(tn->node_id, id, NODE_ID_LEN);
+ tipc_nodeid2string(tn->node_id_string, id);
+ tn->trial_addr = hash128to32(id);
+ pr_info("Node identity %s, cluster identity %u\n",
+ tipc_own_id_string(net), tn->net_id);
}
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
+void tipc_set_node_addr(struct net *net, u32 addr)
{
- if (likely(!domain))
- return TIPC_ZONE_SCOPE;
- if (tipc_node(domain))
- return TIPC_NODE_SCOPE;
- if (tipc_cluster(domain))
- return TIPC_CLUSTER_SCOPE;
- return TIPC_ZONE_SCOPE;
+ struct tipc_net *tn = tipc_net(net);
+ u8 node_id[NODE_ID_LEN] = {0,};
+
+ tn->node_addr = addr;
+ if (!tipc_own_id(net)) {
+ sprintf(node_id, "%x", addr);
+ tipc_set_node_id(net, node_id);
+ }
+ tn->trial_addr = addr;
+ tn->addr_trial_end = jiffies;
+ pr_info("Node number set to %u\n", addr);
}
-char *tipc_addr_string_fill(char *string, u32 addr)
+int tipc_nodeid2string(char *str, u8 *id)
{
- snprintf(string, 16, "<%u.%u.%u>",
- tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
- return string;
+ int i;
+ u8 c;
+
+ /* Already a string ? */
+ for (i = 0; i < NODE_ID_LEN; i++) {
+ c = id[i];
+ if (c >= '0' && c <= '9')
+ continue;
+ if (c >= 'A' && c <= 'Z')
+ continue;
+ if (c >= 'a' && c <= 'z')
+ continue;
+ if (c == '.')
+ continue;
+ if (c == ':')
+ continue;
+ if (c == '_')
+ continue;
+ if (c == '-')
+ continue;
+ if (c == '@')
+ continue;
+ if (c != 0)
+ break;
+ }
+ if (i == NODE_ID_LEN) {
+ memcpy(str, id, NODE_ID_LEN);
+ str[NODE_ID_LEN] = 0;
+ return i;
+ }
+
+ /* Translate to hex string */
+ for (i = 0; i < NODE_ID_LEN; i++)
+ sprintf(&str[2 * i], "%02x", id[i]);
+
+ /* Strip off trailing zeroes */
+ for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--)
+ str[i] = 0;
+
+ return i + 1;
}
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 60b00ab93d74..a113cf7e1f89 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -1,8 +1,9 @@
/*
* net/tipc/addr.h: Include file for TIPC address utility routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,58 +38,98 @@
#ifndef _TIPC_ADDR_H
#define _TIPC_ADDR_H
-#define TIPC_ZONE_MASK 0xff000000u
-#define TIPC_CLUSTER_MASK 0xfffff000u
+#include <linux/types.h>
+#include <linux/tipc.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include "core.h"
-static inline u32 tipc_zone_mask(u32 addr)
+/* Struct tipc_uaddr: internal version of struct sockaddr_tipc.
+ * Must be kept aligned both regarding field positions and size.
+ */
+struct tipc_uaddr {
+ unsigned short family;
+ unsigned char addrtype;
+ signed char scope;
+ union {
+ struct {
+ struct tipc_service_addr sa;
+ u32 lookup_node;
+ };
+ struct tipc_service_range sr;
+ struct tipc_socket_addr sk;
+ };
+};
+
+static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope,
+ u32 type, u32 lower, u32 upper)
{
- return addr & TIPC_ZONE_MASK;
+ ua->family = AF_TIPC;
+ ua->addrtype = atype;
+ ua->scope = scope;
+ ua->sr.type = type;
+ ua->sr.lower = lower;
+ ua->sr.upper = upper;
}
-static inline u32 tipc_cluster_mask(u32 addr)
+static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len)
{
- return addr & TIPC_CLUSTER_MASK;
+ u32 atype;
+
+ if (len < sizeof(struct sockaddr_tipc))
+ return false;
+ atype = ua->addrtype;
+ if (ua->family != AF_TIPC)
+ return false;
+ if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR)
+ return true;
+ if (atype == TIPC_SERVICE_RANGE)
+ return ua->sr.upper >= ua->sr.lower;
+ return false;
}
-static inline int in_own_cluster_exact(u32 addr)
+static inline u32 tipc_own_addr(struct net *net)
{
- return !((addr ^ tipc_own_addr) >> 12);
+ return tipc_net(net)->node_addr;
}
-/**
- * in_own_node - test for node inclusion; <0.0.0> always matches
- */
-static inline int in_own_node(u32 addr)
+static inline u8 *tipc_own_id(struct net *net)
{
- return (addr == tipc_own_addr) || !addr;
+ struct tipc_net *tn = tipc_net(net);
+
+ if (!strlen(tn->node_id_string))
+ return NULL;
+ return tn->node_id;
}
-/**
- * in_own_cluster - test for cluster inclusion; <0.0.0> always matches
- */
-static inline int in_own_cluster(u32 addr)
+static inline char *tipc_own_id_string(struct net *net)
{
- return in_own_cluster_exact(addr) || !addr;
+ return tipc_net(net)->node_id_string;
}
-/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-static inline u32 addr_domain(u32 sc)
+static inline u32 tipc_cluster_mask(u32 addr)
{
- if (likely(sc == TIPC_NODE_SCOPE))
- return tipc_own_addr;
- if (sc == TIPC_CLUSTER_SCOPE)
- return tipc_cluster_mask(tipc_own_addr);
- return tipc_zone_mask(tipc_own_addr);
+ return addr & TIPC_ZONE_CLUSTER_MASK;
}
-int tipc_addr_domain_valid(u32);
-int tipc_addr_node_valid(u32 addr);
-int tipc_in_scope(u32 domain, u32 addr);
-int tipc_addr_scope(u32 domain);
-char *tipc_addr_string_fill(char *string, u32 addr);
+static inline int tipc_node2scope(u32 node)
+{
+ return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{
+ return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
+static inline int in_own_node(struct net *net, u32 addr)
+{
+ return addr == tipc_own_addr(net) || !addr;
+}
+
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
+void tipc_set_node_id(struct net *net, u8 *id);
+void tipc_set_node_addr(struct net *net, u32 addr);
+int tipc_nodeid2string(char *str, u8 *id);
+
#endif
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 716de1ac6cb5..114fef65f92e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -1,7 +1,7 @@
/*
* net/tipc/bcast.c: TIPC broadcast code
*
- * Copyright (c) 2004-2006, Ericsson AB
+ * Copyright (c) 2004-2006, 2014-2017, Ericsson AB
* Copyright (c) 2004, Intel Corporation.
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
@@ -35,874 +35,830 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "core.h"
-#include "link.h"
-#include "port.h"
+#include <linux/tipc_config.h>
+#include "socket.h"
+#include "msg.h"
#include "bcast.h"
-#include "name_distr.h"
-
-#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
-
-#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
+#include "link.h"
+#include "name_table.h"
-/**
- * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link
- * @primary: pointer to primary bearer
- * @secondary: pointer to secondary bearer
- *
- * Bearers must have same priority and same set of reachable destinations
- * to be paired.
- */
+#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
+#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
-struct tipc_bcbearer_pair {
- struct tipc_bearer *primary;
- struct tipc_bearer *secondary;
-};
-
-/**
- * struct tipc_bcbearer - bearer used by broadcast link
- * @bearer: (non-standard) broadcast bearer structure
- * @media: (non-standard) broadcast media structure
- * @bpairs: array of bearer pairs
- * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort()
- * @remains: temporary node map used by tipc_bcbearer_send()
- * @remains_new: temporary node map used tipc_bcbearer_send()
- *
- * Note: The fields labelled "temporary" are incorporated into the bearer
- * to avoid consuming potentially limited stack space through the use of
- * large local variables within multicast routines. Concurrent access is
- * prevented through use of the spinlock "bc_lock".
- */
-struct tipc_bcbearer {
- struct tipc_bearer bearer;
- struct tipc_media media;
- struct tipc_bcbearer_pair bpairs[MAX_BEARERS];
- struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
- struct tipc_node_map remains;
- struct tipc_node_map remains_new;
-};
+const char tipc_bclink_name[] = "broadcast-link";
+unsigned long sysctl_tipc_bc_retruni __read_mostly;
/**
- * struct tipc_bclink - link used for broadcast messages
- * @link: (non-standard) broadcast link structure
- * @node: (non-standard) node structure representing b'cast link's peer node
- * @bcast_nodes: map of broadcast-capable nodes
- * @retransmit_to: node that most recently requested a retransmit
- *
- * Handles sequence numbering, fragmentation, bundling, etc.
+ * struct tipc_bc_base - base structure for keeping broadcast send state
+ * @link: broadcast send link structure
+ * @inputq: data input queue; will only carry SOCK_WAKEUP messages
+ * @dests: array keeping number of reachable destinations per bearer
+ * @primary_bearer: a bearer having links to all broadcast destinations, if any
+ * @bcast_support: indicates if primary bearer, if any, supports broadcast
+ * @force_bcast: forces broadcast for multicast traffic
+ * @rcast_support: indicates if all peer nodes support replicast
+ * @force_rcast: forces replicast for multicast traffic
+ * @rc_ratio: dest count as percentage of cluster size where send method changes
+ * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
*/
-struct tipc_bclink {
- struct tipc_link link;
- struct tipc_node node;
- struct tipc_node_map bcast_nodes;
- struct tipc_node *retransmit_to;
+struct tipc_bc_base {
+ struct tipc_link *link;
+ struct sk_buff_head inputq;
+ int dests[MAX_BEARERS];
+ int primary_bearer;
+ bool bcast_support;
+ bool force_bcast;
+ bool rcast_support;
+ bool force_rcast;
+ int rc_ratio;
+ int bc_threshold;
};
-static struct tipc_bcbearer bcast_bearer;
-static struct tipc_bclink bcast_link;
-
-static struct tipc_bcbearer *bcbearer = &bcast_bearer;
-static struct tipc_bclink *bclink = &bcast_link;
-static struct tipc_link *bcl = &bcast_link.link;
-
-static DEFINE_SPINLOCK(bc_lock);
-
-const char tipc_bclink_name[] = "broadcast-link";
-
-static void tipc_nmap_diff(struct tipc_node_map *nm_a,
- struct tipc_node_map *nm_b,
- struct tipc_node_map *nm_diff);
-
-static u32 bcbuf_acks(struct sk_buff *buf)
+static struct tipc_bc_base *tipc_bc_base(struct net *net)
{
- return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;
+ return tipc_net(net)->bcbase;
}
-static void bcbuf_set_acks(struct sk_buff *buf, u32 acks)
+/* tipc_bcast_get_mtu(): get the MTU currently used by broadcast link
+ * Note: the MTU is decremented to give room for a tunnel header, in
+ * case the message needs to be sent as replicast
+ */
+int tipc_bcast_get_mtu(struct net *net)
{
- TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks;
+ return tipc_link_mss(tipc_bc_sndlink(net));
}
-static void bcbuf_decr_acks(struct sk_buff *buf)
+void tipc_bcast_toggle_rcast(struct net *net, bool supp)
{
- bcbuf_set_acks(buf, bcbuf_acks(buf) - 1);
+ tipc_bc_base(net)->rcast_support = supp;
}
-void tipc_bclink_add_node(u32 addr)
+static void tipc_bcbase_calc_bc_threshold(struct net *net)
{
- spin_lock_bh(&bc_lock);
- tipc_nmap_add(&bclink->bcast_nodes, addr);
- spin_unlock_bh(&bc_lock);
-}
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+ int cluster_size = tipc_link_bc_peers(tipc_bc_sndlink(net));
-void tipc_bclink_remove_node(u32 addr)
-{
- spin_lock_bh(&bc_lock);
- tipc_nmap_remove(&bclink->bcast_nodes, addr);
- spin_unlock_bh(&bc_lock);
+ bb->bc_threshold = 1 + (cluster_size * bb->rc_ratio / 100);
}
-static void bclink_set_last_sent(void)
+/* tipc_bcbase_select_primary(): find a bearer with links to all destinations,
+ * if any, and make it primary bearer
+ */
+static void tipc_bcbase_select_primary(struct net *net)
{
- if (bcl->next_out)
- bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
- else
- bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
-}
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+ int all_dests = tipc_link_bc_peers(bb->link);
+ int max_win = tipc_link_max_win(bb->link);
+ int min_win = tipc_link_min_win(bb->link);
+ int i, mtu, prim;
-u32 tipc_bclink_get_last_sent(void)
-{
- return bcl->fsm_msg_cnt;
-}
+ bb->primary_bearer = INVALID_BEARER_ID;
+ bb->bcast_support = true;
-static void bclink_update_last_sent(struct tipc_node *node, u32 seqno)
-{
- node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ?
- seqno : node->bclink.last_sent;
-}
+ if (!all_dests)
+ return;
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (!bb->dests[i])
+ continue;
-/**
- * tipc_bclink_retransmit_to - get most recent node to request retransmission
- *
- * Called with bc_lock locked
- */
-struct tipc_node *tipc_bclink_retransmit_to(void)
+ mtu = tipc_bearer_mtu(net, i);
+ if (mtu < tipc_link_mtu(bb->link)) {
+ tipc_link_set_mtu(bb->link, mtu);
+ tipc_link_set_queue_limits(bb->link,
+ min_win,
+ max_win);
+ }
+ bb->bcast_support &= tipc_bearer_bcast_support(net, i);
+ if (bb->dests[i] < all_dests)
+ continue;
+
+ bb->primary_bearer = i;
+
+ /* Reduce risk that all nodes select same primary */
+ if ((i ^ tipc_own_addr(net)) & 1)
+ break;
+ }
+ prim = bb->primary_bearer;
+ if (prim != INVALID_BEARER_ID)
+ bb->bcast_support = tipc_bearer_bcast_support(net, prim);
+}
+
+void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id)
{
- return bclink->retransmit_to;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+
+ tipc_bcast_lock(net);
+ bb->dests[bearer_id]++;
+ tipc_bcbase_select_primary(net);
+ tipc_bcast_unlock(net);
}
-/**
- * bclink_retransmit_pkt - retransmit broadcast packets
- * @after: sequence number of last packet to *not* retransmit
- * @to: sequence number of last packet to retransmit
- *
- * Called with bc_lock locked
- */
-static void bclink_retransmit_pkt(u32 after, u32 to)
+void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id)
{
- struct sk_buff *buf;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
- buf = bcl->first_out;
- while (buf && less_eq(buf_seqno(buf), after))
- buf = buf->next;
- tipc_link_retransmit(bcl, buf, mod(to - after));
+ tipc_bcast_lock(net);
+ bb->dests[bearer_id]--;
+ tipc_bcbase_select_primary(net);
+ tipc_bcast_unlock(net);
}
-/**
- * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets
- * @n_ptr: node that sent acknowledgement info
- * @acked: broadcast sequence # that has been acknowledged
+/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers
*
- * Node is locked, bc_lock unlocked.
+ * Note that number of reachable destinations, as indicated in the dests[]
+ * array, may transitionally differ from the number of destinations indicated
+ * in each sent buffer. We can sustain this. Excess destination nodes will
+ * drop and never acknowledge the unexpected packets, and missing destinations
+ * will either require retransmission (if they are just about to be added to
+ * the bearer), or be removed from the buffer's 'ackers' counter (if they
+ * just went down)
*/
-void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
+static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
{
- struct sk_buff *crs;
- struct sk_buff *next;
- unsigned int released = 0;
-
- spin_lock_bh(&bc_lock);
+ int bearer_id;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+ struct sk_buff *skb, *_skb;
+ struct sk_buff_head _xmitq;
- /* Bail out if tx queue is empty (no clean up is required) */
- crs = bcl->first_out;
- if (!crs)
- goto exit;
+ if (skb_queue_empty(xmitq))
+ return;
- /* Determine which messages need to be acknowledged */
- if (acked == INVALID_LINK_SEQ) {
- /*
- * Contact with specified node has been lost, so need to
- * acknowledge sent messages only (if other nodes still exist)
- * or both sent and unsent messages (otherwise)
- */
- if (bclink->bcast_nodes.count)
- acked = bcl->fsm_msg_cnt;
- else
- acked = bcl->next_out_no;
- } else {
- /*
- * Bail out if specified sequence number does not correspond
- * to a message that has been sent and not yet acknowledged
- */
- if (less(acked, buf_seqno(crs)) ||
- less(bcl->fsm_msg_cnt, acked) ||
- less_eq(acked, n_ptr->bclink.acked))
- goto exit;
+ /* The typical case: at least one bearer has links to all nodes */
+ bearer_id = bb->primary_bearer;
+ if (bearer_id >= 0) {
+ tipc_bearer_bc_xmit(net, bearer_id, xmitq);
+ return;
}
- /* Skip over packets that node has previously acknowledged */
- while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked))
- crs = crs->next;
-
- /* Update packets that node is now acknowledging */
-
- while (crs && less_eq(buf_seqno(crs), acked)) {
- next = crs->next;
-
- if (crs != bcl->next_out)
- bcbuf_decr_acks(crs);
- else {
- bcbuf_set_acks(crs, 0);
- bcl->next_out = next;
- bclink_set_last_sent();
- }
+ /* We have to transmit across all bearers */
+ __skb_queue_head_init(&_xmitq);
+ for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+ if (!bb->dests[bearer_id])
+ continue;
- if (bcbuf_acks(crs) == 0) {
- bcl->first_out = next;
- bcl->out_queue_size--;
- kfree_skb(crs);
- released = 1;
+ skb_queue_walk(xmitq, skb) {
+ _skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ break;
+ __skb_queue_tail(&_xmitq, _skb);
}
- crs = next;
+ tipc_bearer_bc_xmit(net, bearer_id, &_xmitq);
}
- n_ptr->bclink.acked = acked;
-
- /* Try resolving broadcast link congestion, if necessary */
-
- if (unlikely(bcl->next_out)) {
- tipc_link_push_queue(bcl);
- bclink_set_last_sent();
- }
- if (unlikely(released && !list_empty(&bcl->waiting_ports)))
- tipc_link_wakeup_ports(bcl, 0);
-exit:
- spin_unlock_bh(&bc_lock);
+ __skb_queue_purge(xmitq);
+ __skb_queue_purge(&_xmitq);
}
-/**
- * tipc_bclink_update_link_state - update broadcast link state
- *
- * tipc_net_lock and node lock set
- */
-void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
+static void tipc_bcast_select_xmit_method(struct net *net, int dests,
+ struct tipc_mc_method *method)
{
- struct sk_buff *buf;
-
- /* Ignore "stale" link state info */
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+ unsigned long exp = method->expires;
- if (less_eq(last_sent, n_ptr->bclink.last_in))
+ /* Broadcast supported by used bearer/bearers? */
+ if (!bb->bcast_support) {
+ method->rcast = true;
+ return;
+ }
+ /* Any destinations which don't support replicast ? */
+ if (!bb->rcast_support) {
+ method->rcast = false;
+ return;
+ }
+ /* Can current method be changed ? */
+ method->expires = jiffies + TIPC_METHOD_EXPIRE;
+ if (method->mandatory)
return;
- /* Update link synchronization state; quit if in sync */
-
- bclink_update_last_sent(n_ptr, last_sent);
-
- if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in)
+ if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) &&
+ time_before(jiffies, exp))
return;
- /* Update out-of-sync state; quit if loss is still unconfirmed */
+ /* Configured to force the 'broadcast' method */
+ if (bb->force_bcast) {
+ method->rcast = false;
+ return;
+ }
+ /* Configured to force the 'replicast' method */
+ if (bb->force_rcast) {
+ method->rcast = true;
+ return;
+ }
+ /* Configuration as 'autoselect' or default method */
+ /* Determine method to use now */
+ method->rcast = dests <= bb->bc_threshold;
+}
- if ((++n_ptr->bclink.oos_state) == 1) {
- if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2))
- return;
- n_ptr->bclink.oos_state++;
+/* tipc_bcast_xmit - broadcast the buffer chain to all external nodes
+ * @net: the applicable net namespace
+ * @pkts: chain of buffers containing message
+ * @cong_link_cnt: set to 1 if broadcast link is congested, otherwise unchanged
+ * Consumes the buffer chain.
+ * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE
+ */
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt)
+{
+ struct tipc_link *l = tipc_bc_sndlink(net);
+ struct sk_buff_head xmitq;
+ int rc = 0;
+
+ __skb_queue_head_init(&xmitq);
+ tipc_bcast_lock(net);
+ if (tipc_link_bc_peers(l))
+ rc = tipc_link_xmit(l, pkts, &xmitq);
+ tipc_bcast_unlock(net);
+ tipc_bcbase_xmit(net, &xmitq);
+ __skb_queue_purge(pkts);
+ if (rc == -ELINKCONG) {
+ *cong_link_cnt = 1;
+ rc = 0;
}
+ return rc;
+}
- /* Don't NACK if one has been recently sent (or seen) */
+/* tipc_rcast_xmit - replicate and send a message to given destination nodes
+ * @net: the applicable net namespace
+ * @pkts: chain of buffers containing message
+ * @dests: list of destination nodes
+ * @cong_link_cnt: returns number of congested links
+ * @cong_links: returns identities of congested links
+ * Returns 0 if success, otherwise errno
+ */
+static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ struct tipc_nlist *dests, u16 *cong_link_cnt)
+{
+ struct tipc_dest *dst, *tmp;
+ struct sk_buff_head _pkts;
+ u32 dnode, selector;
- if (n_ptr->bclink.oos_state & 0x1)
- return;
+ selector = msg_link_selector(buf_msg(skb_peek(pkts)));
+ __skb_queue_head_init(&_pkts);
+
+ list_for_each_entry_safe(dst, tmp, &dests->list, list) {
+ dnode = dst->node;
+ if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
+ return -ENOMEM;
- /* Send NACK */
-
- buf = tipc_buf_acquire(INT_H_SIZE);
- if (buf) {
- struct tipc_msg *msg = buf_msg(buf);
-
- tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
- INT_H_SIZE, n_ptr->addr);
- msg_set_non_seq(msg, 1);
- msg_set_mc_netid(msg, tipc_net_id);
- msg_set_bcast_ack(msg, n_ptr->bclink.last_in);
- msg_set_bcgap_after(msg, n_ptr->bclink.last_in);
- msg_set_bcgap_to(msg, n_ptr->bclink.deferred_head
- ? buf_seqno(n_ptr->bclink.deferred_head) - 1
- : n_ptr->bclink.last_sent);
-
- spin_lock_bh(&bc_lock);
- tipc_bearer_send(&bcbearer->bearer, buf, NULL);
- bcl->stats.sent_nacks++;
- spin_unlock_bh(&bc_lock);
- kfree_skb(buf);
-
- n_ptr->bclink.oos_state++;
+ /* Any other return value than -ELINKCONG is ignored */
+ if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
+ (*cong_link_cnt)++;
}
+ return 0;
}
-/**
- * bclink_peek_nack - monitor retransmission requests sent by other nodes
- *
- * Delay any upcoming NACK by this node if another node has already
- * requested the first message this node is going to ask for.
- *
- * Only tipc_net_lock set.
+/* tipc_mcast_send_sync - deliver a dummy message with SYN bit
+ * @net: the applicable net namespace
+ * @skb: socket buffer to copy
+ * @method: send method to be used
+ * @dests: destination nodes for message.
+ * Returns 0 if success, otherwise errno
*/
-static void bclink_peek_nack(struct tipc_msg *msg)
+static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
+ struct tipc_mc_method *method,
+ struct tipc_nlist *dests)
{
- struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg));
+ struct tipc_msg *hdr, *_hdr;
+ struct sk_buff_head tmpq;
+ u16 cong_link_cnt = 0;
+ struct sk_buff *_skb;
+ int rc = 0;
+
+ /* Does the cluster support the new capability, TIPC_MCAST_RBCTL? */
+ if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL))
+ return 0;
- if (unlikely(!n_ptr))
- return;
+ hdr = buf_msg(skb);
+ if (msg_user(hdr) == MSG_FRAGMENTER)
+ hdr = msg_inner_hdr(hdr);
+ if (msg_type(hdr) != TIPC_MCAST_MSG)
+ return 0;
- tipc_node_lock(n_ptr);
+ /* Allocate dummy message */
+ _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL);
+ if (!_skb)
+ return -ENOMEM;
- if (n_ptr->bclink.recv_permitted &&
- (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) &&
- (n_ptr->bclink.last_in == msg_bcgap_after(msg)))
- n_ptr->bclink.oos_state = 2;
+ /* Prepare the 'synching' header */
+ msg_set_syn(hdr, 1);
+
+ /* Copy skb's header into a dummy header */
+ skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE);
+ skb_orphan(_skb);
+
+ /* Reverse method for dummy message */
+ _hdr = buf_msg(_skb);
+ msg_set_size(_hdr, MCAST_H_SIZE);
+ msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
+ msg_set_errcode(_hdr, TIPC_ERR_NO_PORT);
+
+ __skb_queue_head_init(&tmpq);
+ __skb_queue_tail(&tmpq, _skb);
+ if (method->rcast)
+ rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt);
+ else
+ rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt);
- tipc_node_unlock(n_ptr);
+ /* This queue should normally be empty by now */
+ __skb_queue_purge(&tmpq);
+
+ return rc;
}
-/*
- * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster
+/* tipc_mcast_xmit - deliver message to indicated destination nodes
+ * and to identified node local sockets
+ * @net: the applicable net namespace
+ * @pkts: chain of buffers containing message
+ * @method: send method to be used
+ * @dests: destination nodes for message.
+ * @cong_link_cnt: returns number of encountered congested destination links
+ * Consumes buffer chain.
+ * Returns 0 if success, otherwise errno
*/
-int tipc_bclink_send_msg(struct sk_buff *buf)
+int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ struct tipc_mc_method *method, struct tipc_nlist *dests,
+ u16 *cong_link_cnt)
{
- int res;
-
- spin_lock_bh(&bc_lock);
-
- if (!bclink->bcast_nodes.count) {
- res = msg_data_sz(buf_msg(buf));
- kfree_skb(buf);
+ struct sk_buff_head inputq, localq;
+ bool rcast = method->rcast;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ int rc = 0;
+
+ skb_queue_head_init(&inputq);
+ __skb_queue_head_init(&localq);
+
+ /* Clone packets before they are consumed by next call */
+ if (dests->local && !tipc_msg_reassemble(pkts, &localq)) {
+ rc = -ENOMEM;
goto exit;
}
+ /* Send according to determined transmit method */
+ if (dests->remote) {
+ tipc_bcast_select_xmit_method(net, dests->remote, method);
+
+ skb = skb_peek(pkts);
+ hdr = buf_msg(skb);
+ if (msg_user(hdr) == MSG_FRAGMENTER)
+ hdr = msg_inner_hdr(hdr);
+ msg_set_is_rcast(hdr, method->rcast);
+
+ /* Switch method ? */
+ if (rcast != method->rcast) {
+ rc = tipc_mcast_send_sync(net, skb, method, dests);
+ if (unlikely(rc)) {
+ pr_err("Unable to send SYN: method %d, rc %d\n",
+ rcast, rc);
+ goto exit;
+ }
+ }
- res = tipc_link_send_buf(bcl, buf);
- if (likely(res >= 0)) {
- bclink_set_last_sent();
- bcl->stats.queue_sz_counts++;
- bcl->stats.accu_queue_sz += bcl->out_queue_size;
+ if (method->rcast)
+ rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt);
+ else
+ rc = tipc_bcast_xmit(net, pkts, cong_link_cnt);
}
-exit:
- spin_unlock_bh(&bc_lock);
- return res;
-}
-/**
- * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet
- *
- * Called with both sending node's lock and bc_lock taken.
- */
-static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
-{
- bclink_update_last_sent(node, seqno);
- node->bclink.last_in = seqno;
- node->bclink.oos_state = 0;
- bcl->stats.recv_info++;
-
- /*
- * Unicast an ACK periodically, ensuring that
- * all nodes in the cluster don't ACK at the same time
- */
-
- if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) {
- tipc_link_send_proto_msg(
- node->active_links[node->addr & 1],
- STATE_MSG, 0, 0, 0, 0, 0);
- bcl->stats.sent_acks++;
+ if (dests->local) {
+ tipc_loopback_trace(net, &localq);
+ tipc_sk_mcast_rcv(net, &localq, &inputq);
}
+exit:
+ /* This queue should normally be empty by now */
+ __skb_queue_purge(pkts);
+ return rc;
}
-/**
- * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards
+/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link
*
- * tipc_net_lock is read_locked, no other locks set
+ * RCU is locked, no other locks set
*/
-void tipc_bclink_recv_pkt(struct sk_buff *buf)
+int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb)
{
- struct tipc_msg *msg = buf_msg(buf);
- struct tipc_node *node;
- u32 next_in;
- u32 seqno;
- int deferred;
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct sk_buff_head xmitq;
+ int rc;
- /* Screen out unwanted broadcast messages */
+ __skb_queue_head_init(&xmitq);
- if (msg_mc_netid(msg) != tipc_net_id)
- goto exit;
-
- node = tipc_node_find(msg_prevnode(msg));
- if (unlikely(!node))
- goto exit;
-
- tipc_node_lock(node);
- if (unlikely(!node->bclink.recv_permitted))
- goto unlock;
-
- /* Handle broadcast protocol message */
-
- if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
- if (msg_type(msg) != STATE_MSG)
- goto unlock;
- if (msg_destnode(msg) == tipc_own_addr) {
- tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
- tipc_node_unlock(node);
- spin_lock_bh(&bc_lock);
- bcl->stats.recv_nacks++;
- bclink->retransmit_to = node;
- bclink_retransmit_pkt(msg_bcgap_after(msg),
- msg_bcgap_to(msg));
- spin_unlock_bh(&bc_lock);
- } else {
- tipc_node_unlock(node);
- bclink_peek_nack(msg);
- }
- goto exit;
+ if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) {
+ kfree_skb(skb);
+ return 0;
}
- /* Handle in-sequence broadcast message */
-
- seqno = msg_seqno(msg);
- next_in = mod(node->bclink.last_in + 1);
-
- if (likely(seqno == next_in)) {
-receive:
- /* Deliver message to destination */
-
- if (likely(msg_isdata(msg))) {
- spin_lock_bh(&bc_lock);
- bclink_accept_pkt(node, seqno);
- spin_unlock_bh(&bc_lock);
- tipc_node_unlock(node);
- if (likely(msg_mcast(msg)))
- tipc_port_recv_mcast(buf, NULL);
- else
- kfree_skb(buf);
- } else if (msg_user(msg) == MSG_BUNDLER) {
- spin_lock_bh(&bc_lock);
- bclink_accept_pkt(node, seqno);
- bcl->stats.recv_bundles++;
- bcl->stats.recv_bundled += msg_msgcnt(msg);
- spin_unlock_bh(&bc_lock);
- tipc_node_unlock(node);
- tipc_link_recv_bundle(buf);
- } else if (msg_user(msg) == MSG_FRAGMENTER) {
- int ret = tipc_link_recv_fragment(&node->bclink.defragm,
- &buf, &msg);
- if (ret < 0)
- goto unlock;
- spin_lock_bh(&bc_lock);
- bclink_accept_pkt(node, seqno);
- bcl->stats.recv_fragments++;
- if (ret > 0)
- bcl->stats.recv_fragmented++;
- spin_unlock_bh(&bc_lock);
- tipc_node_unlock(node);
- tipc_net_route_msg(buf);
- } else if (msg_user(msg) == NAME_DISTRIBUTOR) {
- spin_lock_bh(&bc_lock);
- bclink_accept_pkt(node, seqno);
- spin_unlock_bh(&bc_lock);
- tipc_node_unlock(node);
- tipc_named_recv(buf);
- } else {
- spin_lock_bh(&bc_lock);
- bclink_accept_pkt(node, seqno);
- spin_unlock_bh(&bc_lock);
- tipc_node_unlock(node);
- kfree_skb(buf);
- }
- buf = NULL;
-
- /* Determine new synchronization state */
+ tipc_bcast_lock(net);
+ if (msg_user(hdr) == BCAST_PROTOCOL)
+ rc = tipc_link_bc_nack_rcv(l, skb, &xmitq);
+ else
+ rc = tipc_link_rcv(l, skb, NULL);
+ tipc_bcast_unlock(net);
- tipc_node_lock(node);
- if (unlikely(!tipc_node_is_up(node)))
- goto unlock;
+ tipc_bcbase_xmit(net, &xmitq);
- if (node->bclink.last_in == node->bclink.last_sent)
- goto unlock;
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
- if (!node->bclink.deferred_head) {
- node->bclink.oos_state = 1;
- goto unlock;
- }
+ return rc;
+}
- msg = buf_msg(node->bclink.deferred_head);
- seqno = msg_seqno(msg);
- next_in = mod(next_in + 1);
- if (seqno != next_in)
- goto unlock;
+/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge
+ *
+ * RCU is locked, no other locks set
+ */
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr)
+{
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ u16 acked = msg_bcast_ack(hdr);
+ struct sk_buff_head xmitq;
- /* Take in-sequence message from deferred queue & deliver it */
+ /* Ignore bc acks sent by peer before bcast synch point was received */
+ if (msg_bc_ack_invalid(hdr))
+ return;
- buf = node->bclink.deferred_head;
- node->bclink.deferred_head = buf->next;
- node->bclink.deferred_size--;
- goto receive;
- }
+ __skb_queue_head_init(&xmitq);
- /* Handle out-of-sequence broadcast message */
+ tipc_bcast_lock(net);
+ tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL);
+ tipc_bcast_unlock(net);
- if (less(next_in, seqno)) {
- deferred = tipc_link_defer_pkt(&node->bclink.deferred_head,
- &node->bclink.deferred_tail,
- buf);
- node->bclink.deferred_size += deferred;
- bclink_update_last_sent(node, seqno);
- buf = NULL;
- } else
- deferred = 0;
+ tipc_bcbase_xmit(net, &xmitq);
- spin_lock_bh(&bc_lock);
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
+}
- if (deferred)
- bcl->stats.deferred_recv++;
- else
- bcl->stats.duplicates++;
+/* tipc_bcast_sync_rcv - check and update rcv link with peer's send state
+ *
+ * RCU is locked, no other locks set
+ */
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq)
+{
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct tipc_gap_ack_blks *ga;
+ struct sk_buff_head xmitq;
+ int rc = 0;
+
+ __skb_queue_head_init(&xmitq);
+
+ tipc_bcast_lock(net);
+ if (msg_type(hdr) != STATE_MSG) {
+ tipc_link_bc_init_rcv(l, hdr);
+ } else if (!msg_bc_ack_invalid(hdr)) {
+ tipc_get_gap_ack_blks(&ga, l, hdr, false);
+ if (!sysctl_tipc_bc_retruni)
+ retrq = &xmitq;
+ rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr),
+ msg_bc_gap(hdr), ga, &xmitq,
+ retrq);
+ rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+ }
+ tipc_bcast_unlock(net);
- spin_unlock_bh(&bc_lock);
+ tipc_bcbase_xmit(net, &xmitq);
-unlock:
- tipc_node_unlock(node);
-exit:
- kfree_skb(buf);
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
+ return rc;
}
-u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
+/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer
+ *
+ * RCU is locked, node lock is set
+ */
+void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l,
+ struct sk_buff_head *xmitq)
{
- return (n_ptr->bclink.recv_permitted &&
- (tipc_bclink_get_last_sent() != n_ptr->bclink.acked));
-}
+ struct tipc_link *snd_l = tipc_bc_sndlink(net);
+ tipc_bcast_lock(net);
+ tipc_link_add_bc_peer(snd_l, uc_l, xmitq);
+ tipc_bcbase_select_primary(net);
+ tipc_bcbase_calc_bc_threshold(net);
+ tipc_bcast_unlock(net);
+}
-/**
- * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer
+/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer
*
- * Send packet over as many bearers as necessary to reach all nodes
- * that have joined the broadcast link.
- *
- * Returns 0 (packet sent successfully) under all circumstances,
- * since the broadcast link's pseudo-bearer never blocks
+ * RCU is locked, node lock is set
*/
-static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
- struct tipc_media_addr *unused2)
-{
- int bp_index;
-
- /* Prepare broadcast link message for reliable transmission,
- * if first time trying to send it;
- * preparation is skipped for broadcast link protocol messages
- * since they are sent in an unreliable manner and don't need it
- */
- if (likely(!msg_non_seq(buf_msg(buf)))) {
- struct tipc_msg *msg;
-
- bcbuf_set_acks(buf, bclink->bcast_nodes.count);
- msg = buf_msg(buf);
- msg_set_non_seq(msg, 1);
- msg_set_mc_netid(msg, tipc_net_id);
- bcl->stats.sent_info++;
-
- if (WARN_ON(!bclink->bcast_nodes.count)) {
- dump_stack();
- return 0;
- }
- }
+void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
+{
+ struct tipc_link *snd_l = tipc_bc_sndlink(net);
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct sk_buff_head xmitq;
- /* Send buffer over bearers until all targets reached */
- bcbearer->remains = bclink->bcast_nodes;
+ __skb_queue_head_init(&xmitq);
- for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
- struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
- struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
- struct tipc_bearer *b = p;
- struct sk_buff *tbuf;
+ tipc_bcast_lock(net);
+ tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq);
+ tipc_bcbase_select_primary(net);
+ tipc_bcbase_calc_bc_threshold(net);
+ tipc_bcast_unlock(net);
- if (!p)
- break; /* No more bearers to try */
+ tipc_bcbase_xmit(net, &xmitq);
- if (tipc_bearer_blocked(p)) {
- if (!s || tipc_bearer_blocked(s))
- continue; /* Can't use either bearer */
- b = s;
- }
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
+}
- tipc_nmap_diff(&bcbearer->remains, &b->nodes,
- &bcbearer->remains_new);
- if (bcbearer->remains_new.count == bcbearer->remains.count)
- continue; /* Nothing added by bearer pair */
-
- if (bp_index == 0) {
- /* Use original buffer for first bearer */
- tipc_bearer_send(b, buf, &b->bcast_addr);
- } else {
- /* Avoid concurrent buffer access */
- tbuf = pskb_copy(buf, GFP_ATOMIC);
- if (!tbuf)
- break;
- tipc_bearer_send(b, tbuf, &b->bcast_addr);
- kfree_skb(tbuf); /* Bearer keeps a clone */
- }
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l)
+{
+ if (!l)
+ return -ENOPROTOOPT;
- /* Swap bearers for next packet */
- if (s) {
- bcbearer->bpairs[bp_index].primary = s;
- bcbearer->bpairs[bp_index].secondary = p;
- }
+ tipc_bcast_lock(net);
+ tipc_link_reset_stats(l);
+ tipc_bcast_unlock(net);
+ return 0;
+}
- if (bcbearer->remains_new.count == 0)
- break; /* All targets reached */
+static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win)
+{
+ struct tipc_link *l = tipc_bc_sndlink(net);
- bcbearer->remains = bcbearer->remains_new;
+ if (!l)
+ return -ENOPROTOOPT;
+ if (max_win < BCLINK_WIN_MIN)
+ max_win = BCLINK_WIN_MIN;
+ if (max_win > TIPC_MAX_LINK_WIN)
+ return -EINVAL;
+ tipc_bcast_lock(net);
+ tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win);
+ tipc_bcast_unlock(net);
+ return 0;
+}
+
+static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode)
+{
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+
+ switch (bc_mode) {
+ case BCLINK_MODE_BCAST:
+ if (!bb->bcast_support)
+ return -ENOPROTOOPT;
+
+ bb->force_bcast = true;
+ bb->force_rcast = false;
+ break;
+ case BCLINK_MODE_RCAST:
+ if (!bb->rcast_support)
+ return -ENOPROTOOPT;
+
+ bb->force_bcast = false;
+ bb->force_rcast = true;
+ break;
+ case BCLINK_MODE_SEL:
+ if (!bb->bcast_support || !bb->rcast_support)
+ return -ENOPROTOOPT;
+
+ bb->force_bcast = false;
+ bb->force_rcast = false;
+ break;
+ default:
+ return -EINVAL;
}
return 0;
}
-/**
- * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer
- */
-void tipc_bcbearer_sort(void)
+static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio)
{
- struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
- struct tipc_bcbearer_pair *bp_curr;
- int b_index;
- int pri;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
- spin_lock_bh(&bc_lock);
+ if (!bb->bcast_support || !bb->rcast_support)
+ return -ENOPROTOOPT;
- /* Group bearers by priority (can assume max of two per priority) */
- memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
+ if (bc_ratio > 100 || bc_ratio <= 0)
+ return -EINVAL;
- for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
- struct tipc_bearer *b = &tipc_bearers[b_index];
+ bb->rc_ratio = bc_ratio;
+ tipc_bcast_lock(net);
+ tipc_bcbase_calc_bc_threshold(net);
+ tipc_bcast_unlock(net);
- if (!b->active || !b->nodes.count)
- continue;
+ return 0;
+}
- if (!bp_temp[b->priority].primary)
- bp_temp[b->priority].primary = b;
- else
- bp_temp[b->priority].secondary = b;
- }
+int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
+{
+ int err;
+ u32 win;
+ u32 bc_mode;
+ u32 bc_ratio;
+ struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
- /* Create array of bearer pairs for broadcasting */
- bp_curr = bcbearer->bpairs;
- memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs));
+ if (!attrs[TIPC_NLA_LINK_PROP])
+ return -EINVAL;
- for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) {
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props);
+ if (err)
+ return err;
- if (!bp_temp[pri].primary)
- continue;
+ if (!props[TIPC_NLA_PROP_WIN] &&
+ !props[TIPC_NLA_PROP_BROADCAST] &&
+ !props[TIPC_NLA_PROP_BROADCAST_RATIO]) {
+ return -EOPNOTSUPP;
+ }
- bp_curr->primary = bp_temp[pri].primary;
+ if (props[TIPC_NLA_PROP_BROADCAST]) {
+ bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]);
+ err = tipc_bc_link_set_broadcast_mode(net, bc_mode);
+ }
- if (bp_temp[pri].secondary) {
- if (tipc_nmap_equal(&bp_temp[pri].primary->nodes,
- &bp_temp[pri].secondary->nodes)) {
- bp_curr->secondary = bp_temp[pri].secondary;
- } else {
- bp_curr++;
- bp_curr->primary = bp_temp[pri].secondary;
- }
- }
+ if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) {
+ bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]);
+ err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio);
+ }
- bp_curr++;
+ if (!err && props[TIPC_NLA_PROP_WIN]) {
+ win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ err = tipc_bc_link_set_queue_limits(net, win);
}
- spin_unlock_bh(&bc_lock);
+ return err;
}
-
-int tipc_bclink_stats(char *buf, const u32 buf_size)
+int tipc_bcast_init(struct net *net)
{
- int ret;
- struct tipc_stats *s;
-
- if (!bcl)
- return 0;
-
- spin_lock_bh(&bc_lock);
-
- s = &bcl->stats;
-
- ret = tipc_snprintf(buf, buf_size, "Link <%s>\n"
- " Window:%u packets\n",
- bcl->name, bcl->queue_limit[0]);
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " RX packets:%u fragments:%u/%u bundles:%u/%u\n",
- s->recv_info, s->recv_fragments,
- s->recv_fragmented, s->recv_bundles,
- s->recv_bundled);
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " TX packets:%u fragments:%u/%u bundles:%u/%u\n",
- s->sent_info, s->sent_fragments,
- s->sent_fragmented, s->sent_bundles,
- s->sent_bundled);
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " RX naks:%u defs:%u dups:%u\n",
- s->recv_nacks, s->deferred_recv, s->duplicates);
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " TX naks:%u acks:%u dups:%u\n",
- s->sent_nacks, s->sent_acks, s->retransmitted);
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " Congestion link:%u Send queue max:%u avg:%u\n",
- s->link_congs, s->max_queue_sz,
- s->queue_sz_counts ?
- (s->accu_queue_sz / s->queue_sz_counts) : 0);
-
- spin_unlock_bh(&bc_lock);
- return ret;
-}
-
-int tipc_bclink_reset_stats(void)
-{
- if (!bcl)
- return -ENOPROTOOPT;
-
- spin_lock_bh(&bc_lock);
- memset(&bcl->stats, 0, sizeof(bcl->stats));
- spin_unlock_bh(&bc_lock);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bc_base *bb = NULL;
+ struct tipc_link *l = NULL;
+
+ bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+ if (!bb)
+ goto enomem;
+ tn->bcbase = bb;
+ spin_lock_init(&tipc_net(net)->bclock);
+
+ if (!tipc_link_bc_create(net, 0, 0, NULL,
+ one_page_mtu,
+ BCLINK_WIN_DEFAULT,
+ BCLINK_WIN_DEFAULT,
+ 0,
+ &bb->inputq,
+ NULL,
+ NULL,
+ &l))
+ goto enomem;
+ bb->link = l;
+ tn->bcl = l;
+ bb->rc_ratio = 10;
+ bb->rcast_support = true;
return 0;
+enomem:
+ kfree(bb);
+ kfree(l);
+ return -ENOMEM;
}
-int tipc_bclink_set_queue_limits(u32 limit)
+void tipc_bcast_stop(struct net *net)
{
- if (!bcl)
- return -ENOPROTOOPT;
- if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN))
- return -EINVAL;
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
- spin_lock_bh(&bc_lock);
- tipc_link_set_queue_limits(bcl, limit);
- spin_unlock_bh(&bc_lock);
- return 0;
+ synchronize_net();
+ kfree(tn->bcbase);
+ kfree(tn->bcl);
}
-void tipc_bclink_init(void)
+void tipc_nlist_init(struct tipc_nlist *nl, u32 self)
{
- bcbearer->bearer.media = &bcbearer->media;
- bcbearer->media.send_msg = tipc_bcbearer_send;
- sprintf(bcbearer->media.name, "tipc-broadcast");
+ memset(nl, 0, sizeof(*nl));
+ INIT_LIST_HEAD(&nl->list);
+ nl->self = self;
+}
- INIT_LIST_HEAD(&bcl->waiting_ports);
- bcl->next_out_no = 1;
- spin_lock_init(&bclink->node.lock);
- bcl->owner = &bclink->node;
- bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
- tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
- spin_lock_init(&bcbearer->bearer.lock);
- bcl->b_ptr = &bcbearer->bearer;
- bcl->state = WORKING_WORKING;
- strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
+void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
+{
+ if (node == nl->self)
+ nl->local = true;
+ else if (tipc_dest_push(&nl->list, node, 0))
+ nl->remote++;
}
-void tipc_bclink_stop(void)
+void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
{
- spin_lock_bh(&bc_lock);
- tipc_link_stop(bcl);
- spin_unlock_bh(&bc_lock);
+ if (node == nl->self)
+ nl->local = false;
+ else if (tipc_dest_del(&nl->list, node, 0))
+ nl->remote--;
+}
- memset(bclink, 0, sizeof(*bclink));
- memset(bcbearer, 0, sizeof(*bcbearer));
+void tipc_nlist_purge(struct tipc_nlist *nl)
+{
+ tipc_dest_list_purge(&nl->list);
+ nl->remote = 0;
+ nl->local = false;
}
+u32 tipc_bcast_get_mode(struct net *net)
+{
+ struct tipc_bc_base *bb = tipc_bc_base(net);
-/**
- * tipc_nmap_add - add a node to a node map
- */
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
+ if (bb->force_bcast)
+ return BCLINK_MODE_BCAST;
+
+ if (bb->force_rcast)
+ return BCLINK_MODE_RCAST;
+
+ if (bb->bcast_support && bb->rcast_support)
+ return BCLINK_MODE_SEL;
+
+ return 0;
+}
+
+u32 tipc_bcast_get_broadcast_ratio(struct net *net)
{
- int n = tipc_node(node);
- int w = n / WSIZE;
- u32 mask = (1 << (n % WSIZE));
+ struct tipc_bc_base *bb = tipc_bc_base(net);
- if ((nm_ptr->map[w] & mask) == 0) {
- nm_ptr->count++;
- nm_ptr->map[w] |= mask;
- }
+ return bb->rc_ratio;
}
-/**
- * tipc_nmap_remove - remove a node from a node map
- */
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
+ struct sk_buff_head *inputq)
{
- int n = tipc_node(node);
- int w = n / WSIZE;
- u32 mask = (1 << (n % WSIZE));
+ struct sk_buff *skb, *_skb, *tmp;
+ struct tipc_msg *hdr, *_hdr;
+ bool match = false;
+ u32 node, port;
+
+ skb = skb_peek(inputq);
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+
+ if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq)))
+ return;
+
+ node = msg_orignode(hdr);
+ if (node == tipc_own_addr(net))
+ return;
+
+ port = msg_origport(hdr);
- if ((nm_ptr->map[w] & mask) != 0) {
- nm_ptr->map[w] &= ~mask;
- nm_ptr->count--;
+ /* Has the twin SYN message already arrived ? */
+ skb_queue_walk(defq, _skb) {
+ _hdr = buf_msg(_skb);
+ if (msg_orignode(_hdr) != node)
+ continue;
+ if (msg_origport(_hdr) != port)
+ continue;
+ match = true;
+ break;
}
-}
-/**
- * tipc_nmap_diff - find differences between node maps
- * @nm_a: input node map A
- * @nm_b: input node map B
- * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
- */
-static void tipc_nmap_diff(struct tipc_node_map *nm_a,
- struct tipc_node_map *nm_b,
- struct tipc_node_map *nm_diff)
-{
- int stop = ARRAY_SIZE(nm_a->map);
- int w;
- int b;
- u32 map;
-
- memset(nm_diff, 0, sizeof(*nm_diff));
- for (w = 0; w < stop; w++) {
- map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
- nm_diff->map[w] = map;
- if (map != 0) {
- for (b = 0 ; b < WSIZE; b++) {
- if (map & (1 << b))
- nm_diff->count++;
- }
- }
+ if (!match) {
+ if (!msg_is_syn(hdr))
+ return;
+ __skb_dequeue(inputq);
+ __skb_queue_tail(defq, skb);
+ return;
}
-}
-/**
- * tipc_port_list_add - add a port to a port list, ensuring no duplicates
- */
-void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port)
-{
- struct tipc_port_list *item = pl_ptr;
- int i;
- int item_sz = PLSIZE;
- int cnt = pl_ptr->count;
-
- for (; ; cnt -= item_sz, item = item->next) {
- if (cnt < PLSIZE)
- item_sz = cnt;
- for (i = 0; i < item_sz; i++)
- if (item->ports[i] == port)
- return;
- if (i < PLSIZE) {
- item->ports[i] = port;
- pl_ptr->count++;
+ /* Deliver non-SYN message from other link, otherwise queue it */
+ if (!msg_is_syn(hdr)) {
+ if (msg_is_rcast(hdr) != msg_is_rcast(_hdr))
return;
- }
- if (!item->next) {
- item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
- if (!item->next) {
- pr_warn("Incomplete multicast delivery, no memory\n");
- return;
- }
- item->next->next = NULL;
- }
+ __skb_dequeue(inputq);
+ __skb_queue_tail(defq, skb);
+ return;
}
-}
-/**
- * tipc_port_list_free - free dynamically created entries in port_list chain
- *
- */
-void tipc_port_list_free(struct tipc_port_list *pl_ptr)
-{
- struct tipc_port_list *item;
- struct tipc_port_list *next;
+ /* Queue non-SYN/SYN message from same link */
+ if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) {
+ __skb_dequeue(inputq);
+ __skb_queue_tail(defq, skb);
+ return;
+ }
- for (item = pl_ptr->next; item; item = next) {
- next = item->next;
- kfree(item);
+ /* Matching SYN messages => return the one with data, if any */
+ __skb_unlink(_skb, defq);
+ if (msg_data_sz(hdr)) {
+ kfree_skb(_skb);
+ } else {
+ __skb_dequeue(inputq);
+ kfree_skb(skb);
+ __skb_queue_tail(inputq, _skb);
+ }
+
+ /* Deliver subsequent non-SYN messages from same peer */
+ skb_queue_walk_safe(defq, _skb, tmp) {
+ _hdr = buf_msg(_skb);
+ if (msg_orignode(_hdr) != node)
+ continue;
+ if (msg_origport(_hdr) != port)
+ continue;
+ if (msg_is_syn(_hdr))
+ break;
+ __skb_unlink(_skb, defq);
+ __skb_queue_tail(inputq, _skb);
}
}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 6ee587b469fd..2d9352dc7b0e 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -1,7 +1,7 @@
/*
* net/tipc/bcast.h: Include file for TIPC broadcast code
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2014-2015, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -37,67 +37,91 @@
#ifndef _TIPC_BCAST_H
#define _TIPC_BCAST_H
-#define MAX_NODES 4096
-#define WSIZE 32
+#include "core.h"
-/**
- * struct tipc_node_map - set of node identifiers
- * @count: # of nodes in set
- * @map: bitmap of node identifiers that are in the set
- */
-struct tipc_node_map {
- u32 count;
- u32 map[MAX_NODES / WSIZE];
+struct tipc_node;
+struct tipc_msg;
+struct tipc_nl_msg;
+struct tipc_nlist;
+struct tipc_nitem;
+extern const char tipc_bclink_name[];
+extern unsigned long sysctl_tipc_bc_retruni;
+
+#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
+
+#define BCLINK_MODE_BCAST 0x1
+#define BCLINK_MODE_RCAST 0x2
+#define BCLINK_MODE_SEL 0x4
+
+struct tipc_nlist {
+ struct list_head list;
+ u32 self;
+ u16 remote;
+ bool local;
};
-#define PLSIZE 32
+void tipc_nlist_init(struct tipc_nlist *nl, u32 self);
+void tipc_nlist_purge(struct tipc_nlist *nl);
+void tipc_nlist_add(struct tipc_nlist *nl, u32 node);
+void tipc_nlist_del(struct tipc_nlist *nl, u32 node);
-/**
- * struct tipc_port_list - set of node local destination ports
- * @count: # of ports in set (only valid for first entry in list)
- * @next: pointer to next entry in list
- * @ports: array of port references
+/* Cookie to be used between socket and broadcast layer
+ * @rcast: replicast (instead of broadcast) was used at previous xmit
+ * @mandatory: broadcast/replicast indication was set by user
+ * @deferredq: deferred queue used to keep messages in order
+ * @expires: re-evaluate non-mandatory transmit method if we are past this
*/
-struct tipc_port_list {
- int count;
- struct tipc_port_list *next;
- u32 ports[PLSIZE];
+struct tipc_mc_method {
+ bool rcast;
+ bool mandatory;
+ struct sk_buff_head deferredq;
+ unsigned long expires;
};
+int tipc_bcast_init(struct net *net);
+void tipc_bcast_stop(struct net *net);
+void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl);
+void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
+void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
+int tipc_bcast_get_mtu(struct net *net);
+void tipc_bcast_toggle_rcast(struct net *net, bool supp);
+int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ struct tipc_mc_method *method, struct tipc_nlist *dests,
+ u16 *cong_link_cnt);
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt);
+int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr);
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq);
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl);
+int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
-struct tipc_node;
-
-extern const char tipc_bclink_name[];
+u32 tipc_bcast_get_mode(struct net *net);
+u32 tipc_bcast_get_broadcast_ratio(struct net *net);
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
+void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
+ struct sk_buff_head *inputq);
-/**
- * tipc_nmap_equal - test for equality of node maps
- */
-static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
- struct tipc_node_map *nm_b)
+static inline void tipc_bcast_lock(struct net *net)
{
- return !memcmp(nm_a, nm_b, sizeof(*nm_a));
+ spin_lock_bh(&tipc_net(net)->bclock);
}
-void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port);
-void tipc_port_list_free(struct tipc_port_list *pl_ptr);
+static inline void tipc_bcast_unlock(struct net *net)
+{
+ spin_unlock_bh(&tipc_net(net)->bclock);
+}
-void tipc_bclink_init(void);
-void tipc_bclink_stop(void);
-void tipc_bclink_add_node(u32 addr);
-void tipc_bclink_remove_node(u32 addr);
-struct tipc_node *tipc_bclink_retransmit_to(void);
-void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
-int tipc_bclink_send_msg(struct sk_buff *buf);
-void tipc_bclink_recv_pkt(struct sk_buff *buf);
-u32 tipc_bclink_get_last_sent(void);
-u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
-void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent);
-int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
-int tipc_bclink_reset_stats(void);
-int tipc_bclink_set_queue_limits(u32 limit);
-void tipc_bcbearer_sort(void);
+static inline struct tipc_link *tipc_bc_sndlink(struct net *net)
+{
+ return tipc_net(net)->bcl;
+}
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index cb29ef7ba2f0..ae1ddbf71853 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1,8 +1,8 @@
/*
* net/tipc/bearer.c: TIPC bearer code
*
- * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2004-2006, 2010-2011, Wind River Systems
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
+ * Copyright (c) 2004-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,124 +34,97 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <net/sock.h>
#include "core.h"
-#include "config.h"
#include "bearer.h"
+#include "link.h"
#include "discover.h"
+#include "monitor.h"
+#include "bcast.h"
+#include "netlink.h"
+#include "udp_media.h"
+#include "trace.h"
+#include "crypto.h"
#define MAX_ADDR_STR 60
-static struct tipc_media *media_list[MAX_MEDIA];
-static u32 media_count;
+static struct tipc_media * const media_info_array[] = {
+ &eth_media_info,
+#ifdef CONFIG_TIPC_MEDIA_IB
+ &ib_media_info,
+#endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ &udp_media_info,
+#endif
+ NULL
+};
-struct tipc_bearer tipc_bearers[MAX_BEARERS];
+static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ return rcu_dereference(tn->bearer_list[bearer_id]);
+}
-static void bearer_disable(struct tipc_bearer *b_ptr);
+static void bearer_disable(struct net *net, struct tipc_bearer *b);
+static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
/**
* tipc_media_find - locates specified media object by name
+ * @name: name to locate
*/
struct tipc_media *tipc_media_find(const char *name)
{
u32 i;
- for (i = 0; i < media_count; i++) {
- if (!strcmp(media_list[i]->name, name))
- return media_list[i];
+ for (i = 0; media_info_array[i] != NULL; i++) {
+ if (!strcmp(media_info_array[i]->name, name))
+ break;
}
- return NULL;
+ return media_info_array[i];
}
/**
* media_find_id - locates specified media object by type identifier
+ * @type: type identifier to locate
*/
static struct tipc_media *media_find_id(u8 type)
{
u32 i;
- for (i = 0; i < media_count; i++) {
- if (media_list[i]->type_id == type)
- return media_list[i];
+ for (i = 0; media_info_array[i] != NULL; i++) {
+ if (media_info_array[i]->type_id == type)
+ break;
}
- return NULL;
-}
-
-/**
- * tipc_register_media - register a media type
- *
- * Bearers for this media type must be activated separately at a later stage.
- */
-int tipc_register_media(struct tipc_media *m_ptr)
-{
- int res = -EINVAL;
-
- write_lock_bh(&tipc_net_lock);
-
- if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)
- goto exit;
- if (m_ptr->priority > TIPC_MAX_LINK_PRI)
- goto exit;
- if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) ||
- (m_ptr->tolerance > TIPC_MAX_LINK_TOL))
- goto exit;
- if (media_count >= MAX_MEDIA)
- goto exit;
- if (tipc_media_find(m_ptr->name) || media_find_id(m_ptr->type_id))
- goto exit;
-
- media_list[media_count] = m_ptr;
- media_count++;
- res = 0;
-exit:
- write_unlock_bh(&tipc_net_lock);
- if (res)
- pr_warn("Media <%s> registration error\n", m_ptr->name);
- return res;
+ return media_info_array[i];
}
/**
* tipc_media_addr_printf - record media address in print buffer
+ * @buf: output buffer
+ * @len: output buffer size remaining
+ * @a: input media address
*/
-void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
+int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
{
char addr_str[MAX_ADDR_STR];
- struct tipc_media *m_ptr;
+ struct tipc_media *m;
int ret;
- m_ptr = media_find_id(a->media_id);
+ m = media_find_id(a->media_id);
- if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str)))
- ret = tipc_snprintf(buf, len, "%s(%s)", m_ptr->name, addr_str);
+ if (m && !m->addr2str(a, addr_str, sizeof(addr_str)))
+ ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str);
else {
u32 i;
- ret = tipc_snprintf(buf, len, "UNKNOWN(%u)", a->media_id);
+ ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id);
for (i = 0; i < sizeof(a->value); i++)
- ret += tipc_snprintf(buf - ret, len + ret,
- "-%02x", a->value[i]);
+ ret += scnprintf(buf + ret, len - ret,
+ "-%x", a->value[i]);
}
-}
-
-/**
- * tipc_media_get_names - record names of registered media in buffer
- */
-struct sk_buff *tipc_media_get_names(void)
-{
- struct sk_buff *buf;
- int i;
-
- buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME));
- if (!buf)
- return NULL;
-
- read_lock_bh(&tipc_net_lock);
- for (i = 0; i < media_count; i++) {
- tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME,
- media_list[i]->name,
- strlen(media_list[i]->name) + 1);
- }
- read_unlock_bh(&tipc_net_lock);
- return buf;
+ return ret;
}
/**
@@ -159,7 +132,7 @@ struct sk_buff *tipc_media_get_names(void)
* @name: ptr to bearer name string
* @name_parts: ptr to area for bearer name components (or NULL if not needed)
*
- * Returns 1 if bearer name is valid, otherwise 0.
+ * Return: 1 if bearer name is valid, otherwise 0.
*/
static int bearer_name_validate(const char *name,
struct tipc_bearer_names *name_parts)
@@ -171,10 +144,7 @@ static int bearer_name_validate(const char *name,
u32 if_len;
/* copy bearer name & ensure length is OK */
- name_copy[TIPC_MAX_BEARER_NAME - 1] = 0;
- /* need above in case non-Posix strncpy() doesn't pad with nulls */
- strncpy(name_copy, name, TIPC_MAX_BEARER_NAME);
- if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0)
+ if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0)
return 0;
/* ensure all component parts of bearer name are present */
@@ -193,314 +163,1215 @@ static int bearer_name_validate(const char *name,
/* return bearer name components, if necessary */
if (name_parts) {
- strcpy(name_parts->media_name, media_name);
- strcpy(name_parts->if_name, if_name);
+ if (strscpy(name_parts->media_name, media_name,
+ TIPC_MAX_MEDIA_NAME) < 0)
+ return 0;
+ if (strscpy(name_parts->if_name, if_name,
+ TIPC_MAX_IF_NAME) < 0)
+ return 0;
}
return 1;
}
/**
* tipc_bearer_find - locates bearer object with matching bearer name
+ * @net: the applicable net namespace
+ * @name: bearer name to locate
*/
-struct tipc_bearer *tipc_bearer_find(const char *name)
+struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
{
- struct tipc_bearer *b_ptr;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bearer *b;
u32 i;
- for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (b_ptr->active && (!strcmp(b_ptr->name, name)))
- return b_ptr;
+ for (i = 0; i < MAX_BEARERS; i++) {
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (b && (!strcmp(b->name, name)))
+ return b;
}
return NULL;
}
-/**
- * tipc_bearer_find_interface - locates bearer object with matching interface name
+/* tipc_bearer_get_name - get the bearer name from its id.
+ * @net: network namespace
+ * @name: a pointer to the buffer where the name will be stored.
+ * @bearer_id: the id to get the name from.
*/
-struct tipc_bearer *tipc_bearer_find_interface(const char *if_name)
+int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id)
{
- struct tipc_bearer *b_ptr;
- char *b_if_name;
- u32 i;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bearer *b;
- for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (!b_ptr->active)
- continue;
- b_if_name = strchr(b_ptr->name, ':') + 1;
- if (!strcmp(b_if_name, if_name))
- return b_ptr;
- }
- return NULL;
-}
+ if (bearer_id >= MAX_BEARERS)
+ return -EINVAL;
-/**
- * tipc_bearer_get_names - record names of bearers in buffer
- */
-struct sk_buff *tipc_bearer_get_names(void)
-{
- struct sk_buff *buf;
- struct tipc_bearer *b_ptr;
- int i, j;
-
- buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
- if (!buf)
- return NULL;
-
- read_lock_bh(&tipc_net_lock);
- for (i = 0; i < media_count; i++) {
- for (j = 0; j < MAX_BEARERS; j++) {
- b_ptr = &tipc_bearers[j];
- if (b_ptr->active && (b_ptr->media == media_list[i])) {
- tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
- b_ptr->name,
- strlen(b_ptr->name) + 1);
- }
- }
- }
- read_unlock_bh(&tipc_net_lock);
- return buf;
-}
+ b = rtnl_dereference(tn->bearer_list[bearer_id]);
+ if (!b)
+ return -EINVAL;
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
-{
- tipc_nmap_add(&b_ptr->nodes, dest);
- tipc_bcbearer_sort();
- tipc_disc_add_dest(b_ptr->link_req);
+ strcpy(name, b->name);
+ return 0;
}
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
{
- tipc_nmap_remove(&b_ptr->nodes, dest);
- tipc_bcbearer_sort();
- tipc_disc_remove_dest(b_ptr->link_req);
-}
+ struct tipc_bearer *b;
-/*
- * Interrupt enabling new requests after bearer blocking:
- * See bearer_send().
- */
-void tipc_continue(struct tipc_bearer *b)
-{
- spin_lock_bh(&b->lock);
- b->blocked = 0;
- spin_unlock_bh(&b->lock);
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (b)
+ tipc_disc_add_dest(b->disc);
+ rcu_read_unlock();
}
-/*
- * tipc_bearer_blocked - determines if bearer is currently blocked
- */
-int tipc_bearer_blocked(struct tipc_bearer *b)
+void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
{
- int res;
-
- spin_lock_bh(&b->lock);
- res = b->blocked;
- spin_unlock_bh(&b->lock);
+ struct tipc_bearer *b;
- return res;
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (b)
+ tipc_disc_remove_dest(b->disc);
+ rcu_read_unlock();
}
/**
* tipc_enable_bearer - enable bearer with the given name
+ * @net: the applicable net namespace
+ * @name: bearer name to enable
+ * @disc_domain: bearer domain
+ * @prio: bearer priority
+ * @attr: nlattr array
+ * @extack: netlink extended ack
*/
-int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
+static int tipc_enable_bearer(struct net *net, const char *name,
+ u32 disc_domain, u32 prio,
+ struct nlattr *attr[],
+ struct netlink_ext_ack *extack)
{
- struct tipc_bearer *b_ptr;
- struct tipc_media *m_ptr;
+ struct tipc_net *tn = tipc_net(net);
struct tipc_bearer_names b_names;
- char addr_string[16];
- u32 bearer_id;
- u32 with_this_prio;
- u32 i;
+ int with_this_prio = 1;
+ struct tipc_bearer *b;
+ struct tipc_media *m;
+ struct sk_buff *skb;
+ int bearer_id = 0;
int res = -EINVAL;
+ char *errstr = "";
+ u32 i;
- if (!tipc_own_addr) {
- pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
- name);
- return -ENOPROTOOPT;
- }
if (!bearer_name_validate(name, &b_names)) {
- pr_warn("Bearer <%s> rejected, illegal name\n", name);
- return -EINVAL;
- }
- if (tipc_addr_domain_valid(disc_domain) &&
- (disc_domain != tipc_own_addr)) {
- if (tipc_in_scope(disc_domain, tipc_own_addr)) {
- disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK;
- res = 0; /* accept any node in own cluster */
- } else if (in_own_cluster_exact(disc_domain))
- res = 0; /* accept specified node in own cluster */
- }
- if (res) {
- pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
- name);
- return -EINVAL;
- }
- if ((priority > TIPC_MAX_LINK_PRI) &&
- (priority != TIPC_MEDIA_LINK_PRI)) {
- pr_warn("Bearer <%s> rejected, illegal priority\n", name);
- return -EINVAL;
+ NL_SET_ERR_MSG(extack, "Illegal name");
+ return res;
}
- write_lock_bh(&tipc_net_lock);
+ if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
+ errstr = "illegal priority";
+ NL_SET_ERR_MSG(extack, "Illegal priority");
+ goto rejected;
+ }
- m_ptr = tipc_media_find(b_names.media_name);
- if (!m_ptr) {
- pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
- name, b_names.media_name);
- goto exit;
+ m = tipc_media_find(b_names.media_name);
+ if (!m) {
+ errstr = "media not registered";
+ NL_SET_ERR_MSG(extack, "Media not registered");
+ goto rejected;
}
- if (priority == TIPC_MEDIA_LINK_PRI)
- priority = m_ptr->priority;
+ if (prio == TIPC_MEDIA_LINK_PRI)
+ prio = m->priority;
-restart:
+ /* Check new bearer vs existing ones and find free bearer id if any */
bearer_id = MAX_BEARERS;
- with_this_prio = 1;
- for (i = MAX_BEARERS; i-- != 0; ) {
- if (!tipc_bearers[i].active) {
+ i = MAX_BEARERS;
+ while (i-- != 0) {
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (!b) {
bearer_id = i;
continue;
}
- if (!strcmp(name, tipc_bearers[i].name)) {
- pr_warn("Bearer <%s> rejected, already enabled\n",
- name);
- goto exit;
+ if (!strcmp(name, b->name)) {
+ errstr = "already enabled";
+ NL_SET_ERR_MSG(extack, "Already enabled");
+ goto rejected;
}
- if ((tipc_bearers[i].priority == priority) &&
+
+ if (b->priority == prio &&
(++with_this_prio > 2)) {
- if (priority-- == 0) {
- pr_warn("Bearer <%s> rejected, duplicate priority\n",
- name);
- goto exit;
+ pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+ name, prio);
+
+ if (prio == TIPC_MIN_LINK_PRI) {
+ errstr = "cannot adjust to lower";
+ NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
+ goto rejected;
}
- pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
- name, priority + 1, priority);
- goto restart;
+
+ pr_warn("Bearer <%s>: trying with adjusted priority\n",
+ name);
+ prio--;
+ bearer_id = MAX_BEARERS;
+ i = MAX_BEARERS;
+ with_this_prio = 1;
}
}
+
if (bearer_id >= MAX_BEARERS) {
- pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
- name, MAX_BEARERS);
- goto exit;
+ errstr = "max 3 bearers permitted";
+ NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
+ goto rejected;
}
- b_ptr = &tipc_bearers[bearer_id];
- strcpy(b_ptr->name, name);
- res = m_ptr->enable_bearer(b_ptr);
+ b = kzalloc(sizeof(*b), GFP_ATOMIC);
+ if (!b)
+ return -ENOMEM;
+
+ strscpy(b->name, name);
+ b->media = m;
+ res = m->enable_media(net, b, attr);
if (res) {
- pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
- name, -res);
- goto exit;
- }
-
- b_ptr->identity = bearer_id;
- b_ptr->media = m_ptr;
- b_ptr->tolerance = m_ptr->tolerance;
- b_ptr->window = m_ptr->window;
- b_ptr->net_plane = bearer_id + 'A';
- b_ptr->active = 1;
- b_ptr->priority = priority;
- INIT_LIST_HEAD(&b_ptr->links);
- spin_lock_init(&b_ptr->lock);
-
- res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);
+ kfree(b);
+ errstr = "failed to enable media";
+ NL_SET_ERR_MSG(extack, "Failed to enable media");
+ goto rejected;
+ }
+
+ b->identity = bearer_id;
+ b->tolerance = m->tolerance;
+ b->min_win = m->min_win;
+ b->max_win = m->max_win;
+ b->domain = disc_domain;
+ b->net_plane = bearer_id + 'A';
+ b->priority = prio;
+ refcount_set(&b->refcnt, 1);
+
+ res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
- bearer_disable(b_ptr);
- pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
- name);
- goto exit;
- }
- pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
- name,
- tipc_addr_string_fill(addr_string, disc_domain), priority);
-exit:
- write_unlock_bh(&tipc_net_lock);
+ bearer_disable(net, b);
+ errstr = "failed to create discoverer";
+ NL_SET_ERR_MSG(extack, "Failed to create discoverer");
+ goto rejected;
+ }
+
+ /* Create monitoring data before accepting activate messages */
+ if (tipc_mon_create(net, bearer_id)) {
+ bearer_disable(net, b);
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ test_and_set_bit_lock(0, &b->up);
+ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
+ pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
+
+ return res;
+rejected:
+ pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr);
return res;
}
/**
- * tipc_block_bearer - Block the bearer with the given name, and reset all its links
+ * tipc_reset_bearer - Reset all links established over this bearer
+ * @net: the applicable net namespace
+ * @b: the target bearer
*/
-int tipc_block_bearer(const char *name)
+static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
+{
+ pr_info("Resetting bearer <%s>\n", b->name);
+ tipc_node_delete_links(net, b->identity);
+ tipc_disc_reset(net, b);
+ return 0;
+}
+
+bool tipc_bearer_hold(struct tipc_bearer *b)
+{
+ return (b && refcount_inc_not_zero(&b->refcnt));
+}
+
+void tipc_bearer_put(struct tipc_bearer *b)
+{
+ if (b && refcount_dec_and_test(&b->refcnt))
+ kfree_rcu(b, rcu);
+}
+
+/**
+ * bearer_disable - disable this bearer
+ * @net: the applicable net namespace
+ * @b: the bearer to disable
+ *
+ * Note: This routine assumes caller holds RTNL lock.
+ */
+static void bearer_disable(struct net *net, struct tipc_bearer *b)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int bearer_id = b->identity;
+
+ pr_info("Disabling bearer <%s>\n", b->name);
+ clear_bit_unlock(0, &b->up);
+ tipc_node_delete_links(net, bearer_id);
+ b->media->disable_media(b);
+ RCU_INIT_POINTER(b->media_ptr, NULL);
+ if (b->disc)
+ tipc_disc_delete(b->disc);
+ RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
+ tipc_bearer_put(b);
+ tipc_mon_delete(net, bearer_id);
+}
+
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+ struct nlattr *attr[])
{
- struct tipc_bearer *b_ptr = NULL;
- struct tipc_link *l_ptr;
- struct tipc_link *temp_l_ptr;
+ char *dev_name = strchr((const char *)b->name, ':') + 1;
+ int hwaddr_len = b->media->hwaddr_len;
+ u8 node_id[NODE_ID_LEN] = {0,};
+ struct net_device *dev;
- read_lock_bh(&tipc_net_lock);
- b_ptr = tipc_bearer_find(name);
- if (!b_ptr) {
- pr_warn("Attempt to block unknown bearer <%s>\n", name);
- read_unlock_bh(&tipc_net_lock);
+ /* Find device with specified name */
+ dev = dev_get_by_name(net, dev_name);
+ if (!dev)
+ return -ENODEV;
+ if (tipc_mtu_bad(dev)) {
+ dev_put(dev);
+ return -EINVAL;
+ }
+ if (dev == net->loopback_dev) {
+ dev_put(dev);
+ pr_info("Enabling <%s> not permitted\n", b->name);
+ return -EINVAL;
+ }
+
+ /* Autoconfigure own node identity if needed */
+ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
+ memcpy(node_id, dev->dev_addr, hwaddr_len);
+ tipc_net_init(net, node_id, 0);
+ }
+ if (!tipc_own_id(net)) {
+ dev_put(dev);
+ pr_warn("Failed to obtain node identity\n");
return -EINVAL;
}
- pr_info("Blocking bearer <%s>\n", name);
- spin_lock_bh(&b_ptr->lock);
- b_ptr->blocked = 1;
- list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
- struct tipc_node *n_ptr = l_ptr->owner;
+ /* Associate TIPC bearer with L2 bearer */
+ rcu_assign_pointer(b->media_ptr, dev);
+ b->pt.dev = dev;
+ b->pt.type = htons(ETH_P_TIPC);
+ b->pt.func = tipc_l2_rcv_msg;
+ dev_add_pack(&b->pt);
+ memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
+ memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len);
+ b->bcast_addr.media_id = b->media->type_id;
+ b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
+ b->mtu = dev->mtu;
+ b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr);
+ rcu_assign_pointer(dev->tipc_ptr, b);
+ return 0;
+}
+
+/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
+ * @b: the target bearer
+ *
+ * Mark L2 bearer as inactive so that incoming buffers are thrown away
+ */
+void tipc_disable_l2_media(struct tipc_bearer *b)
+{
+ struct net_device *dev;
- spin_lock_bh(&n_ptr->lock);
- tipc_link_reset(l_ptr);
- spin_unlock_bh(&n_ptr->lock);
+ dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+ dev_remove_pack(&b->pt);
+ RCU_INIT_POINTER(dev->tipc_ptr, NULL);
+ synchronize_net();
+ dev_put(dev);
+}
+
+/**
+ * tipc_l2_send_msg - send a TIPC packet out over an L2 interface
+ * @net: the associated network namespace
+ * @skb: the packet to be sent
+ * @b: the bearer through which the packet is to be sent
+ * @dest: peer destination address
+ */
+int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dest)
+{
+ struct net_device *dev;
+ int delta;
+
+ dev = (struct net_device *)rcu_dereference(b->media_ptr);
+ if (!dev)
+ return 0;
+
+ delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+ if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ return 0;
}
- spin_unlock_bh(&b_ptr->lock);
- read_unlock_bh(&tipc_net_lock);
+ skb_reset_network_header(skb);
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_TIPC);
+ dev_hard_header(skb, dev, ETH_P_TIPC, dest->value,
+ dev->dev_addr, skb->len);
+ dev_queue_xmit(skb);
return 0;
}
+bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id)
+{
+ bool supp = false;
+ struct tipc_bearer *b;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (b)
+ supp = (b->bcast_addr.broadcast == TIPC_BROADCAST_SUPPORT);
+ rcu_read_unlock();
+ return supp;
+}
+
+int tipc_bearer_mtu(struct net *net, u32 bearer_id)
+{
+ int mtu = 0;
+ struct tipc_bearer *b;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (b)
+ mtu = b->mtu;
+ rcu_read_unlock();
+ return mtu;
+}
+
+int tipc_bearer_min_mtu(struct net *net, u32 bearer_id)
+{
+ int mtu = TIPC_MIN_BEARER_MTU;
+ struct tipc_bearer *b;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (b)
+ mtu += b->encap_hlen;
+ rcu_read_unlock();
+ return mtu;
+}
+
+/* tipc_bearer_xmit_skb - sends buffer to destination over bearer
+ */
+void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+ struct sk_buff *skb,
+ struct tipc_media_addr *dest)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_bearer *b;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) {
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dest, NULL);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dest);
+ } else {
+ kfree_skb(skb);
+ }
+ rcu_read_unlock();
+}
+
+/* tipc_bearer_xmit() -send buffer to destination over bearer
+ */
+void tipc_bearer_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct tipc_bearer *b;
+ struct sk_buff *skb, *tmp;
+
+ if (skb_queue_empty(xmitq))
+ return;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (unlikely(!b))
+ __skb_queue_purge(xmitq);
+ skb_queue_walk_safe(xmitq, skb, tmp) {
+ __skb_dequeue(xmitq);
+ if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) {
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dst, __dnode);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dst);
+ } else {
+ kfree_skb(skb);
+ }
+ }
+ rcu_read_unlock();
+}
+
+/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations
+ */
+void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_media_addr *dst;
+ int net_id = tn->net_id;
+ struct tipc_bearer *b;
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (unlikely(!b || !test_bit(0, &b->up)))
+ __skb_queue_purge(xmitq);
+ skb_queue_walk_safe(xmitq, skb, tmp) {
+ hdr = buf_msg(skb);
+ msg_set_non_seq(hdr, 1);
+ msg_set_mc_netid(hdr, net_id);
+ __skb_dequeue(xmitq);
+ dst = &b->bcast_addr;
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dst, NULL);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dst);
+ }
+ rcu_read_unlock();
+}
+
/**
- * bearer_disable
+ * tipc_l2_rcv_msg - handle incoming TIPC message from an interface
+ * @skb: the received message
+ * @dev: the net device that the packet was received on
+ * @pt: the packet_type structure which was used to register this handler
+ * @orig_dev: the original receive net device in case the device is a bond
*
- * Note: This routine assumes caller holds tipc_net_lock.
+ * Accept only packets explicitly sent to this node, or broadcast packets;
+ * ignores packets sent using interface multicast, and traffic sent to other
+ * nodes (which can happen if interface is running in promiscuous mode).
*/
-static void bearer_disable(struct tipc_bearer *b_ptr)
+static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
{
- struct tipc_link *l_ptr;
- struct tipc_link *temp_l_ptr;
+ struct tipc_bearer *b;
- pr_info("Disabling bearer <%s>\n", b_ptr->name);
- spin_lock_bh(&b_ptr->lock);
- b_ptr->blocked = 1;
- b_ptr->media->disable_bearer(b_ptr);
- list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
- tipc_link_delete(l_ptr);
+ rcu_read_lock();
+ b = rcu_dereference(dev->tipc_ptr) ?:
+ rcu_dereference(orig_dev->tipc_ptr);
+ if (likely(b && test_bit(0, &b->up) &&
+ (skb->pkt_type <= PACKET_MULTICAST))) {
+ skb_mark_not_on_list(skb);
+ TIPC_SKB_CB(skb)->flags = 0;
+ tipc_rcv(dev_net(b->pt.dev), skb, b);
+ rcu_read_unlock();
+ return NET_RX_SUCCESS;
}
- if (b_ptr->link_req)
- tipc_disc_delete(b_ptr->link_req);
- spin_unlock_bh(&b_ptr->lock);
- memset(b_ptr, 0, sizeof(struct tipc_bearer));
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
-int tipc_disable_bearer(const char *name)
+/**
+ * tipc_l2_device_event - handle device events from network device
+ * @nb: the context of the notification
+ * @evt: the type of event
+ * @ptr: the net device that the event was on
+ *
+ * This function is called by the Ethernet driver in case of link
+ * change event.
+ */
+static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
+ void *ptr)
{
- struct tipc_bearer *b_ptr;
- int res;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct tipc_bearer *b;
- write_lock_bh(&tipc_net_lock);
- b_ptr = tipc_bearer_find(name);
- if (b_ptr == NULL) {
- pr_warn("Attempt to disable unknown bearer <%s>\n", name);
- res = -EINVAL;
- } else {
- bearer_disable(b_ptr);
- res = 0;
+ b = rtnl_dereference(dev->tipc_ptr);
+ if (!b)
+ return NOTIFY_DONE;
+
+ trace_tipc_l2_device_event(dev, b, evt);
+ switch (evt) {
+ case NETDEV_CHANGE:
+ if (netif_carrier_ok(dev) && netif_oper_up(dev)) {
+ test_and_set_bit_lock(0, &b->up);
+ break;
+ }
+ fallthrough;
+ case NETDEV_GOING_DOWN:
+ clear_bit_unlock(0, &b->up);
+ tipc_reset_bearer(net, b);
+ break;
+ case NETDEV_UP:
+ test_and_set_bit_lock(0, &b->up);
+ break;
+ case NETDEV_CHANGEMTU:
+ if (tipc_mtu_bad(dev)) {
+ bearer_disable(net, b);
+ break;
+ }
+ b->mtu = dev->mtu;
+ tipc_reset_bearer(net, b);
+ break;
+ case NETDEV_CHANGEADDR:
+ b->media->raw2addr(b, &b->addr,
+ (const char *)dev->dev_addr);
+ tipc_reset_bearer(net, b);
+ break;
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGENAME:
+ bearer_disable(net, b);
+ break;
}
- write_unlock_bh(&tipc_net_lock);
- return res;
+ return NOTIFY_OK;
}
+static struct notifier_block notifier = {
+ .notifier_call = tipc_l2_device_event,
+ .priority = 0,
+};
+int tipc_bearer_setup(void)
+{
+ return register_netdevice_notifier(&notifier);
+}
-void tipc_bearer_stop(void)
+void tipc_bearer_cleanup(void)
{
+ unregister_netdevice_notifier(&notifier);
+}
+
+void tipc_bearer_stop(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bearer *b;
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- if (tipc_bearers[i].active)
- bearer_disable(&tipc_bearers[i]);
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (b) {
+ bearer_disable(net, b);
+ tn->bearer_list[i] = NULL;
+ }
+ }
+}
+
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
+{
+ struct net_device *dev = net->loopback_dev;
+ struct sk_buff *skb, *_skb;
+ int exp;
+
+ skb_queue_walk(pkts, _skb) {
+ skb = pskb_copy(_skb, GFP_ATOMIC);
+ if (!skb)
+ continue;
+
+ exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+ if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ continue;
+ }
+
+ skb_reset_network_header(skb);
+ dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr,
+ dev->dev_addr, skb->len);
+ skb->dev = dev;
+ skb->pkt_type = PACKET_HOST;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
}
- media_count = 0;
+}
+
+static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *od)
+{
+ consume_skb(skb);
+ return NET_RX_SUCCESS;
+}
+
+int tipc_attach_loopback(struct net *net)
+{
+ struct net_device *dev = net->loopback_dev;
+ struct tipc_net *tn = tipc_net(net);
+
+ if (!dev)
+ return -ENODEV;
+
+ netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL);
+ tn->loopback_pt.dev = dev;
+ tn->loopback_pt.type = htons(ETH_P_TIPC);
+ tn->loopback_pt.func = tipc_loopback_rcv_pkt;
+ dev_add_pack(&tn->loopback_pt);
+ return 0;
+}
+
+void tipc_detach_loopback(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ dev_remove_pack(&tn->loopback_pt);
+ netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker);
+}
+
+/* Caller should hold rtnl_lock to protect the bearer */
+static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
+ struct tipc_bearer *bearer, int nlflags)
+{
+ void *hdr;
+ struct nlattr *attrs;
+ struct nlattr *prop;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ nlflags, TIPC_NL_BEARER_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER);
+ if (!attrs)
+ goto msg_full;
+
+ if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name))
+ goto attr_msg_full;
+
+ prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP);
+ if (!prop)
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority))
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance))
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win))
+ goto prop_msg_full;
+ if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
+ goto prop_msg_full;
+
+ nla_nest_end(msg->skb, prop);
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) {
+ if (tipc_udp_nl_add_bearer_data(msg, bearer))
+ goto attr_msg_full;
+ }
+#endif
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+prop_msg_full:
+ nla_nest_cancel(msg->skb, prop);
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int err;
+ int i = cb->args[0];
+ struct tipc_bearer *bearer;
+ struct tipc_nl_msg msg;
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+
+ if (i == MAX_BEARERS)
+ return 0;
+
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ rtnl_lock();
+ for (i = 0; i < MAX_BEARERS; i++) {
+ bearer = rtnl_dereference(tn->bearer_list[i]);
+ if (!bearer)
+ continue;
+
+ err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI);
+ if (err)
+ break;
+ }
+ rtnl_unlock();
+
+ cb->args[0] = i;
+ return skb->len;
+}
+
+int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *name;
+ struct sk_buff *rep;
+ struct tipc_bearer *bearer;
+ struct tipc_nl_msg msg;
+ struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct net *net = genl_info_net(info);
+
+ if (!info->attrs[TIPC_NLA_BEARER])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+ info->attrs[TIPC_NLA_BEARER],
+ tipc_nl_bearer_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_BEARER_NAME])
+ return -EINVAL;
+ name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+ rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ msg.skb = rep;
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
+ rtnl_lock();
+ bearer = tipc_bearer_find(net, name);
+ if (!bearer) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
+ goto err_out;
+ }
+
+ err = __tipc_nl_add_bearer(&msg, bearer, 0);
+ if (err)
+ goto err_out;
+ rtnl_unlock();
+
+ return genlmsg_reply(rep, info);
+err_out:
+ rtnl_unlock();
+ nlmsg_free(rep);
+
+ return err;
+}
+
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *name;
+ struct tipc_bearer *bearer;
+ struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+
+ if (!info->attrs[TIPC_NLA_BEARER])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+ info->attrs[TIPC_NLA_BEARER],
+ tipc_nl_bearer_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_BEARER_NAME])
+ return -EINVAL;
+
+ name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+ bearer = tipc_bearer_find(net, name);
+ if (!bearer) {
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
+ return -EINVAL;
+ }
+
+ bearer_disable(net, bearer);
+
+ return 0;
+}
+
+int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_bearer_disable(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *bearer;
+ struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ u32 domain = 0;
+ u32 prio;
+
+ prio = TIPC_MEDIA_LINK_PRI;
+
+ if (!info->attrs[TIPC_NLA_BEARER])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+ info->attrs[TIPC_NLA_BEARER],
+ tipc_nl_bearer_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_BEARER_NAME])
+ return -EINVAL;
+
+ bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+ if (attrs[TIPC_NLA_BEARER_DOMAIN])
+ domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]);
+
+ if (attrs[TIPC_NLA_BEARER_PROP]) {
+ struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
+
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
+ props);
+ if (err)
+ return err;
+
+ if (props[TIPC_NLA_PROP_PRIO])
+ prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
+ }
+
+ return tipc_enable_bearer(net, bearer, domain, prio, attrs,
+ info->extack);
+}
+
+int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_bearer_enable(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *name;
+ struct tipc_bearer *b;
+ struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+
+ if (!info->attrs[TIPC_NLA_BEARER])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+ info->attrs[TIPC_NLA_BEARER],
+ tipc_nl_bearer_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_BEARER_NAME])
+ return -EINVAL;
+ name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+ rtnl_lock();
+ b = tipc_bearer_find(net, name);
+ if (!b) {
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
+ err = -EINVAL;
+ goto out;
+ }
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = tipc_udp_nl_bearer_add(b,
+ attrs[TIPC_NLA_BEARER_UDP_OPTS]);
+ }
+#endif
+out:
+ rtnl_unlock();
+
+ return err;
+}
+
+/**
+ * __tipc_nl_bearer_set - update properties of an existing bearer
+ * @skb: request buffer; its socket selects the network namespace
+ * @info: generic netlink request info carrying the TIPC_NLA_BEARER nest
+ *
+ * Does no locking itself; tipc_nl_bearer_set() calls it with the RTNL lock
+ * held. Tolerance, priority and window are stored on the bearer as they are
+ * encountered, so they stay applied even if a later MTU check fails.
+ * Tolerance and MTU changes are additionally pushed out through
+ * tipc_node_apply_property(). MTU can only be set on UDP bearers.
+ *
+ * Return: 0 on success, -EINVAL for a missing attribute, an unknown bearer or
+ * a rejected MTU, otherwise the error from attribute parsing
+ */
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+	struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+	struct tipc_bearer *b;
+	char *name;
+	int err;
+
+	if (!info->attrs[TIPC_NLA_BEARER])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX,
+					  info->attrs[TIPC_NLA_BEARER],
+					  tipc_nl_bearer_policy, info->extack);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_BEARER_NAME])
+		return -EINVAL;
+	name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+	b = tipc_bearer_find(net, name);
+	if (!b) {
+		NL_SET_ERR_MSG(info->extack, "Bearer not found");
+		return -EINVAL;
+	}
+
+	/* Nothing to change when no property nest was supplied */
+	if (!attrs[TIPC_NLA_BEARER_PROP])
+		return 0;
+
+	err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props);
+	if (err)
+		return err;
+
+	if (props[TIPC_NLA_PROP_TOL]) {
+		b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+		tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL);
+	}
+	if (props[TIPC_NLA_PROP_PRIO])
+		b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
+	if (props[TIPC_NLA_PROP_WIN])
+		b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+
+	if (!props[TIPC_NLA_PROP_MTU])
+		return 0;
+
+	if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+		NL_SET_ERR_MSG(info->extack, "MTU property is unsupported");
+		return -EINVAL;
+	}
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	/* The MTU must leave room for the encapsulation headers */
+	if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) <
+	    b->encap_hlen + TIPC_MIN_BEARER_MTU) {
+		NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range");
+		return -EINVAL;
+	}
+	b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+	tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
+#endif
+
+	return 0;
+}
+
+/**
+ * tipc_nl_bearer_set - netlink handler that updates bearer properties
+ * @skb: request buffer, handed on untouched
+ * @info: generic netlink request info, handed on untouched
+ *
+ * Takes the RTNL lock around __tipc_nl_bearer_set().
+ *
+ * Return: the result of __tipc_nl_bearer_set()
+ */
+int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc;
+
+	rtnl_lock();
+	rc = __tipc_nl_bearer_set(skb, info);
+	rtnl_unlock();
+
+	return rc;
+}
+
+/**
+ * __tipc_nl_add_media - append one media description to a netlink message
+ * @msg: destination skb plus the portid/seq used for the message header
+ * @media: media whose name and properties are reported
+ * @nlflags: netlink header flags handed to genlmsg_put()
+ *
+ * Emits a TIPC_NL_MEDIA_GET message holding a TIPC_NLA_MEDIA nest with the
+ * media name and a TIPC_NLA_MEDIA_PROP nest (priority, tolerance, window
+ * and, for UDP media only, MTU). If any step fails, whatever this call has
+ * added so far is cancelled again.
+ *
+ * Return: 0 on success, -EMSGSIZE if any step fails
+ */
+static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
+			       struct tipc_media *media, int nlflags)
+{
+	struct sk_buff *out = msg->skb;
+	struct nlattr *media_nest;
+	struct nlattr *prop_nest;
+	void *hdr;
+
+	hdr = genlmsg_put(out, msg->portid, msg->seq, &tipc_genl_family,
+			  nlflags, TIPC_NL_MEDIA_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	media_nest = nla_nest_start_noflag(out, TIPC_NLA_MEDIA);
+	if (!media_nest)
+		goto msg_full;
+
+	if (nla_put_string(out, TIPC_NLA_MEDIA_NAME, media->name))
+		goto attr_msg_full;
+
+	prop_nest = nla_nest_start_noflag(out, TIPC_NLA_MEDIA_PROP);
+	if (!prop_nest)
+		goto prop_msg_full;
+
+	/* Short-circuit evaluation keeps the attributes in emission order */
+	if (nla_put_u32(out, TIPC_NLA_PROP_PRIO, media->priority) ||
+	    nla_put_u32(out, TIPC_NLA_PROP_TOL, media->tolerance) ||
+	    nla_put_u32(out, TIPC_NLA_PROP_WIN, media->max_win))
+		goto prop_msg_full;
+
+	/* MTU is only reported for UDP media */
+	if (media->type_id == TIPC_MEDIA_TYPE_UDP &&
+	    nla_put_u32(out, TIPC_NLA_PROP_MTU, media->mtu))
+		goto prop_msg_full;
+
+	nla_nest_end(out, prop_nest);
+	nla_nest_end(out, media_nest);
+	genlmsg_end(out, hdr);
+
+	return 0;
+
+prop_msg_full:
+	nla_nest_cancel(out, prop_nest);
+attr_msg_full:
+	nla_nest_cancel(out, media_nest);
+msg_full:
+	genlmsg_cancel(out, hdr);
+
+	return -EMSGSIZE;
+}
+
+/**
+ * tipc_nl_media_dump - netlink dump callback listing registered media
+ * @skb: buffer the dump entries are appended to
+ * @cb: dump state; args[0] holds the index of the next media to emit
+ *
+ * Walks media_info_array from the saved index until its NULL terminator or
+ * until __tipc_nl_add_media() fails, then stores the index to resume from.
+ * The walk is done with the RTNL lock held.
+ *
+ * Return: 0 once the saved index equals MAX_MEDIA, otherwise skb->len
+ */
+int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct tipc_nl_msg msg;
+	int idx = cb->args[0];
+
+	if (idx == MAX_MEDIA)
+		return 0;
+
+	msg.skb = skb;
+	msg.portid = NETLINK_CB(cb->skb).portid;
+	msg.seq = cb->nlh->nlmsg_seq;
+
+	rtnl_lock();
+	while (media_info_array[idx]) {
+		/* Stop at the first entry that fails; idx stays on it */
+		if (__tipc_nl_add_media(&msg, media_info_array[idx],
+					NLM_F_MULTI))
+			break;
+		idx++;
+	}
+	rtnl_unlock();
+
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+/**
+ * tipc_nl_media_get - netlink handler replying with one media description
+ * @skb: request buffer (not used here)
+ * @info: generic netlink request info carrying the TIPC_NLA_MEDIA nest
+ *
+ * Parses the media name from the request, allocates a reply, and fills it
+ * via __tipc_nl_add_media() while holding the RTNL lock. The reply is freed
+ * here on failure and handed to genlmsg_reply() on success.
+ *
+ * Return: the result of genlmsg_reply() on success, -EINVAL for a missing
+ * attribute or unknown media, -ENOMEM if the reply cannot be allocated,
+ * otherwise the error from parsing or from __tipc_nl_add_media()
+ */
+int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
+	struct tipc_media *media;
+	struct tipc_nl_msg msg;
+	struct sk_buff *rep;
+	char *name;
+	int err;
+
+	if (!info->attrs[TIPC_NLA_MEDIA])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX,
+					  info->attrs[TIPC_NLA_MEDIA],
+					  tipc_nl_media_policy, info->extack);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_MEDIA_NAME])
+		return -EINVAL;
+	name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
+
+	rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!rep)
+		return -ENOMEM;
+
+	msg.skb = rep;
+	msg.portid = info->snd_portid;
+	msg.seq = info->snd_seq;
+
+	rtnl_lock();
+	media = tipc_media_find(name);
+	if (media) {
+		err = __tipc_nl_add_media(&msg, media, 0);
+	} else {
+		NL_SET_ERR_MSG(info->extack, "Media not found");
+		err = -EINVAL;
+	}
+	rtnl_unlock();
+
+	if (err) {
+		/* The reply was never sent, so it is still ours to free */
+		nlmsg_free(rep);
+		return err;
+	}
+
+	return genlmsg_reply(rep, info);
+}
+
+/**
+ * __tipc_nl_media_set - update the default properties of a media type
+ * @skb: request buffer (not used here)
+ * @info: generic netlink request info carrying the TIPC_NLA_MEDIA nest
+ *
+ * Does no locking itself; tipc_nl_media_set() calls it with the RTNL lock
+ * held. Tolerance, priority and window are stored on the media as they are
+ * encountered, so they stay applied even if a later MTU check fails. MTU can
+ * only be set on UDP media and is validated with tipc_udp_mtu_bad().
+ *
+ * Return: 0 on success, -EINVAL for a missing attribute, an unknown media or
+ * a rejected MTU, otherwise the error from attribute parsing
+ */
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+{
+	int err;
+	char *name;
+	struct tipc_media *m;
+	struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
+
+	if (!info->attrs[TIPC_NLA_MEDIA])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX,
+					  info->attrs[TIPC_NLA_MEDIA],
+					  tipc_nl_media_policy, info->extack);
+	/* Reject requests that fail validation instead of acting on them */
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_MEDIA_NAME])
+		return -EINVAL;
+	name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
+
+	m = tipc_media_find(name);
+	if (!m) {
+		NL_SET_ERR_MSG(info->extack, "Media not found");
+		return -EINVAL;
+	}
+	if (attrs[TIPC_NLA_MEDIA_PROP]) {
+		struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
+
+		err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP],
+					      props);
+		if (err)
+			return err;
+
+		if (props[TIPC_NLA_PROP_TOL])
+			m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+		if (props[TIPC_NLA_PROP_PRIO])
+			m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
+		if (props[TIPC_NLA_PROP_WIN])
+			m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+		if (props[TIPC_NLA_PROP_MTU]) {
+			if (m->type_id != TIPC_MEDIA_TYPE_UDP) {
+				NL_SET_ERR_MSG(info->extack,
+					       "MTU property is unsupported");
+				return -EINVAL;
+			}
+#ifdef CONFIG_TIPC_MEDIA_UDP
+			if (tipc_udp_mtu_bad(nla_get_u32
+					     (props[TIPC_NLA_PROP_MTU]))) {
+				NL_SET_ERR_MSG(info->extack,
+					       "MTU value is out-of-range");
+				return -EINVAL;
+			}
+			m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+#endif
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * tipc_nl_media_set - netlink handler that updates media properties
+ * @skb: request buffer, handed on untouched
+ * @info: generic netlink request info, handed on untouched
+ *
+ * Takes the RTNL lock around __tipc_nl_media_set().
+ *
+ * Return: the result of __tipc_nl_media_set()
+ */
+int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc;
+
+	rtnl_lock();
+	rc = __tipc_nl_media_set(skb, info);
+	rtnl_unlock();
+
+	return rc;
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 09c869adcfcf..41eac1ee0c09 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -1,7 +1,7 @@
/*
* net/tipc/bearer.h: Include file for TIPC bearer code
*
- * Copyright (c) 1996-2006, Ericsson AB
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -37,26 +37,36 @@
#ifndef _TIPC_BEARER_H
#define _TIPC_BEARER_H
-#include "bcast.h"
+#include "netlink.h"
+#include "core.h"
+#include "msg.h"
+#include <net/genetlink.h>
-#define MAX_BEARERS 2
-#define MAX_MEDIA 2
+#define MAX_MEDIA 3
-/*
- * Identifiers associated with TIPC message header media address info
- *
- * - address info field is 20 bytes long
- * - media type identifier located at offset 3
- * - remaining bytes vary according to media type
+/* Identifiers associated with TIPC message header media address info
+ * - address info field is 32 bytes long
+ * - the field's actual content and length are defined per media
+ * - remaining unused bytes in the field are set to zero
*/
-#define TIPC_MEDIA_ADDR_SIZE 20
+#define TIPC_MEDIA_INFO_SIZE 32
#define TIPC_MEDIA_TYPE_OFFSET 3
+#define TIPC_MEDIA_ADDR_OFFSET 4
/*
* Identifiers of supported TIPC media types
*/
#define TIPC_MEDIA_TYPE_ETH 1
#define TIPC_MEDIA_TYPE_IB 2
+#define TIPC_MEDIA_TYPE_UDP 3
+
+/* Minimum bearer MTU */
+#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE)
+
+/* Identifiers for distinguishing between broadcast/multicast and replicast
+ */
+#define TIPC_BROADCAST_SUPPORT 1
+#define TIPC_REPLICAST_SUPPORT 2
/**
* struct tipc_media_addr - destination address used by TIPC bearers
@@ -65,7 +75,7 @@
* @broadcast: non-zero if address is a broadcast address
*/
struct tipc_media_addr {
- u8 value[TIPC_MEDIA_ADDR_SIZE];
+ u8 value[TIPC_MEDIA_INFO_SIZE];
u8 media_id;
u8 broadcast;
};
@@ -73,78 +83,97 @@ struct tipc_media_addr {
struct tipc_bearer;
/**
- * struct tipc_media - TIPC media information available to internal users
+ * struct tipc_media - Media specific info exposed to generic bearer layer
* @send_msg: routine which handles buffer transmission
- * @enable_bearer: routine which enables a bearer
- * @disable_bearer: routine which disables a bearer
- * @addr2str: routine which converts media address to string
- * @addr2msg: routine which converts media address to protocol message area
- * @msg2addr: routine which converts media address from protocol message area
- * @bcast_addr: media address used in broadcasting
+ * @enable_media: routine which enables a media
+ * @disable_media: routine which disables a media
+ * @addr2str: convert media address format to string
+ * @addr2msg: convert from media addr format to discovery msg addr format
+ * @msg2addr: convert from discovery msg addr format to media addr format
+ * @raw2addr: convert from raw addr format to media addr format
* @priority: default link (and bearer) priority
* @tolerance: default time (in ms) before declaring link failure
- * @window: default window (in packets) before declaring link congestion
+ * @min_win: minimum window (in packets) before declaring link congestion
+ * @max_win: maximum window (in packets) before declaring link congestion
+ * @mtu: max packet size bearer can support for media type not dependent on
+ * underlying device MTU
* @type_id: TIPC media identifier
+ * @hwaddr_len: TIPC media address len
* @name: media name
*/
struct tipc_media {
- int (*send_msg)(struct sk_buff *buf,
- struct tipc_bearer *b_ptr,
+ int (*send_msg)(struct net *net, struct sk_buff *buf,
+ struct tipc_bearer *b,
struct tipc_media_addr *dest);
- int (*enable_bearer)(struct tipc_bearer *b_ptr);
- void (*disable_bearer)(struct tipc_bearer *b_ptr);
- int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
- int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
- int (*msg2addr)(const struct tipc_bearer *b_ptr,
- struct tipc_media_addr *a, char *msg_area);
+ int (*enable_media)(struct net *net, struct tipc_bearer *b,
+ struct nlattr *attr[]);
+ void (*disable_media)(struct tipc_bearer *b);
+ int (*addr2str)(struct tipc_media_addr *addr,
+ char *strbuf,
+ int bufsz);
+ int (*addr2msg)(char *msg, struct tipc_media_addr *addr);
+ int (*msg2addr)(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg);
+ int (*raw2addr)(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ const char *raw);
u32 priority;
u32 tolerance;
- u32 window;
+ u32 min_win;
+ u32 max_win;
+ u32 mtu;
u32 type_id;
+ u32 hwaddr_len;
char name[TIPC_MAX_MEDIA_NAME];
};
/**
- * struct tipc_bearer - TIPC bearer structure
- * @usr_handle: pointer to additional media-specific information about bearer
+ * struct tipc_bearer - Generic TIPC bearer structure
+ * @media_ptr: pointer to additional media-specific information about bearer
* @mtu: max packet size bearer can support
- * @blocked: non-zero if bearer is blocked
- * @lock: spinlock for controlling access to bearer
* @addr: media-specific address associated with bearer
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
+ * @bcast_addr: media address used in broadcasting
+ * @pt: packet type for bearer
+ * @rcu: rcu struct for tipc_bearer
* @priority: default link priority for bearer
- * @window: default window size for bearer
+ * @min_win: minimum window (in packets) before declaring link congestion
+ * @max_win: maximum window (in packets) before declaring link congestion
* @tolerance: default link tolerance for bearer
+ * @domain: network domain to which links can be established
* @identity: array index of this bearer within TIPC bearer array
- * @link_req: ptr to (optional) structure making periodic link setup requests
- * @links: list of non-congested links associated with bearer
- * @active: non-zero if bearer structure is represents a bearer
+ * @disc: ptr to link setup request
* @net_plane: network plane ('A' through 'H') currently associated with bearer
- * @nodes: indicates which nodes in cluster can be reached through bearer
+ * @encap_hlen: encap headers length
+ * @up: bearer up flag (bit 0)
+ * @refcnt: tipc_bearer reference counter
*
* Note: media-specific code is responsible for initialization of the fields
* indicated below when a bearer is enabled; TIPC's generic bearer code takes
* care of initializing all other fields.
*/
struct tipc_bearer {
- void *usr_handle; /* initalized by media */
- u32 mtu; /* initalized by media */
- int blocked; /* initalized by media */
- struct tipc_media_addr addr; /* initalized by media */
+ void __rcu *media_ptr; /* initialized by media */
+ u32 mtu; /* initialized by media */
+ struct tipc_media_addr addr; /* initialized by media */
char name[TIPC_MAX_BEARER_NAME];
- spinlock_t lock;
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
+ struct packet_type pt;
+ struct rcu_head rcu;
u32 priority;
- u32 window;
+ u32 min_win;
+ u32 max_win;
u32 tolerance;
+ u32 domain;
u32 identity;
- struct tipc_link_req *link_req;
- struct list_head links;
- int active;
+ struct tipc_discoverer *disc;
char net_plane;
- struct tipc_node_map nodes;
+ u16 encap_hlen;
+ unsigned long up;
+ refcount_t refcnt;
};
struct tipc_bearer_names {
@@ -152,62 +181,86 @@ struct tipc_bearer_names {
char if_name[TIPC_MAX_IF_NAME];
};
-struct tipc_link;
-
-extern struct tipc_bearer tipc_bearers[];
-
/*
* TIPC routines available to supported media types
*/
-int tipc_register_media(struct tipc_media *m_ptr);
-
-void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
-int tipc_block_bearer(const char *name);
-void tipc_continue(struct tipc_bearer *tb_ptr);
-
-int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority);
-int tipc_disable_bearer(const char *name);
+void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b);
/*
* Routines made available to TIPC by supported media types
*/
-int tipc_eth_media_start(void);
-void tipc_eth_media_stop(void);
+extern struct tipc_media eth_media_info;
#ifdef CONFIG_TIPC_MEDIA_IB
-int tipc_ib_media_start(void);
-void tipc_ib_media_stop(void);
-#else
-static inline int tipc_ib_media_start(void) { return 0; }
-static inline void tipc_ib_media_stop(void) { return; }
+extern struct tipc_media ib_media_info;
+#endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+extern struct tipc_media udp_media_info;
#endif
-int tipc_media_set_priority(const char *name, u32 new_value);
-int tipc_media_set_window(const char *name, u32 new_value);
-void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
-struct sk_buff *tipc_media_get_names(void);
+int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
+
+int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
-struct sk_buff *tipc_bearer_get_names(void);
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
-struct tipc_bearer *tipc_bearer_find(const char *name);
-struct tipc_bearer *tipc_bearer_find_interface(const char *if_name);
+int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+ struct nlattr *attrs[]);
+bool tipc_bearer_hold(struct tipc_bearer *b);
+void tipc_bearer_put(struct tipc_bearer *b);
+void tipc_disable_l2_media(struct tipc_bearer *b);
+int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
+ struct tipc_bearer *b, struct tipc_media_addr *dest);
+
+void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest);
+void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
+struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
+int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
struct tipc_media *tipc_media_find(const char *name);
-int tipc_bearer_blocked(struct tipc_bearer *b_ptr);
-void tipc_bearer_stop(void);
+int tipc_bearer_setup(void);
+void tipc_bearer_cleanup(void);
+void tipc_bearer_stop(struct net *net);
+int tipc_bearer_mtu(struct net *net, u32 bearer_id);
+int tipc_bearer_min_mtu(struct net *net, u32 bearer_id);
+bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id);
+void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+ struct sk_buff *skb,
+ struct tipc_media_addr *dest);
+void tipc_bearer_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq);
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts);
+int tipc_attach_loopback(struct net *net);
+void tipc_detach_loopback(struct net *net);
-/**
- * tipc_bearer_send- sends buffer to destination over bearer
- *
- * IMPORTANT:
- * The media send routine must not alter the buffer being passed in
- * as it may be needed for later retransmission!
- */
-static inline void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
- struct tipc_media_addr *dest)
+/* Clone @pkts to the loopback device, but only when dev_nit_active() reports
+ * that the namespace's loopback device is active; otherwise do nothing.
+ */
+static inline void tipc_loopback_trace(struct net *net,
+				       struct sk_buff_head *pkts)
+{
+	if (likely(!dev_nit_active(net->loopback_dev)))
+		return;
+
+	tipc_clone_to_loopback(net, pkts);
+}
+
+/* check if device MTU is too low for tipc headers */
+static inline bool tipc_mtu_bad(struct net_device *dev)
{
- b->media->send_msg(buf, b, dest);
+ if (dev->mtu >= TIPC_MIN_BEARER_MTU)
+ return false;
+ netdev_warn(dev, "MTU too low for tipc bearer\n");
+ return true;
}
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/config.c b/net/tipc/config.c
deleted file mode 100644
index c301a9a592d8..000000000000
--- a/net/tipc/config.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * net/tipc/config.c: TIPC configuration management code
- *
- * Copyright (c) 2002-2006, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "port.h"
-#include "name_table.h"
-#include "config.h"
-#include "server.h"
-
-#define REPLY_TRUNCATED "<truncated>\n"
-
-static DEFINE_MUTEX(config_mutex);
-static struct tipc_server cfgsrv;
-
-static const void *req_tlv_area; /* request message TLV area */
-static int req_tlv_space; /* request message TLV area size */
-static int rep_headroom; /* reply message headroom to use */
-
-
-struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
-{
- struct sk_buff *buf;
-
- buf = alloc_skb(rep_headroom + payload_size, GFP_ATOMIC);
- if (buf)
- skb_reserve(buf, rep_headroom);
- return buf;
-}
-
-int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
- void *tlv_data, int tlv_data_size)
-{
- struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
- int new_tlv_space = TLV_SPACE(tlv_data_size);
-
- if (skb_tailroom(buf) < new_tlv_space)
- return 0;
- skb_put(buf, new_tlv_space);
- tlv->tlv_type = htons(tlv_type);
- tlv->tlv_len = htons(TLV_LENGTH(tlv_data_size));
- if (tlv_data_size && tlv_data)
- memcpy(TLV_DATA(tlv), tlv_data, tlv_data_size);
- return 1;
-}
-
-static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
-{
- struct sk_buff *buf;
- __be32 value_net;
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value)));
- if (buf) {
- value_net = htonl(value);
- tipc_cfg_append_tlv(buf, tlv_type, &value_net,
- sizeof(value_net));
- }
- return buf;
-}
-
-static struct sk_buff *tipc_cfg_reply_unsigned(u32 value)
-{
- return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value);
-}
-
-struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
-{
- struct sk_buff *buf;
- int string_len = strlen(string) + 1;
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(string_len));
- if (buf)
- tipc_cfg_append_tlv(buf, tlv_type, string, string_len);
- return buf;
-}
-
-static struct sk_buff *tipc_show_stats(void)
-{
- struct sk_buff *buf;
- struct tlv_desc *rep_tlv;
- char *pb;
- int pb_len;
- int str_len;
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- value = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
- if (value != 0)
- return tipc_cfg_reply_error_string("unsupported argument");
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
- if (buf == NULL)
- return NULL;
-
- rep_tlv = (struct tlv_desc *)buf->data;
- pb = TLV_DATA(rep_tlv);
- pb_len = ULTRA_STRING_MAX_LEN;
-
- str_len = tipc_snprintf(pb, pb_len, "TIPC version " TIPC_MOD_VER "\n");
- str_len += 1; /* for "\0" */
- skb_put(buf, TLV_SPACE(str_len));
- TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-
- return buf;
-}
-
-static struct sk_buff *cfg_enable_bearer(void)
-{
- struct tipc_bearer_config *args;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area);
- if (tipc_enable_bearer(args->name,
- ntohl(args->disc_domain),
- ntohl(args->priority)))
- return tipc_cfg_reply_error_string("unable to enable bearer");
-
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_disable_bearer(void)
-{
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- if (tipc_disable_bearer((char *)TLV_DATA(req_tlv_area)))
- return tipc_cfg_reply_error_string("unable to disable bearer");
-
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_set_own_addr(void)
-{
- u32 addr;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (addr == tipc_own_addr)
- return tipc_cfg_reply_none();
- if (!tipc_addr_node_valid(addr))
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (node address)");
- if (tipc_own_addr)
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot change node address once assigned)");
- tipc_core_start_net(addr);
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_set_remote_mng(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- tipc_remote_management = (value != 0);
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_set_max_ports(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (value == tipc_max_ports)
- return tipc_cfg_reply_none();
- if (value < 127 || value > 65535)
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (max ports must be 127-65535)");
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot change max ports while TIPC is active)");
-}
-
-static struct sk_buff *cfg_set_netid(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (value == tipc_net_id)
- return tipc_cfg_reply_none();
- if (value < 1 || value > 9999)
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (network id must be 1-9999)");
- if (tipc_own_addr)
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot change network id once TIPC has joined a network)");
- tipc_net_id = value;
- return tipc_cfg_reply_none();
-}
-
-struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area,
- int request_space, int reply_headroom)
-{
- struct sk_buff *rep_tlv_buf;
-
- mutex_lock(&config_mutex);
-
- /* Save request and reply details in a well-known location */
- req_tlv_area = request_area;
- req_tlv_space = request_space;
- rep_headroom = reply_headroom;
-
- /* Check command authorization */
- if (likely(in_own_node(orig_node))) {
- /* command is permitted */
- } else if (cmd >= 0x8000) {
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot be done remotely)");
- goto exit;
- } else if (!tipc_remote_management) {
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE);
- goto exit;
- } else if (cmd >= 0x4000) {
- u32 domain = 0;
-
- if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) ||
- (domain != orig_node)) {
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_ZONE_MSTR);
- goto exit;
- }
- }
-
- /* Call appropriate processing routine */
- switch (cmd) {
- case TIPC_CMD_NOOP:
- rep_tlv_buf = tipc_cfg_reply_none();
- break;
- case TIPC_CMD_GET_NODES:
- rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space);
- break;
- case TIPC_CMD_GET_LINKS:
- rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space);
- break;
- case TIPC_CMD_SHOW_LINK_STATS:
- rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space);
- break;
- case TIPC_CMD_RESET_LINK_STATS:
- rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, req_tlv_space);
- break;
- case TIPC_CMD_SHOW_NAME_TABLE:
- rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space);
- break;
- case TIPC_CMD_GET_BEARER_NAMES:
- rep_tlv_buf = tipc_bearer_get_names();
- break;
- case TIPC_CMD_GET_MEDIA_NAMES:
- rep_tlv_buf = tipc_media_get_names();
- break;
- case TIPC_CMD_SHOW_PORTS:
- rep_tlv_buf = tipc_port_get_ports();
- break;
- case TIPC_CMD_SHOW_STATS:
- rep_tlv_buf = tipc_show_stats();
- break;
- case TIPC_CMD_SET_LINK_TOL:
- case TIPC_CMD_SET_LINK_PRI:
- case TIPC_CMD_SET_LINK_WINDOW:
- rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd);
- break;
- case TIPC_CMD_ENABLE_BEARER:
- rep_tlv_buf = cfg_enable_bearer();
- break;
- case TIPC_CMD_DISABLE_BEARER:
- rep_tlv_buf = cfg_disable_bearer();
- break;
- case TIPC_CMD_SET_NODE_ADDR:
- rep_tlv_buf = cfg_set_own_addr();
- break;
- case TIPC_CMD_SET_REMOTE_MNG:
- rep_tlv_buf = cfg_set_remote_mng();
- break;
- case TIPC_CMD_SET_MAX_PORTS:
- rep_tlv_buf = cfg_set_max_ports();
- break;
- case TIPC_CMD_SET_NETID:
- rep_tlv_buf = cfg_set_netid();
- break;
- case TIPC_CMD_GET_REMOTE_MNG:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_remote_management);
- break;
- case TIPC_CMD_GET_MAX_PORTS:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports);
- break;
- case TIPC_CMD_GET_NETID:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
- break;
- case TIPC_CMD_NOT_NET_ADMIN:
- rep_tlv_buf =
- tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN);
- break;
- case TIPC_CMD_SET_MAX_ZONES:
- case TIPC_CMD_GET_MAX_ZONES:
- case TIPC_CMD_SET_MAX_SLAVES:
- case TIPC_CMD_GET_MAX_SLAVES:
- case TIPC_CMD_SET_MAX_CLUSTERS:
- case TIPC_CMD_GET_MAX_CLUSTERS:
- case TIPC_CMD_SET_MAX_NODES:
- case TIPC_CMD_GET_MAX_NODES:
- case TIPC_CMD_SET_MAX_SUBSCR:
- case TIPC_CMD_GET_MAX_SUBSCR:
- case TIPC_CMD_SET_MAX_PUBL:
- case TIPC_CMD_GET_MAX_PUBL:
- case TIPC_CMD_SET_LOG_SIZE:
- case TIPC_CMD_DUMP_LOG:
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (obsolete command)");
- break;
- default:
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (unknown command)");
- break;
- }
-
- WARN_ON(rep_tlv_buf->len > TLV_SPACE(ULTRA_STRING_MAX_LEN));
-
- /* Append an error message if we cannot return all requested data */
- if (rep_tlv_buf->len == TLV_SPACE(ULTRA_STRING_MAX_LEN)) {
- if (*(rep_tlv_buf->data + ULTRA_STRING_MAX_LEN) != '\0')
- sprintf(rep_tlv_buf->data + rep_tlv_buf->len -
- sizeof(REPLY_TRUNCATED) - 1, REPLY_TRUNCATED);
- }
-
- /* Return reply buffer */
-exit:
- mutex_unlock(&config_mutex);
- return rep_tlv_buf;
-}
-
-static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
- void *usr_data, void *buf, size_t len)
-{
- struct tipc_cfg_msg_hdr *req_hdr;
- struct tipc_cfg_msg_hdr *rep_hdr;
- struct sk_buff *rep_buf;
- int ret;
-
- /* Validate configuration message header (ignore invalid message) */
- req_hdr = (struct tipc_cfg_msg_hdr *)buf;
- if ((len < sizeof(*req_hdr)) ||
- (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
- (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
- pr_warn("Invalid configuration message discarded\n");
- return;
- }
-
- /* Generate reply for request (if can't, return request) */
- rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type),
- buf + sizeof(*req_hdr),
- len - sizeof(*req_hdr),
- BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr));
- if (rep_buf) {
- skb_push(rep_buf, sizeof(*rep_hdr));
- rep_hdr = (struct tipc_cfg_msg_hdr *)rep_buf->data;
- memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr));
- rep_hdr->tcm_len = htonl(rep_buf->len);
- rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST);
-
- ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
- rep_buf->len);
- if (ret < 0)
- pr_err("Sending cfg reply message failed, no memory\n");
-
- kfree_skb(rep_buf);
- }
-}
-
-static struct sockaddr_tipc cfgsrv_addr __read_mostly = {
- .family = AF_TIPC,
- .addrtype = TIPC_ADDR_NAMESEQ,
- .addr.nameseq.type = TIPC_CFG_SRV,
- .addr.nameseq.lower = 0,
- .addr.nameseq.upper = 0,
- .scope = TIPC_ZONE_SCOPE
-};
-
-static struct tipc_server cfgsrv __read_mostly = {
- .saddr = &cfgsrv_addr,
- .imp = TIPC_CRITICAL_IMPORTANCE,
- .type = SOCK_RDM,
- .max_rcvbuf_size = 64 * 1024,
- .name = "cfg_server",
- .tipc_conn_recvmsg = cfg_conn_msg_event,
- .tipc_conn_new = NULL,
- .tipc_conn_shutdown = NULL
-};
-
-int tipc_cfg_init(void)
-{
- return tipc_server_start(&cfgsrv);
-}
-
-void tipc_cfg_reinit(void)
-{
- tipc_server_stop(&cfgsrv);
-
- cfgsrv_addr.addr.nameseq.lower = tipc_own_addr;
- cfgsrv_addr.addr.nameseq.upper = tipc_own_addr;
- tipc_server_start(&cfgsrv);
-}
-
-void tipc_cfg_stop(void)
-{
- tipc_server_stop(&cfgsrv);
-}
diff --git a/net/tipc/config.h b/net/tipc/config.h
deleted file mode 100644
index 1f252f3fa058..000000000000
--- a/net/tipc/config.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * net/tipc/config.h: Include file for TIPC configuration service code
- *
- * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_CONFIG_H
-#define _TIPC_CONFIG_H
-
-/* ---------------------------------------------------------------------- */
-
-#include "link.h"
-
-struct sk_buff *tipc_cfg_reply_alloc(int payload_size);
-int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
- void *tlv_data, int tlv_data_size);
-struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string);
-
-static inline struct sk_buff *tipc_cfg_reply_none(void)
-{
- return tipc_cfg_reply_alloc(0);
-}
-
-static inline struct sk_buff *tipc_cfg_reply_error_string(char *string)
-{
- return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string);
-}
-
-static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string)
-{
- return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string);
-}
-
-struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd,
- const void *req_tlv_area, int req_tlv_space,
- int headroom);
-
-int tipc_cfg_init(void);
-void tipc_cfg_reinit(void);
-void tipc_cfg_stop(void);
-
-#endif
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fd4eeeaa972a..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -1,7 +1,7 @@
/*
* net/tipc/core.c: TIPC module code
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2013, Ericsson AB
* Copyright (c) 2005-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -35,151 +35,189 @@
*/
#include "core.h"
-#include "ref.h"
#include "name_table.h"
#include "subscr.h"
-#include "config.h"
-#include "port.h"
+#include "bearer.h"
+#include "net.h"
+#include "socket.h"
+#include "bcast.h"
+#include "node.h"
+#include "crypto.h"
#include <linux/module.h>
-/* global variables used by multiple sub-systems within TIPC */
-int tipc_random __read_mostly;
-
/* configurable TIPC parameters */
-u32 tipc_own_addr __read_mostly;
-int tipc_max_ports __read_mostly;
-int tipc_net_id __read_mostly;
-int tipc_remote_management __read_mostly;
+unsigned int tipc_net_id __read_mostly;
int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
-/**
- * tipc_buf_acquire - creates a TIPC message buffer
- * @size: message size (including TIPC header)
- *
- * Returns a new buffer with data pointers set to the specified size.
- *
- * NOTE: Headroom is reserved to allow prepending of a data link header.
- * There may also be unrequested tailroom present at the buffer's end.
- */
-struct sk_buff *tipc_buf_acquire(u32 size)
+/**
+ * tipc_init_net - initialize the TIPC state of one network namespace
+ * @net: the network namespace being set up
+ *
+ * Resets the fields of the namespace's struct tipc_net to their defaults,
+ * then calls tipc_crypto_start() (CONFIG_TIPC_CRYPTO only),
+ * tipc_sk_rht_init(), tipc_nametbl_init(), tipc_bcast_init() and
+ * tipc_attach_loopback(), in that order. When one of them fails, every step
+ * that already succeeded is undone in reverse order.
+ *
+ * Return: 0 on success, otherwise the error code of the failing step
+ */
+static int __net_init tipc_init_net(struct net *net)
{
- struct sk_buff *skb;
- unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-
- skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
- if (skb) {
- skb_reserve(skb, BUF_HEADROOM);
- skb_put(skb, size);
- skb->next = NULL;
- }
- return skb;
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	int err;
+
+	tn->net_id = 4711;
+	tn->node_addr = 0;
+	tn->trial_addr = 0;
+	tn->addr_trial_end = 0;
+	tn->capabilities = TIPC_NODE_CAPABILITIES;
+	INIT_WORK(&tn->work, tipc_net_finalize_work);
+	memset(tn->node_id, 0, sizeof(tn->node_id));
+	memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
+	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
+	get_random_bytes(&tn->random, sizeof(int));
+	INIT_LIST_HEAD(&tn->node_list);
+	spin_lock_init(&tn->node_list_lock);
+
+#ifdef CONFIG_TIPC_CRYPTO
+	err = tipc_crypto_start(&tn->crypto_tx, net, NULL);
+	if (err)
+		goto out_crypto;
+#endif
+	err = tipc_sk_rht_init(net);
+	if (err)
+		goto out_sk_rht;
+
+	err = tipc_nametbl_init(net);
+	if (err)
+		goto out_nametbl;
+
+	err = tipc_bcast_init(net);
+	if (err)
+		goto out_bclink;
+
+	err = tipc_attach_loopback(net);
+	if (err)
+		goto out_loopback;
+
+	return 0;
+
+out_loopback:
+	/* tipc_bcast_init() succeeded at this point, so undo it as well */
+	tipc_bcast_stop(net);
+out_bclink:
+	tipc_nametbl_stop(net);
+out_nametbl:
+	tipc_sk_rht_destroy(net);
+out_sk_rht:
+
+#ifdef CONFIG_TIPC_CRYPTO
+	tipc_crypto_stop(&tn->crypto_tx);
+out_crypto:
+#endif
+	return err;
}
-/**
- * tipc_core_stop_net - shut down TIPC networking sub-systems
- */
-static void tipc_core_stop_net(void)
+/**
+ * tipc_exit_net - tear down the TIPC state of one network namespace
+ * @net: the network namespace going away
+ *
+ * Detaches the loopback device, stops the network, cancels the pending
+ * finalize work and then stops the broadcast link, name table, socket hash
+ * table and TX crypto handler (CONFIG_TIPC_CRYPTO only). Does not return
+ * before tn->wq_count has dropped to zero.
+ */
+static void __net_exit tipc_exit_net(struct net *net)
{
- tipc_net_stop();
- tipc_eth_media_stop();
- tipc_ib_media_stop();
+	struct tipc_net *tn = tipc_net(net);
+
+	tipc_detach_loopback(net);
+	tipc_net_stop(net);
+	/* Make sure the tipc_net_finalize_work() finished */
+	cancel_work_sync(&tn->work);
+	tipc_bcast_stop(net);
+	tipc_nametbl_stop(net);
+	tipc_sk_rht_destroy(net);
+#ifdef CONFIG_TIPC_CRYPTO
+	tipc_crypto_stop(&tn->crypto_tx);
+#endif
+	/* Yield the CPU until the work-queue counter reads zero */
+	while (atomic_read(&tn->wq_count))
+		cond_resched();
}
-/**
- * start_net - start TIPC networking sub-systems
- */
-int tipc_core_start_net(unsigned long addr)
+/* .pre_exit hook of tipc_pernet_pre_exit_ops: hands @net to
+ * tipc_node_pre_cleanup_net()
+ */
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
{
- int res;
-
- tipc_net_start(addr);
- res = tipc_eth_media_start();
- if (res < 0)
- goto err;
- res = tipc_ib_media_start();
- if (res < 0)
- goto err;
- return res;
-
-err:
- tipc_core_stop_net();
- return res;
+ tipc_node_pre_cleanup_net(net);
}
-/**
- * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode
- */
-static void tipc_core_stop(void)
-{
- tipc_netlink_stop();
- tipc_handler_stop();
- tipc_cfg_stop();
- tipc_subscr_stop();
- tipc_nametbl_stop();
- tipc_ref_table_stop();
- tipc_socket_stop();
- tipc_unregister_sysctl();
-}
+/* Pre-exit only operations; registered with register_pernet_subsys() in
+ * tipc_init()
+ */
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+ .pre_exit = tipc_pernet_pre_exit,
+};
-/**
- * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode
- */
-static int tipc_core_start(void)
-{
- int res;
-
- get_random_bytes(&tipc_random, sizeof(tipc_random));
-
- res = tipc_handler_start();
- if (!res)
- res = tipc_ref_table_init(tipc_max_ports, tipc_random);
- if (!res)
- res = tipc_nametbl_init();
- if (!res)
- res = tipc_netlink_start();
- if (!res)
- res = tipc_socket_init();
- if (!res)
- res = tipc_register_sysctl();
- if (!res)
- res = tipc_subscr_start();
- if (!res)
- res = tipc_cfg_init();
- if (res)
- tipc_core_stop();
-
- return res;
-}
+/* Per-namespace init/exit of the TIPC core; .id and .size describe the
+ * struct tipc_net storage that tipc_net() later looks up via tipc_net_id
+ */
+static struct pernet_operations tipc_net_ops = {
+ .init = tipc_init_net,
+ .exit = tipc_exit_net,
+ .id = &tipc_net_id,
+ .size = sizeof(struct tipc_net),
+};
+
+/* Per-namespace init/exit of the topology server; registered in tipc_init()
+ * after tipc_socket_init()
+ */
+static struct pernet_operations tipc_topsrv_net_ops = {
+ .init = tipc_topsrv_init_net,
+ .exit = tipc_topsrv_exit_net,
+};
+/**
+ * tipc_init - bring up the TIPC subsystems
+ *
+ * Sets the default sysctl_tipc_rmem[] values, then registers, in order: the
+ * sysctl table, the core per-namespace operations, the socket layer, the
+ * topology server per-namespace operations, the pre-exit operations, the
+ * bearer layer, netlink and the netlink compat layer.
+ *
+ * Return: 0 on success, otherwise the error code of the failing step
+ */
static int __init tipc_init(void)
{
- int res;
+ int err;
pr_info("Activated (version " TIPC_MOD_VER ")\n");
- tipc_own_addr = 0;
- tipc_remote_management = 1;
- tipc_max_ports = CONFIG_TIPC_PORTS;
- tipc_net_id = 4711;
-
- sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
- sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
- TIPC_CRITICAL_IMPORTANCE;
- sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;
-
- res = tipc_core_start();
- if (res)
- pr_err("Unable to start in single node mode\n");
- else
- pr_info("Started in single node mode\n");
- return res;
+ sysctl_tipc_rmem[0] = RCVBUF_MIN;
+ sysctl_tipc_rmem[1] = RCVBUF_DEF;
+ sysctl_tipc_rmem[2] = RCVBUF_MAX;
+
+ err = tipc_register_sysctl();
+ if (err)
+ goto out_sysctl;
+
+ err = register_pernet_device(&tipc_net_ops);
+ if (err)
+ goto out_pernet;
+
+ err = tipc_socket_init();
+ if (err)
+ goto out_socket;
+
+ err = register_pernet_device(&tipc_topsrv_net_ops);
+ if (err)
+ goto out_pernet_topsrv;
+
+ err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+ if (err)
+ goto out_register_pernet_subsys;
+
+ err = tipc_bearer_setup();
+ if (err)
+ goto out_bearer;
+
+ err = tipc_netlink_start();
+ if (err)
+ goto out_netlink;
+
+ err = tipc_netlink_compat_start();
+ if (err)
+ goto out_netlink_compat;
+
+ pr_info("Started in single node mode\n");
+ return 0;
+
+ /* Each label undoes the step registered just before the one that failed */
+out_netlink_compat:
+ tipc_netlink_stop();
+out_netlink:
+ tipc_bearer_cleanup();
+out_bearer:
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
+ unregister_pernet_device(&tipc_topsrv_net_ops);
+out_pernet_topsrv:
+ tipc_socket_stop();
+out_socket:
+ unregister_pernet_device(&tipc_net_ops);
+out_pernet:
+ tipc_unregister_sysctl();
+out_sysctl:
+ pr_err("Unable to start in single node mode\n");
+ return err;
}
+/* tipc_exit - undo everything tipc_init() registered, in reverse order */
static void __exit tipc_exit(void)
{
- tipc_core_stop_net();
- tipc_core_stop();
+ tipc_netlink_compat_stop();
+ tipc_netlink_stop();
+ tipc_bearer_cleanup();
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
+ tipc_socket_stop();
+ unregister_pernet_device(&tipc_net_ops);
+ tipc_unregister_sysctl();
+
pr_info("Deactivated\n");
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index be72f8cebc53..7f3fe3401c45 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -1,8 +1,9 @@
/*
* net/tipc/core.h: Include file for TIPC global declarations
*
- * Copyright (c) 2005-2006, 2013 Ericsson AB
+ * Copyright (c) 2005-2006, 2013-2018 Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
+ * Copyright (c) 2020, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,170 +38,190 @@
#ifndef _TIPC_CORE_H
#define _TIPC_CORE_H
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/tipc.h>
#include <linux/tipc_config.h>
+#include <linux/tipc_netlink.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/timer.h>
#include <linux/string.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/atomic.h>
-#include <asm/hardirq.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <net/netns/generic.h>
+#include <linux/rhashtable.h>
+#include <net/genetlink.h>
+#include <net/netns/hash.h>
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#define TIPC_MOD_VER "2.0.0"
-
-#define ULTRA_STRING_MAX_LEN 32768
-#define TIPC_MAX_SUBSCRIPTIONS 65535
-#define TIPC_MAX_PUBLICATIONS 65535
+struct tipc_node;
+struct tipc_bearer;
+struct tipc_bc_base;
+struct tipc_link;
+struct tipc_topsrv;
+struct tipc_monitor;
+#ifdef CONFIG_TIPC_CRYPTO
+struct tipc_crypto;
+#endif
-struct tipc_msg; /* msg.h */
+#define TIPC_MOD_VER "2.0.0"
-int tipc_snprintf(char *buf, int len, const char *fmt, ...);
+#define NODE_HTABLE_SIZE 512
+#define MAX_BEARERS 3
+#define TIPC_DEF_MON_THRESHOLD 32
+#define NODE_ID_LEN 16
+#define NODE_ID_STR_LEN (NODE_ID_LEN * 2 + 1)
-/*
- * TIPC-specific error codes
- */
-#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */
-
-/*
- * Global configuration variables
- */
-extern u32 tipc_own_addr __read_mostly;
-extern int tipc_max_ports __read_mostly;
-extern int tipc_net_id __read_mostly;
-extern int tipc_remote_management __read_mostly;
+extern unsigned int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
+extern int sysctl_tipc_named_timeout __read_mostly;
+
+/*
+ * struct tipc_net - TIPC state kept per network namespace
+ *
+ * An instance is obtained with net_generic(net, tipc_net_id).
+ */
+struct tipc_net {
+ u8 node_id[NODE_ID_LEN];
+ u32 node_addr;
+ u32 trial_addr;
+ unsigned long addr_trial_end;
+ char node_id_string[NODE_ID_STR_LEN];
+ int net_id;
+ int random;
+ bool legacy_addr_format;
+
+ /* Node table and node list */
+ spinlock_t node_list_lock;
+ struct hlist_head node_htable[NODE_HTABLE_SIZE];
+ struct list_head node_list;
+ u32 num_nodes;
+ u32 num_links;
+
+ /* Neighbor monitoring list */
+ struct tipc_monitor *monitors[MAX_BEARERS];
+ int mon_threshold;
+
+ /* Bearer list */
+ struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
+
+ /* Broadcast link */
+ spinlock_t bclock;
+ struct tipc_bc_base *bcbase;
+ struct tipc_link *bcl;
+
+ /* Socket hash table */
+ struct rhashtable sk_rht;
+
+ /* Name table */
+ spinlock_t nametbl_lock;
+ struct name_table *nametbl;
+
+ /* Topology subscription server */
+ struct tipc_topsrv *topsrv;
+ atomic_t subscription_count;
+
+ /* Cluster capabilities */
+ u16 capabilities;
+
+ /* Tracing of node internal messages */
+ struct packet_type loopback_pt;
+
+#ifdef CONFIG_TIPC_CRYPTO
+ /* TX crypto handler */
+ struct tipc_crypto *crypto_tx;
+#endif
+ /* Work item for net finalize */
+ struct work_struct work;
+ /* The numbers of work queues in schedule */
+ atomic_t wq_count;
+};
-/*
- * Other global variables
- */
-extern int tipc_random __read_mostly;
-
-/*
- * Routines available to privileged subsystems
- */
-extern int tipc_core_start_net(unsigned long);
-extern int tipc_handler_start(void);
-extern void tipc_handler_stop(void);
-extern int tipc_netlink_start(void);
-extern void tipc_netlink_stop(void);
-extern int tipc_socket_init(void);
-extern void tipc_socket_stop(void);
-extern int tipc_sock_create_local(int type, struct socket **res);
-extern void tipc_sock_release_local(struct socket *sock);
-extern int tipc_sock_accept_local(struct socket *sock,
- struct socket **newsock, int flags);
+/* tipc_net - return the struct tipc_net stored for namespace @net */
+static inline struct tipc_net *tipc_net(struct net *net)
+{
+ return net_generic(net, tipc_net_id);
+}
-#ifdef CONFIG_SYSCTL
-extern int tipc_register_sysctl(void);
-extern void tipc_unregister_sysctl(void);
-#else
-#define tipc_register_sysctl() 0
-#define tipc_unregister_sysctl()
-#endif
+/* tipc_netid - return the net_id field of namespace @net */
+static inline int tipc_netid(struct net *net)
+{
+ return tipc_net(net)->net_id;
+}
-/*
- * TIPC timer and signal code
- */
-typedef void (*Handler) (unsigned long);
+/* tipc_nodes - return the head of the node list of namespace @net */
+static inline struct list_head *tipc_nodes(struct net *net)
+{
+ return &tipc_net(net)->node_list;
+}
-u32 tipc_k_signal(Handler routine, unsigned long argument);
+/* tipc_name_table - return the name table pointer of namespace @net */
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+ return tipc_net(net)->nametbl;
+}
-/**
- * k_init_timer - initialize a timer
- * @timer: pointer to timer structure
- * @routine: pointer to routine to invoke when timer expires
- * @argument: value to pass to routine when timer expires
- *
- * Timer must be initialized before use (and terminated when no longer needed).
- */
-static inline void k_init_timer(struct timer_list *timer, Handler routine,
- unsigned long argument)
+/* tipc_topsrv - return the topology server pointer of namespace @net */
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
{
- setup_timer(timer, routine, argument);
+ return tipc_net(net)->topsrv;
}
-/**
- * k_start_timer - start a timer
- * @timer: pointer to timer structure
- * @msec: time to delay (in ms)
- *
- * Schedules a previously initialized timer for later execution.
- * If timer is already running, the new timeout overrides the previous request.
- *
- * To ensure the timer doesn't expire before the specified delay elapses,
- * the amount of delay is rounded up when converting to the jiffies
- * then an additional jiffy is added to account for the fact that
- * the starting time may be in the middle of the current jiffy.
- */
-static inline void k_start_timer(struct timer_list *timer, unsigned long msec)
+/* tipc_hashfn - reduce @addr to an index in 0..NODE_HTABLE_SIZE - 1; the mask
+ * only works because NODE_HTABLE_SIZE (512) is a power of two
+ */
+static inline unsigned int tipc_hashfn(u32 addr)
{
- mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1);
+ return addr & (NODE_HTABLE_SIZE - 1);
}
-/**
- * k_cancel_timer - cancel a timer
- * @timer: pointer to timer structure
- *
- * Cancels a previously initialized timer.
- * Can be called safely even if the timer is already inactive.
- *
- * WARNING: Must not be called when holding locks required by the timer's
- * timeout routine, otherwise deadlock can occur on SMP systems!
- */
-static inline void k_cancel_timer(struct timer_list *timer)
+/* mod - reduce @x to 16 bits. The mask itself leaves a u16 unchanged; the
+ * truncation callers rely on happens when an int expression such as
+ * "right - left" is converted to the u16 parameter.
+ */
+static inline u16 mod(u16 x)
{
- del_timer_sync(timer);
+ return x & 0xffffu;
}
-/**
- * k_term_timer - terminate a timer
- * @timer: pointer to timer structure
- *
- * Prevents further use of a previously initialized timer.
- *
- * WARNING: Caller must ensure timer isn't currently running.
- *
- * (Do not "enhance" this routine to automatically cancel an active timer,
- * otherwise deadlock can arise when a timeout routine calls k_term_timer.)
- */
-static inline void k_term_timer(struct timer_list *timer)
+/* less_eq - non-zero when the 16-bit wrap-around distance from @left forward
+ * to @right is below 32768, i.e. @left is at or before @right
+ */
+static inline int less_eq(u16 left, u16 right)
{
+ return mod(right - left) < 32768u;
}
-/*
- * TIPC message buffer code
- *
- * TIPC message buffer headroom reserves space for the worst-case
- * link-level device header (in case the message is sent off-node).
- *
- * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields
- * are word aligned for quicker access
- */
-#define BUF_HEADROOM LL_MAX_HEADER
+/* more - logical negation of less_eq(): non-zero when @left is after @right */
+static inline int more(u16 left, u16 right)
+{
+ return !less_eq(left, right);
+}
-struct tipc_skb_cb {
- void *handle;
-};
+/* less - non-zero when @left is strictly before @right (wrap-around aware) */
+static inline int less(u16 left, u16 right)
+{
+	if (!less_eq(left, right))
+		return 0;
+
+	/* less_eq() also accepts equal values; rule those out */
+	return mod(right) != mod(left);
+}
-#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
+/* tipc_in_range - non-zero when @val is neither before @min nor after @max */
+static inline int tipc_in_range(u16 val, u16 min, u16 max)
+{
+	if (less(val, min))
+		return 0;
+
+	return !more(val, max);
+}
-static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
+/* tipc_net_hash_mixes - XOR the hash mix of init_net, the hash mix of @net
+ * and @tn_rand into one value
+ */
+static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand)
{
- return (struct tipc_msg *)skb->data;
+ return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand;
}
-extern struct sk_buff *tipc_buf_acquire(u32 size);
+/* hash128to32 - fold the four big-endian 32-bit words at @bytes into one
+ * host-order value: their XOR, or their OR when the XOR is zero
+ */
+static inline u32 hash128to32(char *bytes)
+{
+	__be32 *word = (__be32 *)bytes;
+	u32 hash = ntohl(word[0] ^ word[1] ^ word[2] ^ word[3]);
+
+	if (unlikely(!hash))
+		hash = ntohl(word[0] | word[1] | word[2] | word[3]);
+
+	return hash;
+}
+#ifdef CONFIG_SYSCTL
+int tipc_register_sysctl(void);
+void tipc_unregister_sysctl(void);
+#else
+#define tipc_register_sysctl() 0
+#define tipc_unregister_sysctl()
+#endif
#endif
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
new file mode 100644
index 000000000000..751904f10aab
--- /dev/null
+++ b/net/tipc/crypto.c
@@ -0,0 +1,2484 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption
+ *
+ * Copyright (c) 2019, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <crypto/aead.h>
+#include <crypto/aes.h>
+#include <crypto/rng.h>
+#include "crypto.h"
+#include "msg.h"
+#include "bcast.h"
+
+#define TIPC_TX_GRACE_PERIOD msecs_to_jiffies(5000) /* 5s */
+#define TIPC_TX_LASTING_TIME msecs_to_jiffies(10000) /* 10s */
+#define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */
+#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(15000) /* 15s */
+
+#define TIPC_MAX_TFMS_DEF 10
+#define TIPC_MAX_TFMS_LIM 1000
+
+#define TIPC_REKEYING_INTV_DEF (60 * 24) /* default: 1 day */
+
+/*
+ * TIPC Key ids
+ */
+enum {
+ KEY_MASTER = 0,
+ KEY_MIN = KEY_MASTER,
+ KEY_1 = 1,
+ KEY_2,
+ KEY_3,
+ KEY_MAX = KEY_3,
+};
+
+/*
+ * TIPC Crypto statistics
+ */
+enum {
+ STAT_OK,
+ STAT_NOK,
+ STAT_ASYNC,
+ STAT_ASYNC_OK,
+ STAT_ASYNC_NOK,
+ STAT_BADKEYS, /* tx only */
+ STAT_BADMSGS = STAT_BADKEYS, /* rx only */
+ STAT_NOKEYS,
+ STAT_SWITCHES,
+
+ MAX_STATS,
+};
+
+/* TIPC crypto statistics' header, one label per STAT_* index. Index 5 is
+ * shared by STAT_BADKEYS (tx) and STAT_BADMSGS (rx) and is labelled "badmsgs".
+ */
+static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok",
+ "async_nok", "badmsgs", "nokeys",
+ "switches"};
+
+/* Max TFMs number per key */
+int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF;
+/* Key exchange switch, default: on */
+int sysctl_tipc_key_exchange_enabled __read_mostly = 1;
+
+/*
+ * struct tipc_key - TIPC keys' status indicator
+ *
+ * 7 6 5 4 3 2 1 0
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * key: | (reserved)|passive idx| active idx|pending idx|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ */
+struct tipc_key {
+#define KEY_BITS (2)
+#define KEY_MASK ((1 << KEY_BITS) - 1)
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 pending:2,
+ active:2,
+ passive:2, /* rx only */
+ reserved:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:2,
+ passive:2, /* rx only */
+ active:2,
+ pending:2;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ } __packed;
+ u8 keys;
+ };
+};
+
+/**
+ * struct tipc_tfm - TIPC TFM structure to form a list of TFMs
+ * @tfm: cipher handle/key
+ * @list: linked list of TFMs
+ */
+struct tipc_tfm {
+ struct crypto_aead *tfm;
+ struct list_head list;
+};
+
+/**
+ * struct tipc_aead - TIPC AEAD key structure
+ * @tfm_entry: per-cpu pointer to one entry in TFM list
+ * @crypto: TIPC crypto owns this key
+ * @cloned: reference to the source key in case cloning
+ * @users: the number of the key users (TX/RX)
+ * @salt: the key's SALT value
+ * @authsize: authentication tag size (max = 16)
+ * @mode: crypto mode is applied to the key
+ * @hint: a hint for user key
+ * @rcu: struct rcu_head
+ * @key: the aead key
+ * @gen: the key's generation
+ * @seqno: the key seqno (cluster scope)
+ * @refcnt: the key reference counter
+ */
+struct tipc_aead {
+#define TIPC_AEAD_HINT_LEN (5)
+ struct tipc_tfm * __percpu *tfm_entry;
+ struct tipc_crypto *crypto;
+ struct tipc_aead *cloned;
+ atomic_t users;
+ u32 salt;
+ u8 authsize;
+ u8 mode;
+ char hint[2 * TIPC_AEAD_HINT_LEN + 1];
+ struct rcu_head rcu;
+ struct tipc_aead_key *key;
+ u16 gen;
+
+ atomic64_t seqno ____cacheline_aligned;
+ refcount_t refcnt ____cacheline_aligned;
+
+} ____cacheline_aligned;
+
+/**
+ * struct tipc_crypto_stats - TIPC Crypto statistics
+ * @stat: array of crypto statistics
+ */
+struct tipc_crypto_stats {
+ unsigned int stat[MAX_STATS];
+};
+
+/**
+ * struct tipc_crypto - TIPC TX/RX crypto structure
+ * @net: struct net
+ * @node: TIPC node (RX)
+ * @aead: array of pointers to AEAD keys for encryption/decryption
+ * @peer_rx_active: replicated peer RX active key index
+ * @key_gen: TX/RX key generation
+ * @key: the key states
+ * @skey_mode: session key's mode
+ * @skey: received session key
+ * @wq: common workqueue on TX crypto
+ * @work: delayed work sched for TX/RX
+ * @key_distr: key distributing state
+ * @rekeying_intv: rekeying interval (in minutes)
+ * @stats: the crypto statistics
+ * @name: the crypto name
+ * @sndnxt: the per-peer sndnxt (TX)
+ * @timer1: general timer 1 (jiffies)
+ * @timer2: general timer 2 (jiffies)
+ * @working: the crypto is working or not
+ * @key_master: flag indicates if master key exists
+ * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.)
+ * @nokey: no key indication
+ * @flags: combined flags field
+ * @lock: tipc_key lock
+ */
+struct tipc_crypto {
+ struct net *net;
+ struct tipc_node *node;
+ struct tipc_aead __rcu *aead[KEY_MAX + 1];
+ atomic_t peer_rx_active;
+ u16 key_gen;
+ struct tipc_key key;
+ u8 skey_mode;
+ struct tipc_aead_key *skey;
+ struct workqueue_struct *wq;
+ struct delayed_work work;
+#define KEY_DISTR_SCHED 1
+#define KEY_DISTR_COMPL 2
+ atomic_t key_distr;
+ u32 rekeying_intv;
+
+ struct tipc_crypto_stats __percpu *stats;
+ char name[48];
+
+ atomic64_t sndnxt ____cacheline_aligned;
+ unsigned long timer1;
+ unsigned long timer2;
+ union {
+ struct {
+ u8 working:1;
+ u8 key_master:1;
+ u8 legacy_user:1;
+ u8 nokey: 1;
+ };
+ u8 flags;
+ };
+ spinlock_t lock; /* crypto lock */
+
+} ____cacheline_aligned;
+
+/* struct tipc_crypto_tx_ctx - TX context for callbacks */
+struct tipc_crypto_tx_ctx {
+ struct tipc_aead *aead;
+ struct tipc_bearer *bearer;
+ struct tipc_media_addr dst;
+};
+
+/* struct tipc_crypto_rx_ctx - RX context for callbacks */
+struct tipc_crypto_rx_ctx {
+ struct tipc_aead *aead;
+ struct tipc_bearer *bearer;
+};
+
+static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead);
+static inline void tipc_aead_put(struct tipc_aead *aead);
+static void tipc_aead_free(struct rcu_head *rp);
+static int tipc_aead_users(struct tipc_aead __rcu *aead);
+static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim);
+static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim);
+static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val);
+static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead);
+static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+ u8 mode);
+static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src);
+static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
+ unsigned int crypto_ctx_size,
+ u8 **iv, struct aead_request **req,
+ struct scatterlist **sg, int nsg);
+static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+static void tipc_aead_encrypt_done(void *data, int err);
+static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
+ struct sk_buff *skb, struct tipc_bearer *b);
+static void tipc_aead_decrypt_done(void *data, int err);
+static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr);
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+ u8 tx_key, struct sk_buff *skb,
+ struct tipc_crypto *__rx);
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+ u8 new_passive,
+ u8 new_active,
+ u8 new_pending);
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+ struct tipc_aead *aead, u8 pos,
+ bool master_key);
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending);
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+ struct tipc_crypto *rx,
+ struct sk_buff *skb,
+ u8 tx_key);
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb);
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key);
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode, u8 type);
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+ struct tipc_bearer *b,
+ struct sk_buff **skb, int err);
+static void tipc_crypto_do_cmd(struct net *net, int cmd);
+static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf);
+static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
+ char *buf);
+static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
+ u16 gen, u8 mode, u32 dnode);
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr);
+static void tipc_crypto_work_tx(struct work_struct *work);
+static void tipc_crypto_work_rx(struct work_struct *work);
+static int tipc_aead_key_generate(struct tipc_aead_key *skey);
+
+#define is_tx(crypto) (!(crypto)->node)
+#define is_rx(crypto) (!is_tx(crypto))
+
+/* Key id following @cur, cycling KEY_1 -> KEY_2 -> KEY_3 -> KEY_1; KEY_MASTER
+ * (0) also maps to KEY_1, so the result is never KEY_MASTER
+ */
+#define key_next(cur) ((cur) % KEY_MAX + 1)
+
+#define tipc_aead_rcu_ptr(rcu_ptr, lock) \
+ rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock))
+
+/* Store @ptr in @rcu_ptr and drop one reference on the key it replaces.
+ * The old pointer is read under the assumption that @lock is held.
+ */
+#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \
+do { \
+ struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \
+ lockdep_is_held(lock)); \
+ rcu_assign_pointer((rcu_ptr), (ptr)); \
+ tipc_aead_put(__tmp); \
+} while (0)
+
+#define tipc_crypto_key_detach(rcu_ptr, lock) \
+ tipc_aead_rcu_replace((rcu_ptr), NULL, lock)
+
+/**
+ * tipc_aead_key_validate - Check that a user key can be used by TIPC
+ * @ukey: pointer to user key data
+ * @info: netlink info pointer, receives the error message on failure
+ *
+ * Return: 0 if the key is acceptable, -ENODEV if the algorithm is not
+ * available, -ENOTSUPP if it is not "gcm(aes)", -EKEYREJECTED if the key
+ * length (salt excluded) is not one of the three AES-GCM key sizes
+ */
+int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info)
+{
+	int aes_keylen;
+
+	/* The algorithm has to be known to the crypto layer ... */
+	if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) {
+		GENL_SET_ERR_MSG(info, "unable to load the algorithm (module existed?)");
+		return -ENODEV;
+	}
+
+	/* ... and "gcm(aes)" is the only cipher supported for now */
+	if (strcmp(ukey->alg_name, "gcm(aes)") != 0) {
+		GENL_SET_ERR_MSG(info, "not supported yet the algorithm");
+		return -ENOTSUPP;
+	}
+
+	/* What is left after the salt must be a valid AES key size */
+	aes_keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
+	if (likely(aes_keylen == TIPC_AES_GCM_KEY_SIZE_128 ||
+		   aes_keylen == TIPC_AES_GCM_KEY_SIZE_192 ||
+		   aes_keylen == TIPC_AES_GCM_KEY_SIZE_256))
+		return 0;
+
+	GENL_SET_ERR_MSG(info, "incorrect key length (20, 28 or 36 octets?)");
+	return -EKEYREJECTED;
+}
+
+/**
+ * tipc_aead_key_generate - Fill a session key with fresh random bytes
+ * @skey: key to refill; skey->keylen bytes of skey->key are overwritten
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_key_generate(struct tipc_aead_key *skey)
+{
+	int err;
+
+	/* The random bytes come from the default RNG cipher */
+	err = crypto_get_default_rng();
+	if (unlikely(err))
+		return err;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, skey->key,
+				   skey->keylen);
+	crypto_put_default_rng();
+
+	return err;
+}
+
+/* tipc_aead_get - take a reference on an RCU-protected key
+ *
+ * Returns the key with its refcnt raised, or NULL when the pointer is NULL
+ * or the refcnt has already dropped to zero.
+ */
+static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead)
+{
+	struct tipc_aead *key;
+
+	rcu_read_lock();
+	key = rcu_dereference(aead);
+	if (likely(key) && unlikely(!refcount_inc_not_zero(&key->refcnt)))
+		key = NULL;
+	rcu_read_unlock();
+
+	return key;
+}
+
+/* tipc_aead_put - drop a key reference; when the last one goes, the key is
+ * handed to call_rcu() with tipc_aead_free() as callback. NULL is accepted.
+ */
+static inline void tipc_aead_put(struct tipc_aead *aead)
+{
+	if (!aead)
+		return;
+
+	if (refcount_dec_and_test(&aead->refcnt))
+		call_rcu(&aead->rcu, tipc_aead_free);
+}
+
+/**
+ * tipc_aead_free - Release AEAD key incl. all the TFMs in the list
+ * @rp: rcu head pointer
+ *
+ * Passed to call_rcu() by tipc_aead_put(). A cloned key only drops the
+ * reference it holds on its source key; any other key frees every TFM entry
+ * of its list. In both cases the per-cpu pointer array, the key material and
+ * the structure itself are released afterwards.
+ */
+static void tipc_aead_free(struct rcu_head *rp)
+{
+ struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu);
+ struct tipc_tfm *tfm_entry, *head, *tmp;
+
+ if (aead->cloned) {
+ tipc_aead_put(aead->cloned);
+ } else {
+ /* Start from the entry the current CPU points at */
+ head = *get_cpu_ptr(aead->tfm_entry);
+ put_cpu_ptr(aead->tfm_entry);
+ list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) {
+ crypto_free_aead(tfm_entry->tfm);
+ list_del(&tfm_entry->list);
+ kfree(tfm_entry);
+ }
+ /* Free the head */
+ crypto_free_aead(head->tfm);
+ list_del(&head->list);
+ kfree(head);
+ }
+ free_percpu(aead->tfm_entry);
+ kfree_sensitive(aead->key);
+ kfree_sensitive(aead);
+}
+
+/* Read the current users counter of an RCU-protected AEAD key; an empty
+ * slot reports 0.
+ */
+static int tipc_aead_users(struct tipc_aead __rcu *aead)
+{
+	struct tipc_aead *key;
+	int cnt;
+
+	rcu_read_lock();
+	key = rcu_dereference(aead);
+	cnt = key ? atomic_read(&key->users) : 0;
+	rcu_read_unlock();
+
+	return cnt;
+}
+
+/* Increase the users counter of an RCU-protected AEAD key by one, unless
+ * the counter already equals @lim. An empty slot is left untouched.
+ */
+static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim)
+{
+	struct tipc_aead *key;
+
+	rcu_read_lock();
+	key = rcu_dereference(aead);
+	if (!key)
+		goto unlock;
+
+	atomic_add_unless(&key->users, 1, lim);
+unlock:
+	rcu_read_unlock();
+}
+
+/* Decrease the users counter of an RCU-protected AEAD key by one, unless
+ * the counter already equals @lim. An empty slot is left untouched.
+ */
+static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim)
+{
+	struct tipc_aead *tmp;
+
+	rcu_read_lock();
+	tmp = rcu_dereference(aead);
+	/* Operate on the pointer that was NULL-checked; dereferencing @aead a
+	 * second time could observe a different (possibly NULL) value.
+	 */
+	if (tmp)
+		atomic_add_unless(&tmp->users, -1, lim);
+	rcu_read_unlock();
+}
+
+/* Set the users counter of an RCU-protected AEAD key to @val. Nothing is
+ * done when the slot is empty.
+ */
+static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val)
+{
+ struct tipc_aead *tmp;
+ int cur;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp) {
+ /* Retry until the compare-and-exchange succeeds against the value
+ * that was just read, or the counter already holds @val
+ */
+ do {
+ cur = atomic_read(&tmp->users);
+ if (cur == val)
+ break;
+ } while (atomic_cmpxchg(&tmp->users, cur, val) != cur);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it
+ * @aead: the AEAD key pointer
+ *
+ * Return: the TFM of the entry that this CPU's pointer now refers to
+ */
+static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead)
+{
+ struct tipc_tfm **tfm_entry;
+ struct crypto_aead *tfm;
+
+ /* Only the calling CPU's entry pointer is advanced */
+ tfm_entry = get_cpu_ptr(aead->tfm_entry);
+ *tfm_entry = list_next_entry(*tfm_entry, list);
+ tfm = (*tfm_entry)->tfm;
+ put_cpu_ptr(tfm_entry);
+
+ return tfm;
+}
+
+/**
+ * tipc_aead_init - Initiate TIPC AEAD
+ * @aead: returned new TIPC AEAD key handle pointer; must point to NULL on
+ *        entry
+ * @ukey: pointer to user key data
+ * @mode: the key mode
+ *
+ * Allocate a (list of) new cipher transformation (TFM) with the specific user
+ * key data if valid. The number of the allocated TFMs can be set via the sysfs
+ * "net/tipc/max_tfms" first.
+ * Also, all the other AEAD data are also initialized.
+ *
+ * If at least one TFM could be set up, a failure on a later TFM only ends the
+ * allocation loop and the key is still created with the TFMs obtained so far.
+ *
+ * Return: 0 if the initiation is successful, otherwise: < 0
+ * (-EEXIST if *@aead is already set, -ENOMEM on allocation failure, or the
+ * error reported while setting up the first TFM)
+ */
+static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+			  u8 mode)
+{
+	struct tipc_tfm *tfm_entry, *head;
+	struct crypto_aead *tfm;
+	struct tipc_aead *tmp;
+	int keylen, err, cpu;
+	int tfm_cnt = 0;
+
+	if (unlikely(*aead))
+		return -EEXIST;
+
+	/* Allocate a new AEAD */
+	tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+	if (unlikely(!tmp))
+		return -ENOMEM;
+
+	/* The key consists of two parts: [AES-KEY][SALT] */
+	keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
+
+	/* Allocate per-cpu TFM entry pointer */
+	tmp->tfm_entry = alloc_percpu(struct tipc_tfm *);
+	if (!tmp->tfm_entry) {
+		kfree_sensitive(tmp);
+		return -ENOMEM;
+	}
+
+	/* Make a list of TFMs with the user key data */
+	do {
+		tfm = crypto_alloc_aead(ukey->alg_name, 0, 0);
+		if (IS_ERR(tfm)) {
+			err = PTR_ERR(tfm);
+			break;
+		}
+
+		/* The IV size only needs to be verified once */
+		if (unlikely(!tfm_cnt &&
+			     crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) {
+			crypto_free_aead(tfm);
+			err = -ENOTSUPP;
+			break;
+		}
+
+		/* Stop at the first failure so that one meaningful errno is
+		 * reported rather than the bitwise OR of two error codes
+		 */
+		err = crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE);
+		if (likely(!err))
+			err = crypto_aead_setkey(tfm, ukey->key, keylen);
+		if (unlikely(err)) {
+			crypto_free_aead(tfm);
+			break;
+		}
+
+		tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL);
+		if (unlikely(!tfm_entry)) {
+			crypto_free_aead(tfm);
+			err = -ENOMEM;
+			break;
+		}
+		INIT_LIST_HEAD(&tfm_entry->list);
+		tfm_entry->tfm = tfm;
+
+		/* First entry? Then every CPU starts from it */
+		if (!tfm_cnt) {
+			head = tfm_entry;
+			for_each_possible_cpu(cpu) {
+				*per_cpu_ptr(tmp->tfm_entry, cpu) = head;
+			}
+		} else {
+			list_add_tail(&tfm_entry->list, &head->list);
+		}
+
+	} while (++tfm_cnt < sysctl_tipc_max_tfms);
+
+	/* Not any TFM is allocated? */
+	if (!tfm_cnt) {
+		free_percpu(tmp->tfm_entry);
+		kfree_sensitive(tmp);
+		return err;
+	}
+
+	/* Form a hex string of some last bytes as the key's hint */
+	bin2hex(tmp->hint, ukey->key + keylen - TIPC_AEAD_HINT_LEN,
+		TIPC_AEAD_HINT_LEN);
+
+	/* Initialize the other data */
+	tmp->mode = mode;
+	tmp->cloned = NULL;
+	tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+	tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
+	if (!tmp->key) {
+		/* Releases the TFM list and the per-cpu pointers as well */
+		tipc_aead_free(&tmp->rcu);
+		return -ENOMEM;
+	}
+	memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
+	atomic_set(&tmp->users, 0);
+	atomic64_set(&tmp->seqno, 0);
+	refcount_set(&tmp->refcnt, 1);
+
+	*aead = tmp;
+	return 0;
+}
+
+/**
+ * tipc_aead_clone - Clone a TIPC AEAD key
+ * @dst: dest key for the cloning
+ * @src: source key to clone from
+ *
+ * Make a "copy" of the source AEAD key data to the dest, the TFMs list is
+ * common for the keys.
+ * A reference to the source is held in the "cloned" pointer for the later
+ * freeing purposes.
+ *
+ * Note: this must be done in cluster-key mode only!
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src)
+{
+ struct tipc_aead *aead;
+ int cpu;
+
+ if (!src)
+ return -ENOKEY;
+
+ if (src->mode != CLUSTER_KEY)
+ return -EINVAL;
+
+ if (unlikely(*dst))
+ return -EEXIST;
+
+ aead = kzalloc(sizeof(*aead), GFP_ATOMIC);
+ if (unlikely(!aead))
+ return -ENOMEM;
+
+ aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC);
+ if (unlikely(!aead->tfm_entry)) {
+ kfree_sensitive(aead);
+ return -ENOMEM;
+ }
+
+ /* Point each CPU at the same TFM entry the source currently uses */
+ for_each_possible_cpu(cpu) {
+ *per_cpu_ptr(aead->tfm_entry, cpu) =
+ *per_cpu_ptr(src->tfm_entry, cpu);
+ }
+
+ /* The clone gets its own users, seqno and refcnt; the key data itself
+ * (aead->key) is not copied and stays NULL from kzalloc()
+ */
+ memcpy(aead->hint, src->hint, sizeof(src->hint));
+ aead->mode = src->mode;
+ aead->salt = src->salt;
+ aead->authsize = src->authsize;
+ atomic_set(&aead->users, 0);
+ atomic64_set(&aead->seqno, 0);
+ refcount_set(&aead->refcnt, 1);
+
+ /* Pin the source; the reference is dropped in tipc_aead_free() */
+ WARN_ON(!refcount_inc_not_zero(&src->refcnt));
+ aead->cloned = src;
+
+ *dst = aead;
+ return 0;
+}
+
+/**
+ * tipc_aead_mem_alloc - Allocate memory for AEAD request operations
+ * @tfm: cipher handle to be registered with the request
+ * @crypto_ctx_size: size of crypto context for callback
+ * @iv: returned pointer to IV data
+ * @req: returned pointer to AEAD request data
+ * @sg: returned pointer to SG lists
+ * @nsg: number of SG lists to be allocated
+ *
+ * Allocate memory to store the crypto context data, AEAD request, IV and SG
+ * lists, the memory layout is as follows:
+ * crypto_ctx || iv || aead_req || sg[]
+ *
+ * The whole area is a single kmalloc(GFP_ATOMIC) buffer; the callers in this
+ * file release it with kfree() on the returned pointer.
+ *
+ * Return: the pointer to the memory areas in case of success, otherwise NULL
+ */
+static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
+ unsigned int crypto_ctx_size,
+ u8 **iv, struct aead_request **req,
+ struct scatterlist **sg, int nsg)
+{
+ unsigned int iv_size, req_size;
+ unsigned int len;
+ u8 *mem;
+
+ iv_size = crypto_aead_ivsize(tfm);
+ req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
+
+ /* Sum up the sizes, leaving room for the padding introduced by the
+ * PTR_ALIGN() calls below
+ */
+ len = crypto_ctx_size;
+ len += iv_size;
+ len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1);
+ len = ALIGN(len, crypto_tfm_ctx_alignment());
+ len += req_size;
+ len = ALIGN(len, __alignof__(struct scatterlist));
+ len += nsg * sizeof(**sg);
+
+ mem = kmalloc(len, GFP_ATOMIC);
+ if (!mem)
+ return NULL;
+
+ /* Carve the sub-areas out of the buffer, each at its own alignment;
+ * the crypto context itself starts at @mem
+ */
+ *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size,
+ crypto_aead_alignmask(tfm) + 1);
+ *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size,
+ crypto_tfm_ctx_alignment());
+ *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size,
+ __alignof__(struct scatterlist));
+
+ return (void *)mem;
+}
+
+/**
+ * tipc_aead_encrypt - Encrypt a message
+ * @aead: TIPC AEAD key for the message encryption
+ * @skb: the input/output skb
+ * @b: TIPC bearer where the message will be delivered after the encryption
+ * @dst: the destination media address
+ * @__dnode: TIPC dest node if "known"
+ *
+ * When the request completes asynchronously, the crypto context and the
+ * bearer/net references taken here are released by tipc_aead_encrypt_done().
+ * In every other case they are released before returning.
+ *
+ * Return:
+ * * 0 : if the encryption has completed
+ * * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * * < 0 : the encryption has failed
+ */
+static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct crypto_aead *tfm = tipc_aead_tfm_next(aead);
+ struct tipc_crypto_tx_ctx *tx_ctx;
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ struct scatterlist *sg;
+ struct tipc_ehdr *ehdr;
+ int ehsz, len, tailen, nsg, rc;
+ void *ctx;
+ u32 salt;
+ u8 *iv;
+
+ /* Make sure message len at least 4-byte aligned */
+ len = ALIGN(skb->len, 4);
+ tailen = len - skb->len + aead->authsize;
+
+ /* Expand skb tail for authentication tag:
+ * As for simplicity, we'd have made sure skb having enough tailroom
+ * for authentication tag @skb allocation. Even when skb is nonlinear
+ * but there is no frag_list, it should be still fine!
+ * Otherwise, we must cow it to be a writable buffer with the tailroom.
+ */
+ SKB_LINEAR_ASSERT(skb);
+ if (tailen > skb_tailroom(skb)) {
+ pr_debug("TX(): skb tailroom is not enough: %d, requires: %d\n",
+ skb_tailroom(skb), tailen);
+ }
+
+ /* The return value is used below as the number of SG entries */
+ nsg = skb_cow_data(skb, tailen, &trailer);
+ if (unlikely(nsg < 0)) {
+ pr_err("TX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
+ }
+
+ /* Extend the skb by the alignment padding plus the tag room */
+ pskb_put(skb, trailer, tailen);
+
+ /* Allocate memory for the AEAD operation */
+ ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+ TIPC_SKB_CB(skb)->crypto_ctx = ctx;
+
+ /* Map skb to the sg lists */
+ sg_init_table(sg, nsg);
+ rc = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(rc < 0)) {
+ pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg);
+ goto exit;
+ }
+
+ /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)]
+ * In case we're in cluster-key mode, SALT is varied by xor-ing with
+ * the source address (or w0 of id), otherwise with the dest address
+ * if dest is known.
+ */
+ ehdr = (struct tipc_ehdr *)skb->data;
+ salt = aead->salt;
+ if (aead->mode == CLUSTER_KEY)
+ salt ^= __be32_to_cpu(ehdr->addr);
+ else if (__dnode)
+ salt ^= tipc_node_get_addr(__dnode);
+ memcpy(iv, &salt, 4);
+ memcpy(iv + 4, (u8 *)&ehdr->seqno, 8);
+
+ /* Prepare request: the encryption header is passed as associated
+ * data, the rest of the (aligned) message is encrypted in place
+ */
+ ehsz = tipc_ehdr_size(ehdr);
+ aead_request_set_tfm(req, tfm);
+ aead_request_set_ad(req, ehsz);
+ aead_request_set_crypt(req, sg, sg, len - ehsz, iv);
+
+ /* Set callback function & data */
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tipc_aead_encrypt_done, skb);
+ tx_ctx = (struct tipc_crypto_tx_ctx *)ctx;
+ tx_ctx->aead = aead;
+ tx_ctx->bearer = b;
+ memcpy(&tx_ctx->dst, dst, sizeof(*dst));
+
+ /* Hold bearer */
+ if (unlikely(!tipc_bearer_hold(b))) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Get net to avoid freed tipc_crypto when delete namespace */
+ if (!maybe_get_net(aead->crypto->net)) {
+ tipc_bearer_put(b);
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Now, do encrypt */
+ rc = crypto_aead_encrypt(req);
+ if (rc == -EINPROGRESS || rc == -EBUSY)
+ return rc;
+
+ /* Completed (or failed) synchronously: undo the holds taken above */
+ tipc_bearer_put(b);
+ put_net(aead->crypto->net);
+
+exit:
+ kfree(ctx);
+ TIPC_SKB_CB(skb)->crypto_ctx = NULL;
+ return rc;
+}
+
+/* Completion callback registered by tipc_aead_encrypt().
+ * @data: the skb that was passed to aead_request_set_callback()
+ * @err: completion status of the request
+ *
+ * On success the skb is handed to the bearer's media send_msg() if the
+ * bearer is still up, otherwise it is freed. On failure the skb is freed.
+ * In both cases the crypto context is freed and the bearer, AEAD and net
+ * references are dropped.
+ */
+static void tipc_aead_encrypt_done(void *data, int err)
+{
+ struct sk_buff *skb = data;
+ struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
+ struct tipc_bearer *b = tx_ctx->bearer;
+ struct tipc_aead *aead = tx_ctx->aead;
+ struct tipc_crypto *tx = aead->crypto;
+ struct net *net = tx->net;
+
+ switch (err) {
+ case 0:
+ this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]);
+ rcu_read_lock();
+ if (likely(test_bit(0, &b->up)))
+ b->media->send_msg(net, skb, b, &tx_ctx->dst);
+ else
+ kfree_skb(skb);
+ rcu_read_unlock();
+ break;
+ case -EINPROGRESS:
+ /* Not a final status: keep everything for the next invocation */
+ return;
+ default:
+ this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]);
+ kfree_skb(skb);
+ break;
+ }
+
+ kfree(tx_ctx);
+ tipc_bearer_put(b);
+ tipc_aead_put(aead);
+ put_net(net);
+}
+
+/**
+ * tipc_aead_decrypt - Decrypt an encrypted message
+ * @net: struct net
+ * @aead: TIPC AEAD for the message decryption
+ * @skb: the input/output skb
+ * @b: TIPC bearer where the message has been received
+ *
+ * When the request completes asynchronously, the crypto context and the
+ * bearer reference taken here are released by tipc_aead_decrypt_done().
+ * In every other case they are released before returning.
+ *
+ * Return:
+ * * 0 : if the decryption has completed
+ * * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * * < 0 : the decryption has failed
+ */
+static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
+ struct sk_buff *skb, struct tipc_bearer *b)
+{
+ struct tipc_crypto_rx_ctx *rx_ctx;
+ struct aead_request *req;
+ struct crypto_aead *tfm;
+ struct sk_buff *unused;
+ struct scatterlist *sg;
+ struct tipc_ehdr *ehdr;
+ int ehsz, nsg, rc;
+ void *ctx;
+ u32 salt;
+ u8 *iv;
+
+ if (unlikely(!aead))
+ return -ENOKEY;
+
+ /* No extra tailroom is requested; the return value is used below as
+ * the number of SG entries
+ */
+ nsg = skb_cow_data(skb, 0, &unused);
+ if (unlikely(nsg < 0)) {
+ pr_err("RX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
+ }
+
+ /* Allocate memory for the AEAD operation */
+ tfm = tipc_aead_tfm_next(aead);
+ ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+ TIPC_SKB_CB(skb)->crypto_ctx = ctx;
+
+ /* Map skb to the sg lists */
+ sg_init_table(sg, nsg);
+ rc = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(rc < 0)) {
+ pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg);
+ goto exit;
+ }
+
+ /* Reconstruct IV: the counterpart of the IV built in
+ * tipc_aead_encrypt(), using the own address when the message is
+ * marked as destined
+ */
+ ehdr = (struct tipc_ehdr *)skb->data;
+ salt = aead->salt;
+ if (aead->mode == CLUSTER_KEY)
+ salt ^= __be32_to_cpu(ehdr->addr);
+ else if (ehdr->destined)
+ salt ^= tipc_own_addr(net);
+ memcpy(iv, &salt, 4);
+ memcpy(iv + 4, (u8 *)&ehdr->seqno, 8);
+
+ /* Prepare request */
+ ehsz = tipc_ehdr_size(ehdr);
+ aead_request_set_tfm(req, tfm);
+ aead_request_set_ad(req, ehsz);
+ aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv);
+
+ /* Set callback function & data */
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tipc_aead_decrypt_done, skb);
+ rx_ctx = (struct tipc_crypto_rx_ctx *)ctx;
+ rx_ctx->aead = aead;
+ rx_ctx->bearer = b;
+
+ /* Hold bearer */
+ if (unlikely(!tipc_bearer_hold(b))) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Now, do decrypt */
+ rc = crypto_aead_decrypt(req);
+ if (rc == -EINPROGRESS || rc == -EBUSY)
+ return rc;
+
+ /* Completed (or failed) synchronously: undo the hold taken above */
+ tipc_bearer_put(b);
+
+exit:
+ kfree(ctx);
+ TIPC_SKB_CB(skb)->crypto_ctx = NULL;
+ return rc;
+}
+
+/* Completion callback registered by tipc_aead_decrypt().
+ * @data: the skb that was passed to aead_request_set_callback()
+ * @err: completion status of the request
+ *
+ * Updates the async statistics, frees the crypto context and lets
+ * tipc_crypto_rcv_complete() post-process the result. If an skb is left
+ * afterwards, it is passed to tipc_rcv() when the bearer is still up,
+ * otherwise freed. Finally the bearer reference is dropped.
+ */
+static void tipc_aead_decrypt_done(void *data, int err)
+{
+ struct sk_buff *skb = data;
+ struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
+ struct tipc_bearer *b = rx_ctx->bearer;
+ struct tipc_aead *aead = rx_ctx->aead;
+ struct tipc_crypto_stats __percpu *stats = aead->crypto->stats;
+ struct net *net = aead->crypto->net;
+
+ switch (err) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_ASYNC_OK]);
+ break;
+ case -EINPROGRESS:
+ /* Not a final status: keep everything for the next invocation */
+ return;
+ default:
+ this_cpu_inc(stats->stat[STAT_ASYNC_NOK]);
+ break;
+ }
+
+ kfree(rx_ctx);
+ /* skb is passed by reference and is checked for NULL afterwards */
+ tipc_crypto_rcv_complete(net, aead, b, &skb, err);
+ if (likely(skb)) {
+ if (likely(test_bit(0, &b->up)))
+ tipc_rcv(net, skb, b);
+ else
+ kfree_skb(skb);
+ }
+
+ tipc_bearer_put(b);
+}
+
+/* Size of the encryption header: EHDR_CFG_SIZE for LINK_CONFIG messages,
+ * EHDR_SIZE for all the others.
+ */
+static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr)
+{
+	if (ehdr->user == LINK_CONFIG)
+		return EHDR_CFG_SIZE;
+
+	return EHDR_SIZE;
+}
+
+/**
+ * tipc_ehdr_validate - Validate an encryption message
+ * @skb: the message buffer
+ *
+ * The message is accepted when its encryption header can be pulled, the
+ * header version is TIPC_EVERSION and the message is longer than the header
+ * plus the authentication tag.
+ *
+ * Return: "true" if this is a valid encryption message, otherwise "false"
+ */
+bool tipc_ehdr_validate(struct sk_buff *skb)
+{
+	struct tipc_ehdr *ehdr;
+	int hdr_len;
+
+	/* The minimal header must be pullable before any field is read */
+	if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE)))
+		return false;
+
+	ehdr = (struct tipc_ehdr *)skb->data;
+	if (unlikely(ehdr->version != TIPC_EVERSION))
+		return false;
+
+	/* Now that the header type is known, require the full header */
+	hdr_len = tipc_ehdr_size(ehdr);
+	if (unlikely(!pskb_may_pull(skb, hdr_len)))
+		return false;
+
+	/* Something must be left beyond the header and the tag */
+	if (unlikely(skb->len <= hdr_len + TIPC_AES_GCM_TAG_SIZE))
+		return false;
+
+	return true;
+}
+
+/**
+ * tipc_ehdr_build - Build TIPC encryption message header
+ * @net: struct net
+ * @aead: TX AEAD key to be used for the message encryption
+ * @tx_key: key id used for the message encryption
+ * @skb: input/output message skb
+ * @__rx: RX crypto handle if dest is "known"
+ *
+ * Return: the header size if the building is successful, otherwise < 0
+ * (the result of tipc_crypto_key_revoke() when the seqno has wrapped to 0)
+ */
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+ u8 tx_key, struct sk_buff *skb,
+ struct tipc_crypto *__rx)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_ehdr *ehdr;
+ u32 user = msg_user(hdr);
+ u64 seqno;
+ int ehsz;
+
+ /* Make room for encryption header */
+ ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE;
+ WARN_ON(skb_headroom(skb) < ehsz);
+ ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz);
+
+ /* Obtain a seqno first:
+ * Use the key seqno (= cluster wise) if dest is unknown or we're in
+ * cluster key mode, otherwise it's better for a per-peer seqno!
+ */
+ if (!__rx || aead->mode == CLUSTER_KEY)
+ seqno = atomic64_inc_return(&aead->seqno);
+ else
+ seqno = atomic64_inc_return(&__rx->sndnxt);
+
+ /* Revoke the key if seqno is wrapped around
+ * (the header has already been pushed onto the skb at this point)
+ */
+ if (unlikely(!seqno))
+ return tipc_crypto_key_revoke(net, tx_key);
+
+ /* Word 1-2 */
+ ehdr->seqno = cpu_to_be64(seqno);
+
+ /* Words 0, 3- */
+ ehdr->version = TIPC_EVERSION;
+ ehdr->user = 0;
+ ehdr->keepalive = 0;
+ ehdr->tx_key = tx_key;
+ ehdr->destined = (__rx) ? 1 : 0;
+ ehdr->rx_key_active = (__rx) ? __rx->key.active : 0;
+ ehdr->rx_nokey = (__rx) ? __rx->nokey : 0;
+ ehdr->master_key = aead->crypto->key_master;
+ ehdr->reserved_1 = 0;
+ ehdr->reserved_2 = 0;
+
+ switch (user) {
+ case LINK_CONFIG:
+ /* LINK_CONFIG headers carry the own node id */
+ ehdr->user = LINK_CONFIG;
+ memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN);
+ break;
+ default:
+ /* Only link STATE messages expose user/keepalive in the header */
+ if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) {
+ ehdr->user = LINK_PROTOCOL;
+ ehdr->keepalive = msg_is_keepalive(hdr);
+ }
+ /* Word 3 of the TIPC header is copied as is */
+ ehdr->addr = hdr->hdr[3];
+ break;
+ }
+
+ return ehsz;
+}
+
+/* Pack the passive, active and pending key ids (each masked with KEY_MASK)
+ * into c->key.keys and emit a debug trace of the change together with the
+ * caller's return address.
+ */
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+ u8 new_passive,
+ u8 new_active,
+ u8 new_pending)
+{
+ struct tipc_key old = c->key;
+ char buf[32];
+
+ c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) |
+ ((new_active & KEY_MASK) << (KEY_BITS)) |
+ ((new_pending & KEY_MASK));
+
+ pr_debug("%s: key changing %s ::%pS\n", c->name,
+ tipc_key_change_dump(old, c->key, buf),
+ __builtin_return_address(0));
+}
+
+/**
+ * tipc_crypto_key_init - Initiate a new user / AEAD key
+ * @c: TIPC crypto to which new key is attached
+ * @ukey: the user key
+ * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY)
+ * @master_key: specify this is a cluster master key
+ *
+ * A new TIPC AEAD key will be allocated and initiated with the specified user
+ * key, then attached to the TIPC crypto. If the attaching fails, the new key
+ * is released again.
+ *
+ * Return: new key id in case of success, otherwise: < 0
+ */
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+			 u8 mode, bool master_key)
+{
+	struct tipc_aead *new_aead = NULL;
+	int rc;
+
+	/* Initiate with the new user key */
+	rc = tipc_aead_init(&new_aead, ukey, mode);
+	if (unlikely(rc))
+		return rc;
+
+	/* Attach it to the crypto; a key that could not be attached was
+	 * never installed in a slot, so it is freed directly
+	 */
+	rc = tipc_crypto_key_attach(c, new_aead, 0, master_key);
+	if (rc < 0)
+		tipc_aead_free(&new_aead->rcu);
+
+	return rc;
+}
+
+/**
+ * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto
+ * @c: TIPC crypto to which the new AEAD key is attached
+ * @aead: the new AEAD key pointer
+ * @pos: desired slot in the crypto key array, = 0 if any!
+ * @master_key: specify this is a cluster master key
+ *
+ * The slot selection and the replacement are done under c->lock. On the
+ * -EBUSY path @aead is left untouched and is not installed.
+ *
+ * Return: new key id in case of success, otherwise: -EBUSY
+ */
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+ struct tipc_aead *aead, u8 pos,
+ bool master_key)
+{
+ struct tipc_key key;
+ int rc = -EBUSY;
+ u8 new_key;
+
+ spin_lock_bh(&c->lock);
+ key = c->key;
+ /* A master key always goes to its dedicated slot; the key state
+ * (passive/active/pending) is not changed for it
+ */
+ if (master_key) {
+ new_key = KEY_MASTER;
+ goto attach;
+ }
+ /* Both active and passive slots in use: refuse the new key */
+ if (key.active && key.passive)
+ goto exit;
+ if (key.pending) {
+ /* A pending key that already has users cannot be replaced */
+ if (tipc_aead_users(c->aead[key.pending]) > 0)
+ goto exit;
+ /* if (pos): ok with replacing, will be aligned when needed */
+ /* Replace it */
+ new_key = key.pending;
+ } else {
+ if (pos) {
+ if (key.active && pos != key_next(key.active)) {
+ key.passive = pos;
+ new_key = pos;
+ goto attach;
+ } else if (!key.active && !key.passive) {
+ key.pending = pos;
+ new_key = pos;
+ goto attach;
+ }
+ }
+ /* Default: the new key becomes pending in the slot following
+ * the active (or else the passive) one
+ */
+ key.pending = key_next(key.active ?: key.passive);
+ new_key = key.pending;
+ }
+
+attach:
+ aead->crypto = c;
+ /* Only a TX crypto bumps the key generation */
+ aead->gen = (is_tx(c)) ? ++c->key_gen : c->key_gen;
+ tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock);
+ if (likely(c->key.keys != key.keys))
+ tipc_crypto_key_set_state(c, key.passive, key.active,
+ key.pending);
+ c->working = 1;
+ c->nokey = 0;
+ c->key_master |= master_key;
+ rc = new_key;
+
+exit:
+ spin_unlock_bh(&c->lock);
+ return rc;
+}
+
+/* Detach all the keys (KEY_MIN..KEY_MAX) of a TIPC crypto and reset its key
+ * state, flags and sndnxt, all under c->lock.
+ * For an RX crypto, a pending delayed work is cancelled first and the TX key
+ * that the peer was using as RX active key gets one user less.
+ */
+void tipc_crypto_key_flush(struct tipc_crypto *c)
+{
+ struct tipc_crypto *tx, *rx;
+ int k;
+
+ spin_lock_bh(&c->lock);
+ if (is_rx(c)) {
+ /* Try to cancel pending work */
+ rx = c;
+ tx = tipc_net(rx->net)->crypto_tx;
+ if (cancel_delayed_work(&rx->work)) {
+ /* The work will not run: release what was prepared for it,
+ * incl. the node reference taken when it was queued
+ */
+ kfree(rx->skey);
+ rx->skey = NULL;
+ atomic_xchg(&rx->key_distr, 0);
+ tipc_node_put(rx->node);
+ }
+ /* RX stopping => decrease TX key users if any */
+ k = atomic_xchg(&rx->peer_rx_active, 0);
+ if (k) {
+ tipc_aead_users_dec(tx->aead[k], 0);
+ /* Mark the point TX key users changed */
+ tx->timer1 = jiffies;
+ }
+ }
+
+ c->flags = 0;
+ tipc_crypto_key_set_state(c, 0, 0, 0);
+ for (k = KEY_MIN; k <= KEY_MAX; k++)
+ tipc_crypto_key_detach(c->aead[k], &c->lock);
+ atomic64_set(&c->sndnxt, 0);
+ spin_unlock_bh(&c->lock);
+}
+
+/**
+ * tipc_crypto_key_try_align - Align RX keys if possible
+ * @rx: RX crypto handle
+ * @new_pending: new pending slot if aligned (= TX key from peer)
+ *
+ * Peer has used an unknown key slot, this only happens when peer has left and
+ * rejoined, or we are newcomer.
+ * That means, there must be no active key but a pending key at unaligned slot.
+ * If so, we try to move the pending key to the new slot.
+ * Note: A potential passive key can exist, it will be shifted correspondingly!
+ *
+ * Return: "true" if key is successfully aligned, otherwise "false"
+ */
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending)
+{
+ struct tipc_aead *tmp1, *tmp2 = NULL;
+ struct tipc_key key;
+ bool aligned = false;
+ u8 new_passive = 0;
+ int x;
+
+ spin_lock(&rx->lock);
+ key = rx->key;
+ /* Already at the requested slot: nothing to move */
+ if (key.pending == new_pending) {
+ aligned = true;
+ goto exit;
+ }
+ if (key.active)
+ goto exit;
+ if (!key.pending)
+ goto exit;
+ if (tipc_aead_users(rx->aead[key.pending]) > 0)
+ goto exit;
+
+ /* Try to "isolate" this pending key first */
+ tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock);
+ /* Succeeds only if the slot holds the sole reference; the refcount
+ * is restored to 1 once the key sits in its new slot
+ */
+ if (!refcount_dec_if_one(&tmp1->refcnt))
+ goto exit;
+ rcu_assign_pointer(rx->aead[key.pending], NULL);
+
+ /* Move passive key if any */
+ if (key.passive) {
+ /* tmp2 is NULL here, so the old passive slot is emptied */
+ tmp2 = rcu_replace_pointer(rx->aead[key.passive], tmp2, lockdep_is_held(&rx->lock));
+ /* Keep the distance between passive and pending slots, wrapping
+ * the result into 1..KEY_MAX
+ */
+ x = (key.passive - key.pending + new_pending) % KEY_MAX;
+ new_passive = (x <= 0) ? x + KEY_MAX : x;
+ }
+
+ /* Re-allocate the key(s) */
+ tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
+ rcu_assign_pointer(rx->aead[new_pending], tmp1);
+ if (new_passive)
+ rcu_assign_pointer(rx->aead[new_passive], tmp2);
+ refcount_set(&tmp1->refcnt, 1);
+ aligned = true;
+ pr_info_ratelimited("%s: key[%d] -> key[%d]\n", rx->name, key.pending,
+ new_pending);
+
+exit:
+ spin_unlock(&rx->lock);
+ return aligned;
+}
+
+/**
+ * tipc_crypto_key_pick_tx - Pick one TX key for message decryption
+ * @tx: TX crypto handle
+ * @rx: RX crypto handle (can be NULL)
+ * @skb: the message skb which will be decrypted later
+ * @tx_key: peer TX key id
+ *
+ * This function looks up the existing TX keys and pick one which is suitable
+ * for the message decryption, that must be a cluster key and not used before
+ * on the same message (i.e. recursive).
+ *
+ * For a non-master key, a clone of @skb is stored in @skb->next for a
+ * possible next round. The returned key carries an extra reference.
+ *
+ * Return: the TX AEAD key handle in case of success, otherwise NULL
+ */
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+ struct tipc_crypto *rx,
+ struct sk_buff *skb,
+ u8 tx_key)
+{
+ struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb);
+ struct tipc_aead *aead = NULL;
+ struct tipc_key key = tx->key;
+ u8 k, i = 0;
+
+ /* Initialize data if not yet */
+ if (!skb_cb->tx_clone_deferred) {
+ skb_cb->tx_clone_deferred = 1;
+ memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+ }
+
+ /* Give up after two attempts on the same message */
+ skb_cb->tx_clone_ctx.rx = rx;
+ if (++skb_cb->tx_clone_ctx.recurs > 2)
+ return NULL;
+
+ /* Pick one TX key */
+ spin_lock(&tx->lock);
+ if (tx_key == KEY_MASTER) {
+ aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock);
+ goto done;
+ }
+ /* Try the slots in the order: pending, active, passive */
+ do {
+ k = (i == 0) ? key.pending :
+ ((i == 1) ? key.active : key.passive);
+ if (!k)
+ continue;
+ aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock);
+ if (!aead)
+ continue;
+ /* Skip non-cluster keys and the key tried in the last round */
+ if (aead->mode != CLUSTER_KEY ||
+ aead == skb_cb->tx_clone_ctx.last) {
+ aead = NULL;
+ continue;
+ }
+ /* Ok, found one cluster key */
+ skb_cb->tx_clone_ctx.last = aead;
+ WARN_ON(skb->next);
+ skb->next = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb->next))
+ pr_warn("Failed to clone skb for next round if any\n");
+ break;
+ } while (++i < 3);
+
+done:
+ if (likely(aead))
+ WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
+ spin_unlock(&tx->lock);
+
+ return aead;
+}
+
+/**
+ * tipc_crypto_key_synch - Synch own key data according to peer key status
+ * @rx: RX crypto handle
+ * @skb: TIPCv2 message buffer (incl. the ehdr from peer)
+ *
+ * This function updates the peer node related data as the peer RX active key
+ * has changed, so the number of TX keys' users on this node are increased and
+ * decreased correspondingly.
+ *
+ * It also considers if peer has no key, then we need to make own master key
+ * (if any) taking over i.e. starting grace period and also trigger key
+ * distributing process.
+ *
+ * The "per-peer" sndnxt is also reset when the peer key has switched.
+ */
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb)
+{
+	struct tipc_ehdr *ehdr = (struct tipc_ehdr *)skb_network_header(skb);
+	struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+	struct tipc_msg *hdr = buf_msg(skb);
+	u32 self = tipc_own_addr(rx->net);
+	u8 cur, new;
+	unsigned long delay;
+	u16 rnd;
+
+	/* Update RX 'key_master' flag according to peer, also mark "legacy" if
+	 * a peer has no master key.
+	 */
+	rx->key_master = ehdr->master_key;
+	if (!rx->key_master)
+		tx->legacy_user = 1;
+
+	/* For later cases, apply only if message is destined to this node */
+	if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self)
+		return;
+
+	/* Case 1: Peer has no keys, let's make master key take over */
+	if (ehdr->rx_nokey) {
+		/* Set or extend grace period */
+		tx->timer2 = jiffies;
+		/* Schedule key distributing for the peer if not yet */
+		if (tx->key.keys &&
+		    !atomic_cmpxchg(&rx->key_distr, 0, KEY_DISTR_SCHED)) {
+			/* Random delay of 500, 1000, ..., 2500 ms. A fully
+			 * initialised 16-bit value is used so that no
+			 * uninitialised bytes take part in the computation.
+			 */
+			get_random_bytes(&rnd, sizeof(rnd));
+			delay = msecs_to_jiffies(500 * (rnd % 5 + 1));
+			/* The queued work holds a reference on the node */
+			if (queue_delayed_work(tx->wq, &rx->work, delay))
+				tipc_node_get(rx->node);
+		}
+	} else {
+		/* Cancel a pending key distributing if any */
+		atomic_xchg(&rx->key_distr, 0);
+	}
+
+	/* Case 2: Peer RX active key has changed, let's update own TX users */
+	cur = atomic_read(&rx->peer_rx_active);
+	new = ehdr->rx_key_active;
+	if (tx->key.keys &&
+	    cur != new &&
+	    atomic_cmpxchg(&rx->peer_rx_active, cur, new) == cur) {
+		if (new)
+			tipc_aead_users_inc(tx->aead[new], INT_MAX);
+		if (cur)
+			tipc_aead_users_dec(tx->aead[cur], 0);
+
+		atomic64_set(&rx->sndnxt, 0);
+		/* Mark the point TX key users changed */
+		tx->timer1 = jiffies;
+
+		pr_debug("%s: key users changed %d-- %d++, peer %s\n",
+			 tx->name, cur, new, rx->name);
+	}
+}
+
+/* Revoke the TX active key of @net: clear the active slot in the key state
+ * and detach the key, under the TX lock. A warning is raised if there is no
+ * active key or @tx_key is not the active one.
+ * Always returns -EKEYREVOKED.
+ */
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
+{
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_key key;
+
+ spin_lock_bh(&tx->lock);
+ key = tx->key;
+ WARN_ON(!key.active || tx_key != key.active);
+
+ /* Free the active key */
+ tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
+ tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+ spin_unlock_bh(&tx->lock);
+
+ pr_warn("%s: key is revoked\n", tx->name);
+ return -EKEYREVOKED;
+}
+
+/* Allocate and initialize a TIPC crypto handle.
+ * @crypto: returned handle; must point to NULL on entry
+ * @net: struct net the crypto belongs to
+ * @node: the peer node for an RX crypto, NULL for the TX crypto
+ *
+ * Return: 0 on success, -EEXIST if *@crypto is already set, -ENOMEM on
+ * allocation failure.
+ */
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+ struct tipc_node *node)
+{
+ struct tipc_crypto *c;
+
+ if (*crypto)
+ return -EEXIST;
+
+ /* Allocate crypto */
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
+ if (!c)
+ return -ENOMEM;
+
+ /* Allocate workqueue on TX */
+ if (!node) {
+ c->wq = alloc_ordered_workqueue("tipc_crypto", 0);
+ if (!c->wq) {
+ kfree(c);
+ return -ENOMEM;
+ }
+ }
+
+ /* Allocate statistic structure */
+ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
+ if (!c->stats) {
+ if (c->wq)
+ destroy_workqueue(c->wq);
+ kfree_sensitive(c);
+ return -ENOMEM;
+ }
+
+ c->flags = 0;
+ c->net = net;
+ c->node = node;
+ /* Start the key generation counter from a random value */
+ get_random_bytes(&c->key_gen, 2);
+ tipc_crypto_key_set_state(c, 0, 0, 0);
+ atomic_set(&c->key_distr, 0);
+ atomic_set(&c->peer_rx_active, 0);
+ atomic64_set(&c->sndnxt, 0);
+ c->timer1 = jiffies;
+ c->timer2 = jiffies;
+ c->rekeying_intv = TIPC_REKEYING_INTV_DEF;
+ spin_lock_init(&c->lock);
+ /* Name is "RX(<peer node id>)" or "TX(<own node id>)" */
+ scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX",
+ (is_rx(c)) ? tipc_node_get_id_str(c->node) :
+ tipc_own_id_string(c->net));
+
+ if (is_rx(c))
+ INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx);
+ else
+ INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx);
+
+ *crypto = c;
+ return 0;
+}
+
+/* Stop and free a TIPC crypto handle; *@crypto is set to NULL.
+ * A NULL handle is accepted and ignored.
+ */
+void tipc_crypto_stop(struct tipc_crypto **crypto)
+{
+ struct tipc_crypto *c = *crypto;
+ u8 k;
+
+ if (!c)
+ return;
+
+ /* Flush any queued works & destroy wq */
+ if (is_tx(c)) {
+ c->rekeying_intv = 0;
+ cancel_delayed_work_sync(&c->work);
+ destroy_workqueue(c->wq);
+ }
+
+ /* Release AEAD keys */
+ rcu_read_lock();
+ for (k = KEY_MIN; k <= KEY_MAX; k++)
+ tipc_aead_put(rcu_dereference(c->aead[k]));
+ rcu_read_unlock();
+ pr_debug("%s: has been stopped\n", c->name);
+
+ /* Free this crypto statistics */
+ free_percpu(c->stats);
+
+ *crypto = NULL;
+ kfree_sensitive(c);
+}
+
+/* Periodic key state handling for the peer's RX crypto and the own TX crypto.
+ * @rx: RX crypto handle
+ *
+ * One TX transition is evaluated first (pending -> active), then at most one
+ * of the RX transitions below is applied per call, in this order:
+ * pending -> active, pending -> removed, active -> pending/passive,
+ * passive -> freed.
+ * Finally the TX "legacy_user" flag is relaxed once the grace period is over,
+ * and an out-of-range sysctl_tipc_max_tfms value is reset to the default and
+ * passed to tipc_crypto_do_cmd().
+ */
+void tipc_crypto_timeout(struct tipc_crypto *rx)
+{
+ struct tipc_net *tn = tipc_net(rx->net);
+ struct tipc_crypto *tx = tn->crypto_tx;
+ struct tipc_key key;
+ int cmd;
+
+ /* TX pending: taking all users & stable -> active */
+ spin_lock(&tx->lock);
+ key = tx->key;
+ if (key.active && tipc_aead_users(tx->aead[key.active]) > 0)
+ goto s1;
+ if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0)
+ goto s1;
+ if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_TIME))
+ goto s1;
+
+ tipc_crypto_key_set_state(tx, key.passive, key.pending, 0);
+ if (key.active)
+ tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+ this_cpu_inc(tx->stats->stat[STAT_SWITCHES]);
+ pr_info("%s: key[%d] is activated\n", tx->name, key.pending);
+
+s1:
+ spin_unlock(&tx->lock);
+
+ /* RX pending: having user -> active */
+ spin_lock(&rx->lock);
+ key = rx->key;
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0)
+ goto s2;
+
+ /* The previous active key, if any, is kept as the passive one */
+ if (key.active)
+ key.passive = key.active;
+ key.active = key.pending;
+ rx->timer2 = jiffies;
+ tipc_crypto_key_set_state(rx, key.passive, key.active, 0);
+ this_cpu_inc(rx->stats->stat[STAT_SWITCHES]);
+ pr_info("%s: key[%d] is activated\n", rx->name, key.pending);
+ goto s5;
+
+s2:
+ /* RX pending: not working -> remove */
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -10)
+ goto s3;
+
+ tipc_crypto_key_set_state(rx, key.passive, key.active, 0);
+ tipc_crypto_key_detach(rx->aead[key.pending], &rx->lock);
+ pr_debug("%s: key[%d] is removed\n", rx->name, key.pending);
+ goto s5;
+
+s3:
+ /* RX active: timed out or no user -> pending */
+ if (!key.active)
+ goto s4;
+ if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM) &&
+ tipc_aead_users(rx->aead[key.active]) > 0)
+ goto s4;
+
+ /* With a pending key present the active key becomes passive,
+ * otherwise it goes back to pending
+ */
+ if (key.pending)
+ key.passive = key.active;
+ else
+ key.pending = key.active;
+ rx->timer2 = jiffies;
+ tipc_crypto_key_set_state(rx, key.passive, 0, key.pending);
+ tipc_aead_users_set(rx->aead[key.pending], 0);
+ pr_debug("%s: key[%d] is deactivated\n", rx->name, key.active);
+ goto s5;
+
+s4:
+ /* RX passive: outdated or not working -> free */
+ if (!key.passive)
+ goto s5;
+ if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM) &&
+ tipc_aead_users(rx->aead[key.passive]) > -10)
+ goto s5;
+
+ tipc_crypto_key_set_state(rx, 0, key.active, key.pending);
+ tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock);
+ pr_debug("%s: key[%d] is freed\n", rx->name, key.passive);
+
+s5:
+ spin_unlock(&rx->lock);
+
+ /* Relax it here, the flag will be set again if it really is, but only
+ * when we are not in grace period for safety!
+ */
+ if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD))
+ tx->legacy_user = 0;
+
+ /* Limit max_tfms & do debug commands if needed */
+ if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM))
+ return;
+
+ /* A value above the limit is taken as a command code */
+ cmd = sysctl_tipc_max_tfms;
+ sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF;
+ tipc_crypto_do_cmd(rx->net, cmd);
+}
+
+/* Clone @_skb, tag the clone with the xmit @type and run it through
+ * tipc_crypto_xmit(); if the clone is handed back (i.e. not consumed), send
+ * it on bearer @b towards @dst. A failed clone is silently skipped.
+ */
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+					 struct tipc_bearer *b,
+					 struct tipc_media_addr *dst,
+					 struct tipc_node *__dnode, u8 type)
+{
+	struct sk_buff *clone = skb_clone(_skb, GFP_ATOMIC);
+
+	if (!clone)
+		return;
+
+	TIPC_SKB_CB(clone)->xmit_type = type;
+	tipc_crypto_xmit(net, &clone, b, dst, __dnode);
+
+	/* tipc_crypto_xmit() clears the pointer on failure or async xmit */
+	if (!clone)
+		return;
+
+	b->media->send_msg(net, clone, b, dst);
+}
+
+/**
+ * tipc_crypto_xmit - Build & encrypt TIPC message for xmit
+ * @net: struct net
+ * @skb: input/output message skb pointer
+ * @b: bearer used for xmit later
+ * @dst: destination media address
+ * @__dnode: destination node for reference if any
+ *
+ * First, build an encryption message header on the top of the message, then
+ * encrypt the original TIPC message by using the pending, master or active
+ * key with this preference order.
+ * If the encryption is successful, the encrypted skb is returned directly or
+ * via the callback.
+ * Otherwise, the skb is freed!
+ *
+ * Return:
+ * * 0 : the encryption has succeeded (or no encryption)
+ * * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made
+ * * -ENOKEY : the encryption has failed due to no key
+ * * -EKEYREVOKED : the encryption has failed due to key revoked
+ * * -ENOMEM : the encryption has failed due to no memory
+ * * < 0 : the encryption has failed due to other reasons
+ */
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode);
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_crypto_stats __percpu *stats = tx->stats;
+ struct tipc_msg *hdr = buf_msg(*skb);
+ struct tipc_key key = tx->key;
+ struct tipc_aead *aead = NULL;
+ u32 user = msg_user(hdr);
+ u32 type = msg_type(hdr);
+ int rc = -ENOKEY;
+ u8 tx_key = 0;
+
+ /* No encryption? */
+ if (!tx->working)
+ return 0;
+
+ /* Pending key if peer has active on it or probing time */
+ if (unlikely(key.pending)) {
+ tx_key = key.pending;
+ if (!tx->key_master && !key.active)
+ goto encrypt;
+ if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key)
+ goto encrypt;
+ if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) {
+ pr_debug("%s: probing for key[%d]\n", tx->name,
+ key.pending);
+ goto encrypt;
+ }
+ if (user == LINK_CONFIG || user == LINK_PROTOCOL)
+ tipc_crypto_clone_msg(net, *skb, b, dst, __dnode,
+ SKB_PROBING);
+ }
+
+ /* Master key if this is a *vital* message or in grace period */
+ if (tx->key_master) {
+ tx_key = KEY_MASTER;
+ if (!key.active)
+ goto encrypt;
+ if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) {
+ pr_debug("%s: gracing for msg (%d %d)\n", tx->name,
+ user, type);
+ goto encrypt;
+ }
+ if (user == LINK_CONFIG ||
+ (user == LINK_PROTOCOL && type == RESET_MSG) ||
+ (user == MSG_CRYPTO && type == KEY_DISTR_MSG) ||
+ time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) {
+ if (__rx && __rx->key_master &&
+ !atomic_read(&__rx->peer_rx_active))
+ goto encrypt;
+ if (!__rx) {
+ if (likely(!tx->legacy_user))
+ goto encrypt;
+ tipc_crypto_clone_msg(net, *skb, b, dst,
+ __dnode, SKB_GRACING);
+ }
+ }
+ }
+
+ /* Else, use the active key if any */
+ if (likely(key.active)) {
+ tx_key = key.active;
+ goto encrypt;
+ }
+
+ goto exit; /* no usable key: rc is still -ENOKEY */
+
+encrypt:
+ aead = tipc_aead_get(tx->aead[tx_key]);
+ if (unlikely(!aead))
+ goto exit; /* key slot is empty: rc is still -ENOKEY */
+ rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx);
+ if (likely(rc > 0)) /* only a positive result proceeds to encryption */
+ rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode);
+
+exit:
+ switch (rc) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_OK]);
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ this_cpu_inc(stats->stat[STAT_ASYNC]);
+ *skb = NULL; /* async: skb is not returned and aead is not put here */
+ return rc;
+ default:
+ this_cpu_inc(stats->stat[STAT_NOK]);
+ if (rc == -ENOKEY)
+ this_cpu_inc(stats->stat[STAT_NOKEYS]);
+ else if (rc == -EKEYREVOKED)
+ this_cpu_inc(stats->stat[STAT_BADKEYS]);
+ kfree_skb(*skb);
+ *skb = NULL;
+ break;
+ }
+
+ tipc_aead_put(aead);
+ return rc;
+}
+
+/**
+ * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer
+ * @net: struct net
+ * @rx: RX crypto handle
+ * @skb: input/output message skb pointer
+ * @b: bearer where the message has been received
+ *
+ * If the decryption is successful, the decrypted skb is returned directly or
+ * as the callback, the encryption header and auth tag will be trimmed out
+ * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete().
+ * Otherwise, the skb will be freed!
+ * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX
+ * cluster key(s) can be taken for decryption (- recursive).
+ *
+ * Return:
+ * * 0 : the decryption has successfully completed
+ * * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made
+ * * -ENOKEY : the decryption has failed due to no key
+ * * -EBADMSG : the decryption has failed due to bad message
+ * * -ENOMEM : the decryption has failed due to no memory
+ * * < 0 : the decryption has failed due to other reasons
+ */
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+ struct sk_buff **skb, struct tipc_bearer *b)
+{
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_crypto_stats __percpu *stats;
+ struct tipc_aead *aead = NULL;
+ struct tipc_key key;
+ int rc = -ENOKEY;
+ u8 tx_key, n;
+
+ tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
+
+ /* New peer?
+ * Let's try with TX key (i.e. cluster mode) & verify the skb first!
+ */
+ if (unlikely(!rx || tx_key == KEY_MASTER))
+ goto pick_tx;
+
+ /* Pick RX key according to TX key if any */
+ key = rx->key;
+ if (tx_key == key.active || tx_key == key.pending ||
+ tx_key == key.passive)
+ goto decrypt;
+
+ /* Unknown key, let's try to align RX key(s) */
+ if (tipc_crypto_key_try_align(rx, tx_key))
+ goto decrypt;
+
+pick_tx:
+ /* No key suitable? Try to pick one from TX... */
+ aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key);
+ if (aead)
+ goto decrypt;
+ goto exit; /* rc is still -ENOKEY */
+
+decrypt:
+ rcu_read_lock();
+ if (!aead)
+ aead = tipc_aead_get(rx->aead[tx_key]);
+ rc = tipc_aead_decrypt(net, aead, *skb, b);
+ rcu_read_unlock();
+
+exit:
+ stats = ((rx) ?: tx)->stats; /* account on RX if known, else on TX */
+ switch (rc) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_OK]);
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ this_cpu_inc(stats->stat[STAT_ASYNC]);
+ *skb = NULL;
+ return rc;
+ default:
+ this_cpu_inc(stats->stat[STAT_NOK]);
+ if (rc == -ENOKEY) {
+ kfree_skb(*skb);
+ *skb = NULL;
+ if (rx) {
+ /* Mark rx->nokey only if we don't have a
+ * pending received session key, nor a newer
+ * one i.e. in the next slot.
+ */
+ n = key_next(tx_key);
+ rx->nokey = !(rx->skey ||
+ rcu_access_pointer(rx->aead[n]));
+ pr_debug_ratelimited("%s: nokey %d, key %d/%x\n",
+ rx->name, rx->nokey,
+ tx_key, rx->key.keys);
+ tipc_node_put(rx->node); /* NOTE(review): presumably balances a reference taken by the caller - confirm */
+ }
+ this_cpu_inc(stats->stat[STAT_NOKEYS]);
+ return rc; /* tipc_crypto_rcv_complete() is not called on this path */
+ } else if (rc == -EBADMSG) {
+ this_cpu_inc(stats->stat[STAT_BADMSGS]);
+ }
+ break;
+ }
+
+ tipc_crypto_rcv_complete(net, aead, b, skb, rc);
+ return rc;
+}
+
+/**
+ * tipc_crypto_rcv_complete - Finish RX processing after a decryption attempt
+ * @net: struct net
+ * @aead: the key (owned by the TX or an RX crypto) used for the decryption
+ * @b: bearer where the message has been received
+ * @skb: input/output message skb pointer, set to NULL when the skb is freed
+ * @err: the decryption result, 0 on success
+ *
+ * When @aead belongs to the TX crypto, the RX crypto is taken from the skb's
+ * tx_clone_ctx (a node is created if there is none) and, unless the TX master
+ * key was used, the key is cloned and attached to that RX crypto.
+ * On success the encryption header and auth tag are stripped, the message is
+ * validated and the skb is marked as decrypted; on failure the skb is freed.
+ * On every path the reference on @aead is put and, if an RX crypto is known,
+ * tipc_node_put() is called on its node.
+ */
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+ struct tipc_bearer *b,
+ struct sk_buff **skb, int err)
+{
+ struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb);
+ struct tipc_crypto *rx = aead->crypto;
+ struct tipc_aead *tmp = NULL;
+ struct tipc_ehdr *ehdr;
+ struct tipc_node *n;
+
+ /* Is this completed by TX? */
+ if (unlikely(is_tx(aead->crypto))) {
+ rx = skb_cb->tx_clone_ctx.rx;
+ pr_debug("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n",
+ (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead,
+ (*skb)->next, skb_cb->flags);
+ pr_debug("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n",
+ skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last,
+ aead->crypto->aead[1], aead->crypto->aead[2],
+ aead->crypto->aead[3]);
+ if (unlikely(err)) {
+ if (err == -EBADMSG && (*skb)->next)
+ tipc_rcv(net, (*skb)->next, b);
+ goto free_skb;
+ }
+
+ if (likely((*skb)->next)) {
+ kfree_skb((*skb)->next);
+ (*skb)->next = NULL;
+ }
+ ehdr = (struct tipc_ehdr *)(*skb)->data;
+ if (!rx) {
+ WARN_ON(ehdr->user != LINK_CONFIG);
+ n = tipc_node_create(net, 0, ehdr->id, 0xffffu, 0,
+ true);
+ rx = tipc_node_crypto_rx(n);
+ if (unlikely(!rx))
+ goto free_skb;
+ }
+
+ /* Ignore cloning if it was TX master key */
+ if (ehdr->tx_key == KEY_MASTER)
+ goto rcv;
+ if (tipc_aead_clone(&tmp, aead) < 0)
+ goto rcv;
+ WARN_ON(!refcount_inc_not_zero(&tmp->refcnt));
+ if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) {
+ tipc_aead_free(&tmp->rcu);
+ goto rcv;
+ }
+ tipc_aead_put(aead);
+ aead = tmp; /* continue with the RX clone instead of the TX key */
+ }
+
+ if (unlikely(err)) {
+ tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN);
+ goto free_skb;
+ }
+
+ /* Set the RX key's user */
+ tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
+
+ /* Mark this point, RX works */
+ rx->timer1 = jiffies;
+
+rcv:
+ /* Remove ehdr & auth. tag prior to tipc_rcv() */
+ ehdr = (struct tipc_ehdr *)(*skb)->data;
+
+ /* Mark this point, RX passive still works */
+ if (rx->key.passive && ehdr->tx_key == rx->key.passive)
+ rx->timer2 = jiffies;
+
+ skb_reset_network_header(*skb);
+ skb_pull(*skb, tipc_ehdr_size(ehdr));
+ if (pskb_trim(*skb, (*skb)->len - aead->authsize))
+ goto free_skb;
+
+ /* Validate TIPCv2 message */
+ if (unlikely(!tipc_msg_validate(skb))) {
+ pr_err_ratelimited("Packet dropped after decryption!\n");
+ goto free_skb;
+ }
+
+ /* Ok, everything's fine, try to synch own keys according to peers' */
+ tipc_crypto_key_synch(rx, *skb);
+
+ /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */
+ skb_cb = TIPC_SKB_CB(*skb);
+
+ /* Mark skb decrypted */
+ skb_cb->decrypted = 1;
+
+ /* Clear clone cxt if any */
+ if (likely(!skb_cb->tx_clone_deferred))
+ goto exit;
+ skb_cb->tx_clone_deferred = 0;
+ memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+ goto exit;
+
+free_skb:
+ kfree_skb(*skb);
+ *skb = NULL;
+
+exit:
+ tipc_aead_put(aead);
+ if (rx)
+ tipc_node_put(rx->node);
+}
+
+/**
+ * tipc_crypto_do_cmd - Execute a crypto debug command
+ * @net: the struct net
+ * @cmd: the command code; only 0xfff1 (print statistics) is handled, any
+ *	 other value is ignored
+ *
+ * Prints the key status of the TX crypto and of every node's RX crypto,
+ * followed by the per-cpu crypto statistics counters, to the kernel log.
+ */
+static void tipc_crypto_do_cmd(struct net *net, int cmd)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_crypto *tx = tn->crypto_tx, *rx;
+	struct list_head *p;
+	unsigned int stat;
+	int i, j, cpu;
+	/* tipc_crypto_key_dump() formats into this buffer assuming 200 bytes */
+	char buf[200];
+
+	/* Currently only one command is supported */
+	if (cmd != 0xfff1)
+		return;
+
+	/* Print a header */
+	pr_info("\n=============== TIPC Crypto Statistics ===============\n\n");
+
+	/* Print key status */
+	pr_info("Key status:\n");
+	pr_info("TX(%7.7s)\n%s", tipc_own_id_string(net),
+		tipc_crypto_key_dump(tx, buf));
+
+	rcu_read_lock();
+	for (p = tn->node_list.next; p != &tn->node_list; p = p->next) {
+		rx = tipc_node_crypto_rx_by_list(p);
+		pr_info("RX(%7.7s)\n%s", tipc_node_get_id_str(rx->node),
+			tipc_crypto_key_dump(rx, buf));
+	}
+	rcu_read_unlock();
+
+	/* Print crypto statistics */
+	for (i = 0, j = 0; i < MAX_STATS; i++)
+		j += scnprintf(buf + j, sizeof(buf) - j, "|%11s ", hstats[i]);
+	pr_info("Counter %s", buf);
+
+	memset(buf, '-', 115);
+	buf[115] = '\0';
+	pr_info("%s\n", buf);
+
+	/* One row per possible cpu; only the first row carries the label */
+	j = scnprintf(buf, sizeof(buf), "TX(%7.7s) ", tipc_own_id_string(net));
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < MAX_STATS; i++) {
+			stat = per_cpu_ptr(tx->stats, cpu)->stat[i];
+			/* stat is unsigned: use %u so large counts stay positive */
+			j += scnprintf(buf + j, sizeof(buf) - j, "|%11u ",
+				       stat);
+		}
+		pr_info("%s", buf);
+		j = scnprintf(buf, sizeof(buf), "%12s", " ");
+	}
+
+	rcu_read_lock();
+	for (p = tn->node_list.next; p != &tn->node_list; p = p->next) {
+		rx = tipc_node_crypto_rx_by_list(p);
+		j = scnprintf(buf, sizeof(buf), "RX(%7.7s) ",
+			      tipc_node_get_id_str(rx->node));
+		for_each_possible_cpu(cpu) {
+			for (i = 0; i < MAX_STATS; i++) {
+				stat = per_cpu_ptr(rx->stats, cpu)->stat[i];
+				j += scnprintf(buf + j, sizeof(buf) - j,
+					       "|%11u ", stat);
+			}
+			pr_info("%s", buf);
+			j = scnprintf(buf, sizeof(buf), "%12s", " ");
+		}
+	}
+	rcu_read_unlock();
+
+	pr_info("\n======================== Done ========================\n");
+}
+
+/* Format the state of every key slot (KEY_MIN..KEY_MAX) of crypto @c into
+ * @buf, one line per slot, and return @buf. The master slot is skipped for
+ * an RX crypto; an RX crypto gets a trailing "Peer RX active" line.
+ * Output is bounded to 200 bytes, so @buf must be at least that large.
+ */
+static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf)
+{
+	struct tipc_key state = c->key;
+	struct tipc_aead *entry;
+	const char *label;
+	int slot, len = 0;
+
+	for (slot = KEY_MIN; slot <= KEY_MAX; slot++) {
+		if (slot != KEY_MASTER) {
+			if (slot == state.passive)
+				label = "PAS";
+			else if (slot == state.active)
+				label = "ACT";
+			else if (slot == state.pending)
+				label = "PEN";
+			else
+				label = "-";
+		} else if (is_rx(c)) {
+			/* No master key line for an RX crypto */
+			continue;
+		} else {
+			/* TX master key is shown active within the grace period */
+			label = time_before(jiffies,
+					    c->timer2 + TIPC_TX_GRACE_PERIOD) ?
+				"ACT" : "PAS";
+		}
+		len += scnprintf(buf + len, 200 - len, "\tKey%d: %s", slot,
+				 label);
+
+		rcu_read_lock();
+		entry = rcu_dereference(c->aead[slot]);
+		if (entry) {
+			const char *mode = (entry->mode == CLUSTER_KEY) ?
+					   "c" : "p";
+
+			len += scnprintf(buf + len, 200 - len,
+					 "{\"0x...%s\", \"%s\"}/%d:%d",
+					 entry->hint, mode,
+					 atomic_read(&entry->users),
+					 refcount_read(&entry->refcnt));
+		}
+		rcu_read_unlock();
+		len += scnprintf(buf + len, 200 - len, "\n");
+	}
+
+	if (is_rx(c))
+		len += scnprintf(buf + len, 200 - len,
+				 "\tPeer RX active: %d\n",
+				 atomic_read(&c->peer_rx_active));
+
+	return buf;
+}
+
+/* Render a key state change as "[%s %s %s] -> [%s %s %s]" into @buf and
+ * return @buf. Each slot KEY_1..KEY_3 is shown as "pas", "act", "pen" or "-".
+ * Output is bounded to 32 bytes, so @buf must be at least that large.
+ */
+static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
+				  char *buf)
+{
+	const struct tipc_key *states[2] = { &old, &new };
+	const struct tipc_key *cur;
+	const char *label;
+	int n, slot, len = 0;
+
+	for (n = 0; n < 2; n++) {
+		cur = states[n];
+		len += scnprintf(buf + len, 32 - len, "[");
+		for (slot = KEY_1; slot <= KEY_3; slot++) {
+			if (slot == cur->passive)
+				label = "pas";
+			else if (slot == cur->active)
+				label = "act";
+			else if (slot == cur->pending)
+				label = "pen";
+			else
+				label = "-";
+			/* No trailing space after the last slot */
+			len += scnprintf(buf + len, 32 - len,
+					 (slot != KEY_3) ? "%s " : "%s", label);
+		}
+		/* The old state is followed by the arrow, the new one is closed */
+		len += scnprintf(buf + len, 32 - len,
+				 (n == 0) ? "] -> " : "]");
+	}
+
+	return buf;
+}
+
+/**
+ * tipc_crypto_msg_rcv - Common 'MSG_CRYPTO' processing point
+ * @net: the struct net
+ * @skb: the receiving message buffer, always freed before returning
+ *
+ * The skb is linearized and the RX crypto of the previous node is looked up.
+ * A KEY_DISTR_MSG is passed on to tipc_crypto_key_rcv(); when that returns
+ * true, the reference on the node is kept, otherwise it is put here.
+ */
+void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb)
+{
+	struct tipc_crypto *rx;
+	struct tipc_msg *hdr;
+	bool keep_node = false;
+
+	if (likely(!skb_linearize(skb))) {
+		hdr = buf_msg(skb);
+		rx = tipc_node_crypto_rx_by_addr(net, msg_prevnode(hdr));
+		if (likely(rx)) {
+			if (msg_type(hdr) == KEY_DISTR_MSG)
+				keep_node = tipc_crypto_key_rcv(rx, hdr);
+			if (!keep_node)
+				tipc_node_put(rx->node);
+		}
+	}
+
+	kfree_skb(skb);
+}
+
+/**
+ * tipc_crypto_key_distr - Distribute a TX key
+ * @tx: the TX crypto
+ * @key: the key's index, 0 means there is no key to distribute
+ * @dest: the destination tipc node, = NULL if distributing to all nodes
+ *
+ * Nothing is sent, and 0 is returned, when key exchange is disabled via
+ * sysctl_tipc_key_exchange_enabled.
+ *
+ * Return: 0 in case of success, -ENOKEY if @key is 0 or its slot is empty,
+ * otherwise the result of tipc_crypto_key_xmit()
+ */
+int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
+			  struct tipc_node *dest)
+{
+	u32 dnode = tipc_node_get_addr(dest);
+	struct tipc_aead *aead;
+	int rc;
+
+	if (!sysctl_tipc_key_exchange_enabled)
+		return 0;
+
+	if (!key)
+		return -ENOKEY;
+
+	rcu_read_lock();
+	aead = tipc_aead_get(tx->aead[key]);
+	if (unlikely(!aead)) {
+		rcu_read_unlock();
+		return -ENOKEY;
+	}
+
+	rc = tipc_crypto_key_xmit(tx->net, aead->key, aead->gen, aead->mode,
+				  dnode);
+	tipc_aead_put(aead);
+	rcu_read_unlock();
+
+	return rc;
+}
+
+/**
+ * tipc_crypto_key_xmit - Send a session key
+ * @net: the struct net
+ * @skey: the session key to be sent
+ * @gen: the key's generation
+ * @mode: the key's mode
+ * @dnode: the destination node address, = 0 if broadcasting to all nodes
+ *
+ * The session key 'skey' is packed in a TIPC v2 'MSG_CRYPTO/KEY_DISTR_MSG'
+ * as its data section: the algorithm name (TIPC_AEAD_ALG_NAME bytes), the key
+ * length as a big-endian 32-bit value, then the key bytes. The message is
+ * then xmit-ed through the uc/bc link.
+ *
+ * Return: 0 in case of success, -ENOMEM if no buffer could be acquired,
+ * otherwise the xmit result
+ */
+static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
+				u16 gen, u8 mode, u32 dnode)
+{
+	u16 key_sz = tipc_aead_key_size(skey);
+	struct sk_buff_head xmitq;
+	struct sk_buff *skb;
+	struct tipc_msg *hdr;
+	u16 cong_link_cnt;
+	__be32 *len_field;
+	u8 *payload;
+
+	skb = tipc_buf_acquire(INT_H_SIZE + key_sz, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = buf_msg(skb);
+	tipc_msg_init(tipc_own_addr(net), hdr, MSG_CRYPTO, KEY_DISTR_MSG,
+		      INT_H_SIZE, dnode);
+	msg_set_size(hdr, INT_H_SIZE + key_sz);
+	msg_set_key_gen(hdr, gen);
+	msg_set_key_mode(hdr, mode);
+
+	/* Data section: alg_name | keylen (be32) | key */
+	payload = msg_data(hdr);
+	len_field = (__be32 *)(payload + TIPC_AEAD_ALG_NAME);
+	memcpy(payload, skey->alg_name, TIPC_AEAD_ALG_NAME);
+	*len_field = htonl(skey->keylen);
+	memcpy(payload + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->key,
+	       skey->keylen);
+
+	__skb_queue_head_init(&xmitq);
+	__skb_queue_tail(&xmitq, skb);
+
+	if (dnode)
+		return tipc_node_xmit(net, &xmitq, dnode, 0);
+
+	return tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
+}
+
+/**
+ * tipc_crypto_key_rcv - Receive a session key
+ * @rx: the RX crypto
+ * @hdr: the TIPC v2 message incl. the receiving session key in its data
+ *
+ * This function retrieves the session key in the message from peer, then
+ * schedules a RX work to attach the key to the corresponding RX crypto.
+ * The message is rejected if its data size is inconsistent with the embedded
+ * key length, if a received key is still waiting to be attached (rx->skey),
+ * or if the key generation equals the current one while keys are present.
+ *
+ * Return: "true" if the key has been scheduled for attaching, otherwise
+ * "false".
+ */
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
+{
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ struct tipc_aead_key *skey = NULL;
+ u16 key_gen = msg_key_gen(hdr);
+ u32 size = msg_data_sz(hdr);
+ u8 *data = msg_data(hdr);
+ unsigned int keylen;
+
+ /* Verify whether the size can exist in the packet */
+ if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+ pr_debug("%s: message data size is too small\n", rx->name);
+ goto exit;
+ }
+
+ keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+ /* Verify the supplied size values */
+ if (unlikely(keylen > TIPC_AEAD_KEY_SIZE_MAX ||
+ size != keylen + sizeof(struct tipc_aead_key))) {
+ pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+ goto exit;
+ }
+
+ spin_lock(&rx->lock);
+ if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
+ pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
+ rx->skey, key_gen, rx->key_gen);
+ goto exit_unlock;
+ }
+
+ /* Allocate memory for the key
+ * (size was checked above to equal the key struct plus keylen bytes)
+ */
+ skey = kmalloc(size, GFP_ATOMIC);
+ if (unlikely(!skey)) {
+ pr_err("%s: unable to allocate memory for skey\n", rx->name);
+ goto exit_unlock;
+ }
+
+ /* Copy key from msg data */
+ skey->keylen = keylen;
+ memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
+ memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
+ skey->keylen);
+
+ rx->key_gen = key_gen;
+ rx->skey_mode = msg_key_mode(hdr);
+ rx->skey = skey;
+ rx->nokey = 0;
+ mb(); /* for nokey flag */
+
+exit_unlock:
+ spin_unlock(&rx->lock);
+
+exit:
+ /* Schedule the key attaching on this crypto
+ * (skey is still NULL here on every rejection path above)
+ */
+ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
+ return true;
+
+ return false;
+}
+
+/**
+ * tipc_crypto_work_rx - Scheduled RX works handler
+ * @work: the struct RX work
+ *
+ * The function processes the previous scheduled works i.e. distributing TX key
+ * or attaching a received session key on RX crypto.
+ * The work is re-queued with a 5 second delay after a key distribution, or
+ * when attaching the received key failed with -EBUSY or -ENOMEM. Unless it
+ * has been re-queued, tipc_node_put() is called on rx->node before returning.
+ */
+static void tipc_crypto_work_rx(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tipc_crypto *rx = container_of(dwork, struct tipc_crypto, work);
+	struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+	unsigned long delay = msecs_to_jiffies(5000);
+	bool resched = false;
+	u8 key;
+	int rc;
+
+	/* Case 1: Distribute TX key to peer if scheduled */
+	if (atomic_cmpxchg(&rx->key_distr,
+			   KEY_DISTR_SCHED,
+			   KEY_DISTR_COMPL) == KEY_DISTR_SCHED) {
+		/* Always pick the newest one for distributing */
+		key = tx->key.pending ?: tx->key.active;
+		rc = tipc_crypto_key_distr(tx, key, rx->node);
+		if (unlikely(rc))
+			pr_warn("%s: unable to distr key[%d] to %s, err %d\n",
+				tx->name, key, tipc_node_get_id_str(rx->node),
+				rc);
+
+		/* Sched for key_distr releasing */
+		resched = true;
+	} else {
+		atomic_cmpxchg(&rx->key_distr, KEY_DISTR_COMPL, 0);
+	}
+
+	/* Case 2: Attach a pending received session key from peer if any */
+	if (rx->skey) {
+		rc = tipc_crypto_key_init(rx, rx->skey, rx->skey_mode, false);
+		if (unlikely(rc < 0))
+			pr_warn("%s: unable to attach received skey, err %d\n",
+				rx->name, rc);
+		switch (rc) {
+		case -EBUSY:
+		case -ENOMEM:
+			/* Resched the key attaching */
+			resched = true;
+			break;
+		default:
+			synchronize_rcu();
+			/* The buffer holds raw key material: wipe it on free,
+			 * as done for the other key buffers in this file.
+			 */
+			kfree_sensitive(rx->skey);
+			rx->skey = NULL;
+			break;
+		}
+	}
+
+	if (resched && queue_delayed_work(tx->wq, &rx->work, delay))
+		return;
+
+	tipc_node_put(rx->node);
+}
+
+/**
+ * tipc_crypto_rekeying_sched - (Re)schedule rekeying w/o new interval
+ * @tx: TX crypto
+ * @changed: if the rekeying needs to be rescheduled with new interval
+ * @new_intv: new rekeying interval in minutes (when "changed" = true), or
+ *	      TIPC_REKEYING_NOW to trigger the rekeying work immediately
+ *	      without changing the stored interval
+ *
+ * When "changed" is set, any queued rekeying work is cancelled first. The
+ * work is then queued if an interval is set or an immediate run was asked.
+ */
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+				u32 new_intv)
+{
+	unsigned long delay = 0;
+	bool now = false;
+	u64 ticks;
+
+	if (changed) {
+		if (new_intv == TIPC_REKEYING_NOW)
+			now = true;
+		else
+			tx->rekeying_intv = new_intv;
+		cancel_delayed_work_sync(&tx->work);
+	}
+
+	if (!tx->rekeying_intv && !now)
+		return;
+
+	if (!now) {
+		/* Convert minutes to jiffies in 64-bit arithmetic: the former
+		 * "intv * 60 * 1000" millisecond value can wrap in 32 bits
+		 * for large intervals, and msecs_to_jiffies() only takes an
+		 * unsigned int. 60 * HZ is the exact number of jiffies per
+		 * minute, so in-range intervals yield the same delay.
+		 */
+		ticks = (u64)tx->rekeying_intv * 60 * HZ;
+		if (ticks > MAX_JIFFY_OFFSET)
+			ticks = MAX_JIFFY_OFFSET;
+		delay = (unsigned long)ticks;
+	}
+
+	queue_delayed_work(tx->wq, &tx->work, delay);
+}
+
+/**
+ * tipc_crypto_work_tx - Scheduled TX works handler
+ * @work: the struct TX work
+ *
+ * Handles the scheduled rekeying: the current active key (or the master key
+ * if there is no active one) is duplicated as a template, a new session key
+ * is generated from it, attached to the TX crypto as a per-node key and
+ * distributed to all peers. Nothing is generated while a TX key is still
+ * pending. The rekeying is re-scheduled afterwards, except when no template
+ * key exists at all.
+ */
+static void tipc_crypto_work_tx(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work);
+	struct tipc_key cur = tx->key;
+	struct tipc_aead_key *new_key;
+	struct tipc_aead *tmpl;
+	int err;
+
+	if (likely(!cur.pending)) {
+		/* Take current key as a template */
+		rcu_read_lock();
+		tmpl = rcu_dereference(tx->aead[cur.active ?: KEY_MASTER]);
+		if (unlikely(!tmpl)) {
+			rcu_read_unlock();
+			/* At least one key should exist for securing */
+			return;
+		}
+
+		/* Duplicate it while still inside the RCU section */
+		new_key = kmemdup(tmpl->key, tipc_aead_key_size(tmpl->key),
+				  GFP_ATOMIC);
+		rcu_read_unlock();
+
+		if (unlikely(!new_key)) {
+			err = -ENOMEM;
+		} else {
+			/* Generate the new key, initiate & distribute it */
+			err = tipc_aead_key_generate(new_key);
+			if (!err)
+				err = tipc_crypto_key_init(tx, new_key,
+							   PER_NODE_KEY, false);
+			/* A positive result is the index of the new key */
+			if (likely(err > 0))
+				err = tipc_crypto_key_distr(tx, err, NULL);
+			kfree_sensitive(new_key);
+		}
+
+		if (unlikely(err))
+			pr_warn_ratelimited("%s: rekeying returns %d\n",
+					    tx->name, err);
+	}
+
+	/* Re-schedule rekeying if any */
+	tipc_crypto_rekeying_sched(tx, false, 0);
+}
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
new file mode 100644
index 000000000000..ce7d4cc8a9e0
--- /dev/null
+++ b/net/tipc/crypto.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * net/tipc/crypto.h: Include file for TIPC crypto
+ *
+ * Copyright (c) 2019, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef CONFIG_TIPC_CRYPTO
+#ifndef _TIPC_CRYPTO_H
+#define _TIPC_CRYPTO_H
+
+#include "core.h"
+#include "node.h"
+#include "msg.h"
+#include "bearer.h"
+
+#define TIPC_EVERSION 7
+
+/* AEAD aes(gcm) */
+#define TIPC_AES_GCM_KEY_SIZE_128 16
+#define TIPC_AES_GCM_KEY_SIZE_192 24
+#define TIPC_AES_GCM_KEY_SIZE_256 32
+
+#define TIPC_AES_GCM_SALT_SIZE 4
+#define TIPC_AES_GCM_IV_SIZE 12
+#define TIPC_AES_GCM_TAG_SIZE 16
+
+/*
+ * TIPC crypto modes:
+ * - CLUSTER_KEY:
+ * One single key is used for both TX & RX in all nodes in the cluster.
+ * - PER_NODE_KEY:
+ * Each node in the cluster has one TX key; for RX, a node needs to know
+ * its peers' TX keys for the decryption of messages from those nodes.
+ */
+enum {
+ CLUSTER_KEY = 1,
+ PER_NODE_KEY = (1 << 1),
+};
+
+extern int sysctl_tipc_max_tfms __read_mostly;
+extern int sysctl_tipc_key_exchange_enabled __read_mostly;
+
+/*
+ * TIPC encryption message format:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+ * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w0:|Ver=7| User |D|TX |RX |K|M|N| Rsvd |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w1:| Seqno |
+ * w2:| (8 octets) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w3:\ Prevnode \
+ * / (4 or 16 octets) /
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ \
+ * / Encrypted complete TIPC V2 header and user data /
+ * \ \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | AuthTag |
+ * | (16 octets) |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Word0:
+ * Ver : = 7 i.e. TIPC encryption message version
+ * User : = 7 (for LINK_PROTOCOL); = 13 (for LINK_CONFIG) or = 0
+ * D : The destined bit i.e. the message's destination node is
+ * "known" or not at the message encryption
+ * TX : TX key used for the message encryption
+ * RX : Currently RX active key corresponding to the destination
+ * node's TX key (when the "D" bit is set)
+ * K : Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only)
+ * M : Bit indicates if sender has master key
+ * N : Bit indicates if sender has no RX keys corresponding to the
+ * receiver's TX (when the "D" bit is set)
+ * Rsvd : Reserved bit, field
+ * Word1-2:
+ * Seqno : The 64-bit sequence number of the encrypted message, also
+ * part of the nonce used for the message encryption/decryption
+ * Word3-:
+ * Prevnode: The source node address, or ID in case LINK_CONFIG only
+ * AuthTag : The authentication tag for the message integrity checking
+ * generated by the message encryption
+ */
+struct tipc_ehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 destined:1,
+ user:4,
+ version:3;
+ __u8 reserved_1:1,
+ rx_nokey:1,
+ master_key:1,
+ keepalive:1,
+ rx_key_active:2,
+ tx_key:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 version:3,
+ user:4,
+ destined:1;
+ __u8 tx_key:2,
+ rx_key_active:2,
+ keepalive:1,
+ master_key:1,
+ rx_nokey:1,
+ reserved_1:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be16 reserved_2;
+ } __packed;
+ __be32 w0; /* word 0 as a whole, overlaying the bitfields above */
+ };
+ __be64 seqno; /* words 1-2: 64-bit sequence number */
+ union {
+ __be32 addr; /* 4-octet form of the Prevnode field */
+ __u8 id[NODE_ID_LEN]; /* For a LINK_CONFIG message only! */
+ };
+/* Header sizes: EHDR_SIZE ends right after the 4-octet addr, whereas
+ * EHDR_CFG_SIZE covers the whole struct, i.e. including the full id[].
+ * EMSG_OVERHEAD is the short header plus the AES-GCM auth tag.
+ */
+#define EHDR_SIZE (offsetof(struct tipc_ehdr, addr) + sizeof(__be32))
+#define EHDR_CFG_SIZE (sizeof(struct tipc_ehdr))
+#define EHDR_MIN_SIZE (EHDR_SIZE)
+#define EHDR_MAX_SIZE (EHDR_CFG_SIZE)
+#define EMSG_OVERHEAD (EHDR_SIZE + TIPC_AES_GCM_TAG_SIZE)
+} __packed;
+
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+ struct tipc_node *node);
+void tipc_crypto_stop(struct tipc_crypto **crypto);
+void tipc_crypto_timeout(struct tipc_crypto *rx);
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+ struct sk_buff **skb, struct tipc_bearer *b);
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+ u8 mode, bool master_key);
+void tipc_crypto_key_flush(struct tipc_crypto *c);
+int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
+ struct tipc_node *dest);
+void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb);
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+ u32 new_intv);
+int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info);
+bool tipc_ehdr_validate(struct sk_buff *skb);
+
+/* Read the key generation from message @m: the field selected by
+ * msg_bits(m, 4, 16, 0xffff), i.e. a value masked to 16 bits.
+ */
+static inline u32 msg_key_gen(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 16, 0xffff);
+}
+
+/* Store key generation @gen in message @m, in the same field (4, 16, 0xffff)
+ * that msg_key_gen() reads.
+ */
+static inline void msg_set_key_gen(struct tipc_msg *m, u32 gen)
+{
+ msg_set_bits(m, 4, 16, 0xffff, gen);
+}
+
+/* Read the key mode from message @m: the field selected by
+ * msg_bits(m, 4, 0, 0xf), i.e. a value masked to 4 bits.
+ */
+static inline u32 msg_key_mode(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xf);
+}
+
+/* Store key mode @mode in message @m, in the same field (4, 0, 0xf) that
+ * msg_key_mode() reads.
+ */
+static inline void msg_set_key_mode(struct tipc_msg *m, u32 mode)
+{
+ msg_set_bits(m, 4, 0, 0xf, mode);
+}
+
+#endif /* _TIPC_CRYPTO_H */
+#endif
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
new file mode 100644
index 000000000000..54dde8c4e4d4
--- /dev/null
+++ b/net/tipc/diag.c
@@ -0,0 +1,118 @@
+/*
+ * net/tipc/diag.c: TIPC socket diag
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "ASIS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+ u32 res[2];
+
+ sock_diag_save_cookie(sk, res);
+ return *((u64 *)res);
+}
+
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk)
+{
+ struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states,
+ __tipc_diag_gen_cookie);
+ if (err)
+ return err;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+ struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct tipc_sock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = tipc_dump_start,
+ .dump = tipc_diag_dump,
+ .done = tipc_dump_done,
+ };
+ netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+ .owner = THIS_MODULE,
+ .family = AF_TIPC,
+ .dump = tipc_sock_diag_handler_dump,
+};
+
+static int __init tipc_diag_init(void)
+{
+ return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+static void __exit tipc_diag_exit(void)
+{
+ sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index ecc758c6eacf..775fd4f3f072 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -1,7 +1,7 @@
/*
* net/tipc/discover.c
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2014-2018, Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -35,351 +35,386 @@
*/
#include "core.h"
-#include "link.h"
+#include "node.h"
#include "discover.h"
-#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */
-#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */
-#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */
-#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */
-
+/* min delay during bearer start up */
+#define TIPC_DISC_INIT msecs_to_jiffies(125)
+/* max delay if bearer has no links */
+#define TIPC_DISC_FAST msecs_to_jiffies(1000)
+/* max delay if bearer has links */
+#define TIPC_DISC_SLOW msecs_to_jiffies(60000)
+/* indicates no timer in use */
+#define TIPC_DISC_INACTIVE 0xffffffff
/**
- * struct tipc_link_req - information about an ongoing link setup request
- * @bearer: bearer issuing requests
+ * struct tipc_discoverer - information about an ongoing link setup request
+ * @bearer_id: identity of bearer issuing requests
+ * @net: network namespace instance
* @dest: destination address for request messages
* @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
- * @buf: request message to be (repeatedly) sent
+ * @lock: spinlock for controlling access to requests
+ * @skb: request message to be (repeatedly) sent
* @timer: timer governing period between requests
- * @timer_intv: current interval between requests (in ms)
+ * @timer_intv: current interval between requests (in jiffies)
*/
-struct tipc_link_req {
- struct tipc_bearer *bearer;
+struct tipc_discoverer {
+ u32 bearer_id;
struct tipc_media_addr dest;
+ struct net *net;
u32 domain;
int num_nodes;
- struct sk_buff *buf;
+ spinlock_t lock;
+ struct sk_buff *skb;
struct timer_list timer;
- unsigned int timer_intv;
+ unsigned long timer_intv;
};
/**
* tipc_disc_init_msg - initialize a link setup message
- * @type: message type (request or response)
- * @dest_domain: network domain of node(s) which should respond to message
- * @b_ptr: ptr to bearer issuing message
+ * @net: the applicable net namespace
+ * @skb: buffer containing message
+ * @mtyp: message type (request or response)
+ * @b: ptr to bearer issuing message
*/
-static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain,
- struct tipc_bearer *b_ptr)
+static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
+ u32 mtyp, struct tipc_bearer *b)
{
- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
- struct tipc_msg *msg;
-
- if (buf) {
- msg = buf_msg(buf);
- tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
- msg_set_non_seq(msg, 1);
- msg_set_node_sig(msg, tipc_random);
- msg_set_dest_domain(msg, dest_domain);
- msg_set_bc_netid(msg, tipc_net_id);
- b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg));
- }
- return buf;
+ struct tipc_net *tn = tipc_net(net);
+ u32 dest_domain = b->domain;
+ struct tipc_msg *hdr;
+
+ hdr = buf_msg(skb);
+ tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
+ MAX_H_SIZE, dest_domain);
+ msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN);
+ msg_set_non_seq(hdr, 1);
+ msg_set_node_sig(hdr, tn->random);
+ msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
+ msg_set_dest_domain(hdr, dest_domain);
+ msg_set_bc_netid(hdr, tn->net_id);
+ b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random));
+ msg_set_node_id(hdr, tipc_own_id(net));
+}
+
+static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst,
+ u32 src, u32 sugg_addr,
+ struct tipc_media_addr *maddr,
+ struct tipc_bearer *b)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+
+ skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+ if (!skb)
+ return;
+ hdr = buf_msg(skb);
+ tipc_disc_init_msg(net, skb, mtyp, b);
+ msg_set_sugg_node_addr(hdr, sugg_addr);
+ msg_set_dest_domain(hdr, dst);
+ tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
}
/**
* disc_dupl_alert - issue node address duplication alert
- * @b_ptr: pointer to bearer detecting duplication
+ * @b: pointer to bearer detecting duplication
* @node_addr: duplicated node address
* @media_addr: media address advertised by duplicated node
*/
-static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
+static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
struct tipc_media_addr *media_addr)
{
- char node_addr_str[16];
char media_addr_str[64];
- tipc_addr_string_fill(node_addr_str, node_addr);
tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
media_addr);
- pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
- media_addr_str, b_ptr->name);
+ pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr,
+ media_addr_str, b->name);
}
-/**
- * tipc_disc_recv_msg - handle incoming link setup message (request or response)
- * @buf: buffer containing message
- * @b_ptr: bearer that message arrived on
+/* tipc_disc_addr_trial_msg - handle an address uniqueness trial from peer
+ * Returns true if message should be dropped by caller, i.e., if it is a
+ * trial message or we are inside trial period. Otherwise false.
*/
-void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
+static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+ struct tipc_media_addr *maddr,
+ struct tipc_bearer *b,
+ u32 dst, u32 src,
+ u32 sugg_addr,
+ u8 *peer_id,
+ int mtyp)
{
- struct tipc_node *n_ptr;
- struct tipc_link *link;
- struct tipc_media_addr media_addr;
- struct sk_buff *rbuf;
- struct tipc_msg *msg = buf_msg(buf);
- u32 dest = msg_dest_domain(msg);
- u32 orig = msg_prevnode(msg);
- u32 net_id = msg_bc_netid(msg);
- u32 type = msg_type(msg);
- u32 signature = msg_node_sig(msg);
- int addr_mismatch;
- int link_fully_up;
-
- media_addr.broadcast = 1;
- b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));
- kfree_skb(buf);
-
- /* Ensure message from node is valid and communication is permitted */
- if (net_id != tipc_net_id)
- return;
- if (media_addr.broadcast)
- return;
- if (!tipc_addr_domain_valid(dest))
- return;
- if (!tipc_addr_node_valid(orig))
- return;
- if (orig == tipc_own_addr) {
- if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr)))
- disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
- return;
- }
- if (!tipc_in_scope(dest, tipc_own_addr))
- return;
- if (!tipc_in_scope(b_ptr->link_req->domain, orig))
- return;
-
- /* Locate structure corresponding to requesting node */
- n_ptr = tipc_node_find(orig);
- if (!n_ptr) {
- n_ptr = tipc_node_create(orig);
- if (!n_ptr)
- return;
- }
- tipc_node_lock(n_ptr);
-
- /* Prepare to validate requesting node's signature and media address */
- link = n_ptr->links[b_ptr->identity];
- addr_mismatch = (link != NULL) &&
- memcmp(&link->media_addr, &media_addr, sizeof(media_addr));
-
- /*
- * Ensure discovery message's signature is correct
- *
- * If signature is incorrect and there is no working link to the node,
- * accept the new signature but invalidate all existing links to the
- * node so they won't re-activate without a new discovery message.
- *
- * If signature is incorrect and the requested link to the node is
- * working, accept the new signature. (This is an instance of delayed
- * rediscovery, where a link endpoint was able to re-establish contact
- * with its peer endpoint on a node that rebooted before receiving a
- * discovery message from that node.)
- *
- * If signature is incorrect and there is a working link to the node
- * that is not the requested link, reject the request (must be from
- * a duplicate node).
- */
- if (signature != n_ptr->signature) {
- if (n_ptr->working_links == 0) {
- struct tipc_link *curr_link;
- int i;
-
- for (i = 0; i < MAX_BEARERS; i++) {
- curr_link = n_ptr->links[i];
- if (curr_link) {
- memset(&curr_link->media_addr, 0,
- sizeof(media_addr));
- tipc_link_reset(curr_link);
- }
- }
- addr_mismatch = (link != NULL);
- } else if (tipc_link_is_up(link) && !addr_mismatch) {
- /* delayed rediscovery */
- } else {
- disc_dupl_alert(b_ptr, orig, &media_addr);
- tipc_node_unlock(n_ptr);
- return;
- }
- n_ptr->signature = signature;
- }
-
- /*
- * Ensure requesting node's media address is correct
- *
- * If media address doesn't match and the link is working, reject the
- * request (must be from a duplicate node).
- *
- * If media address doesn't match and the link is not working, accept
- * the new media address and reset the link to ensure it starts up
- * cleanly.
- */
- if (addr_mismatch) {
- if (tipc_link_is_up(link)) {
- disc_dupl_alert(b_ptr, orig, &media_addr);
- tipc_node_unlock(n_ptr);
- return;
- } else {
- memcpy(&link->media_addr, &media_addr,
- sizeof(media_addr));
- tipc_link_reset(link);
- }
+ struct net *net = d->net;
+ struct tipc_net *tn = tipc_net(net);
+ u32 self = tipc_own_addr(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
+
+ if (mtyp == DSC_TRIAL_FAIL_MSG) {
+ if (!trial)
+ return true;
+
+ /* Ignore if somebody else already gave new suggestion */
+ if (dst != tn->trial_addr)
+ return true;
+
+ /* Otherwise update trial address and restart trial period */
+ tn->trial_addr = sugg_addr;
+ msg_set_prevnode(buf_msg(d->skb), sugg_addr);
+ tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+ return true;
}
- /* Create a link endpoint for this bearer, if necessary */
- if (!link) {
- link = tipc_link_create(n_ptr, b_ptr, &media_addr);
- if (!link) {
- tipc_node_unlock(n_ptr);
- return;
- }
+ /* Apply trial address if we just left trial period */
+ if (!trial && !self) {
+ schedule_work(&tn->work);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
- /* Accept discovery message & send response, if necessary */
- link_fully_up = link_working_working(link);
-
- if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) {
- rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr);
- if (rbuf) {
- tipc_bearer_send(b_ptr, rbuf, &media_addr);
- kfree_skb(rbuf);
- }
- }
+ /* Accept regular link requests/responses only after trial period */
+ if (mtyp != DSC_TRIAL_MSG)
+ return trial;
- tipc_node_unlock(n_ptr);
+ sugg_addr = tipc_node_try_addr(net, peer_id, src);
+ if (sugg_addr)
+ tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src,
+ self, sugg_addr, maddr, b);
+ return true;
}
/**
- * disc_update - update frequency of periodic link setup requests
- * @req: ptr to link request structure
- *
- * Reinitiates discovery process if discovery object has no associated nodes
- * and is either not currently searching or is searching at a slow rate
+ * tipc_disc_rcv - handle incoming discovery message (request or response)
+ * @net: applicable net namespace
+ * @skb: buffer containing message
+ * @b: bearer that message arrived on
*/
-static void disc_update(struct tipc_link_req *req)
+void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
+ struct tipc_bearer *b)
{
- if (!req->num_nodes) {
- if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
- (req->timer_intv > TIPC_LINK_REQ_FAST)) {
- req->timer_intv = TIPC_LINK_REQ_INIT;
- k_start_timer(&req->timer, req->timer_intv);
- }
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 pnet_hash = msg_peer_net_hash(hdr);
+ u16 caps = msg_node_capabilities(hdr);
+ bool legacy = tn->legacy_addr_format;
+ u32 sugg = msg_sugg_node_addr(hdr);
+ u32 signature = msg_node_sig(hdr);
+ u8 peer_id[NODE_ID_LEN] = {0,};
+ u32 dst = msg_dest_domain(hdr);
+ u32 net_id = msg_bc_netid(hdr);
+ struct tipc_media_addr maddr;
+ u32 src = msg_prevnode(hdr);
+ u32 mtyp = msg_type(hdr);
+ bool dupl_addr = false;
+ bool respond = false;
+ u32 self;
+ int err;
+
+ if (skb_linearize(skb)) {
+ kfree_skb(skb);
+ return;
}
-}
+ hdr = buf_msg(skb);
-/**
- * tipc_disc_add_dest - increment set of discovered nodes
- * @req: ptr to link request structure
- */
-void tipc_disc_add_dest(struct tipc_link_req *req)
-{
- req->num_nodes++;
+ if (caps & TIPC_NODE_ID128)
+ memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN);
+ else
+ sprintf(peer_id, "%x", src);
+
+ err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
+ kfree_skb(skb);
+ if (err || maddr.broadcast) {
+ pr_warn_ratelimited("Rcv corrupt discovery message\n");
+ return;
+ }
+ /* Ignore discovery messages from own node */
+ if (!memcmp(&maddr, &b->addr, sizeof(maddr)))
+ return;
+ if (net_id != tn->net_id)
+ return;
+ if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst,
+ src, sugg, peer_id, mtyp))
+ return;
+ self = tipc_own_addr(net);
+
+ /* Message from somebody using this node's address */
+ if (in_own_node(net, src)) {
+ disc_dupl_alert(b, self, &maddr);
+ return;
+ }
+ if (!tipc_in_scope(legacy, dst, self))
+ return;
+ if (!tipc_in_scope(legacy, b->domain, src))
+ return;
+ tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash,
+ &maddr, &respond, &dupl_addr);
+ if (dupl_addr)
+ disc_dupl_alert(b, src, &maddr);
+ if (!respond)
+ return;
+ if (mtyp != DSC_REQ_MSG)
+ return;
+ tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b);
}
-/**
- * tipc_disc_remove_dest - decrement set of discovered nodes
- * @req: ptr to link request structure
+/* tipc_disc_add_dest - increment set of discovered nodes
*/
-void tipc_disc_remove_dest(struct tipc_link_req *req)
+void tipc_disc_add_dest(struct tipc_discoverer *d)
{
- req->num_nodes--;
- disc_update(req);
+ spin_lock_bh(&d->lock);
+ d->num_nodes++;
+ spin_unlock_bh(&d->lock);
}
-/**
- * disc_send_msg - send link setup request message
- * @req: ptr to link request structure
+/* tipc_disc_remove_dest - decrement set of discovered nodes
*/
-static void disc_send_msg(struct tipc_link_req *req)
+void tipc_disc_remove_dest(struct tipc_discoverer *d)
{
- if (!req->bearer->blocked)
- tipc_bearer_send(req->bearer, req->buf, &req->dest);
+ int intv, num;
+
+ spin_lock_bh(&d->lock);
+ d->num_nodes--;
+ num = d->num_nodes;
+ intv = d->timer_intv;
+ if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) {
+ d->timer_intv = TIPC_DISC_INIT;
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ }
+ spin_unlock_bh(&d->lock);
}
-/**
- * disc_timeout - send a periodic link setup request
- * @req: ptr to link request structure
- *
+/* tipc_disc_timeout - send a periodic link setup request
* Called whenever a link setup request timer associated with a bearer expires.
+ * - Keep doubling time between sent requests until limit is reached;
+ * - Hold at fast polling rate if we don't have any associated nodes
+ * - Otherwise hold at slow polling rate
*/
-static void disc_timeout(struct tipc_link_req *req)
+static void tipc_disc_timeout(struct timer_list *t)
{
- int max_delay;
+ struct tipc_discoverer *d = timer_container_of(d, t, timer);
+ struct tipc_net *tn = tipc_net(d->net);
+ struct tipc_media_addr maddr;
+ struct sk_buff *skb = NULL;
+ struct net *net = d->net;
+ u32 bearer_id;
- spin_lock_bh(&req->bearer->lock);
+ spin_lock_bh(&d->lock);
/* Stop searching if only desired node has been found */
- if (tipc_node(req->domain) && req->num_nodes) {
- req->timer_intv = TIPC_LINK_REQ_INACTIVE;
+ if (tipc_node(d->domain) && d->num_nodes) {
+ d->timer_intv = TIPC_DISC_INACTIVE;
goto exit;
}
- /*
- * Send discovery message, then update discovery timer
- *
- * Keep doubling time between requests until limit is reached;
- * hold at fast polling rate if don't have any associated nodes,
- * otherwise hold at slow polling rate
- */
- disc_send_msg(req);
-
- req->timer_intv *= 2;
- if (req->num_nodes)
- max_delay = TIPC_LINK_REQ_SLOW;
- else
- max_delay = TIPC_LINK_REQ_FAST;
- if (req->timer_intv > max_delay)
- req->timer_intv = max_delay;
+ /* Did we just leave trial period ? */
+ if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
+ mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
+ spin_unlock_bh(&d->lock);
+ schedule_work(&tn->work);
+ return;
+ }
+
+ /* Adjust timeout interval according to discovery phase */
+ if (time_before(jiffies, tn->addr_trial_end)) {
+ d->timer_intv = TIPC_DISC_INIT;
+ } else {
+ d->timer_intv *= 2;
+ if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
+ d->timer_intv = TIPC_DISC_SLOW;
+ else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
+ d->timer_intv = TIPC_DISC_FAST;
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
+ }
- k_start_timer(&req->timer, req->timer_intv);
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ memcpy(&maddr, &d->dest, sizeof(maddr));
+ skb = skb_clone(d->skb, GFP_ATOMIC);
+ bearer_id = d->bearer_id;
exit:
- spin_unlock_bh(&req->bearer->lock);
+ spin_unlock_bh(&d->lock);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);
}
/**
* tipc_disc_create - create object to send periodic link setup requests
- * @b_ptr: ptr to bearer issuing requests
+ * @net: the applicable net namespace
+ * @b: ptr to bearer issuing requests
* @dest: destination address for request messages
- * @dest_domain: network domain to which links can be established
+ * @skb: pointer to created frame
*
- * Returns 0 if successful, otherwise -errno.
+ * Return: 0 if successful, otherwise -errno.
*/
-int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
- u32 dest_domain)
+int tipc_disc_create(struct net *net, struct tipc_bearer *b,
+ struct tipc_media_addr *dest, struct sk_buff **skb)
{
- struct tipc_link_req *req;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_discoverer *d;
- req = kmalloc(sizeof(*req), GFP_ATOMIC);
- if (!req)
+ d = kmalloc(sizeof(*d), GFP_ATOMIC);
+ if (!d)
+ return -ENOMEM;
+ d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+ if (!d->skb) {
+ kfree(d);
return -ENOMEM;
-
- req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr);
- if (!req->buf) {
- kfree(req);
- return -ENOMSG;
}
+ tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
- memcpy(&req->dest, dest, sizeof(*dest));
- req->bearer = b_ptr;
- req->domain = dest_domain;
- req->num_nodes = 0;
- req->timer_intv = TIPC_LINK_REQ_INIT;
- k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req);
- k_start_timer(&req->timer, req->timer_intv);
- b_ptr->link_req = req;
- disc_send_msg(req);
+ /* Do we need an address trial period first ? */
+ if (!tipc_own_addr(net)) {
+ tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+ msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
+ }
+ memcpy(&d->dest, dest, sizeof(*dest));
+ d->net = net;
+ d->bearer_id = b->identity;
+ d->domain = b->domain;
+ d->num_nodes = 0;
+ d->timer_intv = TIPC_DISC_INIT;
+ spin_lock_init(&d->lock);
+ timer_setup(&d->timer, tipc_disc_timeout, 0);
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ b->disc = d;
+ *skb = skb_clone(d->skb, GFP_ATOMIC);
return 0;
}
/**
* tipc_disc_delete - destroy object sending periodic link setup requests
- * @req: ptr to link request structure
+ * @d: ptr to discoverer structure
+ */
+void tipc_disc_delete(struct tipc_discoverer *d)
+{
+ timer_shutdown_sync(&d->timer);
+ kfree_skb(d->skb);
+ kfree(d);
+}
+
+/**
+ * tipc_disc_reset - reset object to send periodic link setup requests
+ * @net: the applicable net namespace
+ * @b: ptr to bearer issuing requests
*/
-void tipc_disc_delete(struct tipc_link_req *req)
+void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
- k_cancel_timer(&req->timer);
- k_term_timer(&req->timer);
- kfree_skb(req->buf);
- kfree(req);
+ struct tipc_discoverer *d = b->disc;
+ struct tipc_media_addr maddr;
+ struct sk_buff *skb;
+
+ spin_lock_bh(&d->lock);
+ tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+ d->net = net;
+ d->bearer_id = b->identity;
+ d->domain = b->domain;
+ d->num_nodes = 0;
+ d->timer_intv = TIPC_DISC_INIT;
+ memcpy(&maddr, &d->dest, sizeof(maddr));
+ mod_timer(&d->timer, jiffies + d->timer_intv);
+ skb = skb_clone(d->skb, GFP_ATOMIC);
+ spin_unlock_bh(&d->lock);
+ if (skb)
+ tipc_bearer_xmit_skb(net, b->identity, skb, &maddr);
}
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index 75b67c403aa3..521d96c41dfd 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,13 +37,15 @@
#ifndef _TIPC_DISCOVER_H
#define _TIPC_DISCOVER_H
-struct tipc_link_req;
+struct tipc_discoverer;
-int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
- u32 dest_domain);
-void tipc_disc_delete(struct tipc_link_req *req);
-void tipc_disc_add_dest(struct tipc_link_req *req);
-void tipc_disc_remove_dest(struct tipc_link_req *req);
-void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
+int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
+ struct tipc_media_addr *dest, struct sk_buff **skb);
+void tipc_disc_delete(struct tipc_discoverer *req);
+void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr);
+void tipc_disc_add_dest(struct tipc_discoverer *req);
+void tipc_disc_remove_dest(struct tipc_discoverer *req);
+void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
+ struct tipc_bearer *b_ptr);
#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 40ea40cf6204..cb0d185e06af 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -1,8 +1,8 @@
/*
* net/tipc/eth_media.c: Ethernet bearer support for TIPC
*
- * Copyright (c) 2001-2007, Ericsson AB
- * Copyright (c) 2005-2008, 2011, Wind River Systems
+ * Copyright (c) 2001-2007, 2013-2014, Ericsson AB
+ * Copyright (c) 2005-2008, 2011-2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,340 +37,62 @@
#include "core.h"
#include "bearer.h"
-#define MAX_ETH_BEARERS MAX_BEARERS
-
-#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */
-
-/**
- * struct eth_bearer - Ethernet bearer data structure
- * @bearer: ptr to associated "generic" bearer structure
- * @dev: ptr to associated Ethernet network device
- * @tipc_packet_type: used in binding TIPC to Ethernet driver
- * @setup: work item used when enabling bearer
- * @cleanup: work item used when disabling bearer
- */
-struct eth_bearer {
- struct tipc_bearer *bearer;
- struct net_device *dev;
- struct packet_type tipc_packet_type;
- struct work_struct setup;
- struct work_struct cleanup;
-};
-
-static struct tipc_media eth_media_info;
-static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
-static int eth_started;
-
-static int recv_notification(struct notifier_block *nb, unsigned long evt,
- void *dv);
-/*
- * Network device notifier info
- */
-static struct notifier_block notifier = {
- .notifier_call = recv_notification,
- .priority = 0
-};
-
-/**
- * eth_media_addr_set - initialize Ethernet media address structure
- *
- * Media-dependent "value" field stores MAC address in first 6 bytes
- * and zeroes out the remaining bytes.
- */
-static void eth_media_addr_set(const struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *a, char *mac)
-{
- memcpy(a->value, mac, ETH_ALEN);
- memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN);
- a->media_id = TIPC_MEDIA_TYPE_ETH;
- a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN);
-}
-
-/**
- * send_msg - send a TIPC message out over an Ethernet interface
- */
-static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *dest)
+/* Convert Ethernet address (media address format) to string */
+static int tipc_eth_addr2str(struct tipc_media_addr *addr,
+ char *strbuf, int bufsz)
{
- struct sk_buff *clone;
- struct net_device *dev;
- int delta;
-
- clone = skb_clone(buf, GFP_ATOMIC);
- if (!clone)
- return 0;
-
- dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
- delta = dev->hard_header_len - skb_headroom(buf);
-
- if ((delta > 0) &&
- pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(clone);
- return 0;
- }
-
- skb_reset_network_header(clone);
- clone->dev = dev;
- clone->protocol = htons(ETH_P_TIPC);
- dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
- dev->dev_addr, clone->len);
- dev_queue_xmit(clone);
- return 0;
-}
-
-/**
- * recv_msg - handle incoming TIPC message from an Ethernet interface
- *
- * Accept only packets explicitly sent to this node, or broadcast packets;
- * ignores packets sent using Ethernet multicast, and traffic sent to other
- * nodes (which can happen if interface is running in promiscuous mode).
- */
-static int recv_msg(struct sk_buff *buf, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
-{
- struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
-
- if (!net_eq(dev_net(dev), &init_net)) {
- kfree_skb(buf);
- return 0;
- }
-
- if (likely(eb_ptr->bearer)) {
- if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
- buf->next = NULL;
- tipc_recv_msg(buf, eb_ptr->bearer);
- return 0;
- }
- }
- kfree_skb(buf);
- return 0;
-}
-
-/**
- * setup_bearer - setup association between Ethernet bearer and interface
- */
-static void setup_bearer(struct work_struct *work)
-{
- struct eth_bearer *eb_ptr =
- container_of(work, struct eth_bearer, setup);
-
- dev_add_pack(&eb_ptr->tipc_packet_type);
-}
-
-/**
- * enable_bearer - attach TIPC bearer to an Ethernet interface
- */
-static int enable_bearer(struct tipc_bearer *tb_ptr)
-{
- struct net_device *dev;
- struct eth_bearer *eb_ptr = &eth_bearers[0];
- struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
- char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
- int pending_dev = 0;
-
- /* Find unused Ethernet bearer structure */
- while (eb_ptr->dev) {
- if (!eb_ptr->bearer)
- pending_dev++;
- if (++eb_ptr == stop)
- return pending_dev ? -EAGAIN : -EDQUOT;
- }
-
- /* Find device with specified name */
- dev = dev_get_by_name(&init_net, driver_name);
- if (!dev)
- return -ENODEV;
-
- /* Create Ethernet bearer for device */
- eb_ptr->dev = dev;
- eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
- eb_ptr->tipc_packet_type.dev = dev;
- eb_ptr->tipc_packet_type.func = recv_msg;
- eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
- INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
- INIT_WORK(&eb_ptr->setup, setup_bearer);
- schedule_work(&eb_ptr->setup);
+ if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */
+ return 1;
- /* Associate TIPC bearer with Ethernet bearer */
- eb_ptr->bearer = tb_ptr;
- tb_ptr->usr_handle = (void *)eb_ptr;
- memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
- memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN);
- tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH;
- tb_ptr->bcast_addr.broadcast = 1;
- tb_ptr->mtu = dev->mtu;
- tb_ptr->blocked = 0;
- eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
+ sprintf(strbuf, "%pM", addr->value);
return 0;
}
-/**
- * cleanup_bearer - break association between Ethernet bearer and interface
- *
- * This routine must be invoked from a work queue because it can sleep.
- */
-static void cleanup_bearer(struct work_struct *work)
-{
- struct eth_bearer *eb_ptr =
- container_of(work, struct eth_bearer, cleanup);
-
- dev_remove_pack(&eb_ptr->tipc_packet_type);
- dev_put(eb_ptr->dev);
- eb_ptr->dev = NULL;
-}
-
-/**
- * disable_bearer - detach TIPC bearer from an Ethernet interface
- *
- * Mark Ethernet bearer as inactive so that incoming buffers are thrown away,
- * then get worker thread to complete bearer cleanup. (Can't do cleanup
- * here because cleanup code needs to sleep and caller holds spinlocks.)
- */
-static void disable_bearer(struct tipc_bearer *tb_ptr)
-{
- struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle;
-
- eb_ptr->bearer = NULL;
- INIT_WORK(&eb_ptr->cleanup, cleanup_bearer);
- schedule_work(&eb_ptr->cleanup);
-}
-
-/**
- * recv_notification - handle device updates from OS
- *
- * Change the state of the Ethernet bearer (if any) associated with the
- * specified device.
- */
-static int recv_notification(struct notifier_block *nb, unsigned long evt,
- void *ptr)
+/* Convert from media address format to discovery message addr format */
+static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct eth_bearer *eb_ptr = &eth_bearers[0];
- struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- while ((eb_ptr->dev != dev)) {
- if (++eb_ptr == stop)
- return NOTIFY_DONE; /* couldn't find device */
- }
- if (!eb_ptr->bearer)
- return NOTIFY_DONE; /* bearer had been disabled */
-
- eb_ptr->bearer->mtu = dev->mtu;
-
- switch (evt) {
- case NETDEV_CHANGE:
- if (netif_carrier_ok(dev))
- tipc_continue(eb_ptr->bearer);
- else
- tipc_block_bearer(eb_ptr->bearer->name);
- break;
- case NETDEV_UP:
- tipc_continue(eb_ptr->bearer);
- break;
- case NETDEV_DOWN:
- tipc_block_bearer(eb_ptr->bearer->name);
- break;
- case NETDEV_CHANGEMTU:
- case NETDEV_CHANGEADDR:
- tipc_block_bearer(eb_ptr->bearer->name);
- tipc_continue(eb_ptr->bearer);
- break;
- case NETDEV_UNREGISTER:
- case NETDEV_CHANGENAME:
- tipc_disable_bearer(eb_ptr->bearer->name);
- break;
- }
- return NOTIFY_OK;
-}
-
-/**
- * eth_addr2str - convert Ethernet address to string
- */
-static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
-{
- if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */
- return 1;
-
- sprintf(str_buf, "%pM", a->value);
+ memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
+ msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
+ memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN);
return 0;
}
-/**
- * eth_str2addr - convert Ethernet address format to message header format
- */
-static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
+/* Convert raw mac address format to media addr format */
+static int tipc_eth_raw2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ const char *msg)
{
- memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
- msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
- memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN);
+ memset(addr, 0, sizeof(*addr));
+ ether_addr_copy(addr->value, msg);
+ addr->media_id = TIPC_MEDIA_TYPE_ETH;
+ addr->broadcast = is_broadcast_ether_addr(addr->value);
return 0;
}
-/**
- * eth_str2addr - convert message header address format to Ethernet format
- */
-static int eth_msg2addr(const struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *a, char *msg_area)
+/* Convert discovery msg addr format to Ethernet media addr format */
+static int tipc_eth_msg2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg)
{
- if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
- return 1;
-
- eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);
- return 0;
+ /* Skip past preamble: */
+ msg += TIPC_MEDIA_ADDR_OFFSET;
+ return tipc_eth_raw2addr(b, addr, msg);
}
-/*
- * Ethernet media registration info
- */
-static struct tipc_media eth_media_info = {
- .send_msg = send_msg,
- .enable_bearer = enable_bearer,
- .disable_bearer = disable_bearer,
- .addr2str = eth_addr2str,
- .addr2msg = eth_addr2msg,
- .msg2addr = eth_msg2addr,
+/* Ethernet media registration info */
+struct tipc_media eth_media_info = {
+ .send_msg = tipc_l2_send_msg,
+ .enable_media = tipc_enable_l2_media,
+ .disable_media = tipc_disable_l2_media,
+ .addr2str = tipc_eth_addr2str,
+ .addr2msg = tipc_eth_addr2msg,
+ .msg2addr = tipc_eth_msg2addr,
+ .raw2addr = tipc_eth_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_MAX_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_ETH,
+ .hwaddr_len = ETH_ALEN,
.name = "eth"
};
-
-/**
- * tipc_eth_media_start - activate Ethernet bearer support
- *
- * Register Ethernet media type with TIPC bearer code. Also register
- * with OS for notifications about device state changes.
- */
-int tipc_eth_media_start(void)
-{
- int res;
-
- if (eth_started)
- return -EINVAL;
-
- res = tipc_register_media(&eth_media_info);
- if (res)
- return res;
-
- res = register_netdevice_notifier(&notifier);
- if (!res)
- eth_started = 1;
- return res;
-}
-
-/**
- * tipc_eth_media_stop - deactivate Ethernet bearer support
- */
-void tipc_eth_media_stop(void)
-{
- if (!eth_started)
- return;
-
- flush_scheduled_work();
- unregister_netdevice_notifier(&notifier);
- eth_started = 0;
-}
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 000000000000..3e137d8c9d2f
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,959 @@
+/*
+ * net/tipc/group.c: TIPC group messaging code
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * Copyright (c) 2020, Red Hat Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "group.h"
+#include "bcast.h"
+#include "topsrv.h"
+#include "msg.h"
+#include "socket.h"
+#include "node.h"
+#include "name_table.h"
+#include "subscr.h"
+
+/* Advertised receive window sizes, counted in flow control blocks.
+ * ADV_UNIT is the number of FLOWCTL_BLK_SZ blocks covered by a maximum size
+ * message plus header, plus one. Members in JOINED or PENDING state are
+ * advertised up to ADV_IDLE blocks, members in ACTIVE state up to ADV_ACTIVE
+ * blocks.
+ */
+#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
+#define ADV_IDLE ADV_UNIT
+#define ADV_ACTIVE (ADV_UNIT * 12)
+
+/* enum mbr_state - life cycle states of a remote group member
+ * MBR_JOINING:    JOIN message received, PUBLISHED event not yet seen
+ * MBR_PUBLISHED:  PUBLISHED event received, JOIN message not yet seen
+ * MBR_JOINED:     both received; member is in service with an idle window
+ * MBR_PENDING:    member wanted to go active but no active slot was free
+ * MBR_ACTIVE:     member is on the active list with an active window
+ * MBR_RECLAIMING: RECLAIM message sent to member, REMIT not yet received
+ * MBR_REMITTED:   REMIT received, but advertised blocks are still in flight
+ * MBR_LEAVING:    LEAVE message or WITHDRAWN event received
+ */
+enum mbr_state {
+ MBR_JOINING,
+ MBR_PUBLISHED,
+ MBR_JOINED,
+ MBR_PENDING,
+ MBR_ACTIVE,
+ MBR_RECLAIMING,
+ MBR_REMITTED,
+ MBR_LEAVING
+};
+
+/* struct tipc_member - local representation of one remote group member
+ * @tree_node:  linkage in the owning group's 'members' rb-tree
+ * @list:       linkage in the group's 'active' or 'pending' list
+ * @small_win:  linkage in the group's 'small_win' list
+ * @deferredq:  received messages waiting for in-sequence delivery
+ * @group:      back pointer to the owning group
+ * @node:       node address of the member; upper half of the tree key
+ * @port:       port number of the member; lower half of the tree key
+ * @instance:   service instance the member was published with
+ * @state:      current state, see enum mbr_state
+ * @advertised: receive window blocks we have advertised to this member
+ * @window:     send window blocks this member has advertised to us
+ * @bc_rcv_nxt: next broadcast sequence number expected from this member
+ * @bc_syncpt:  broadcast sync point taken from the member's JOIN/LEAVE msg
+ * @bc_acked:   last broadcast sequence number acknowledged by this member
+ */
+struct tipc_member {
+ struct rb_node tree_node;
+ struct list_head list;
+ struct list_head small_win;
+ struct sk_buff_head deferredq;
+ struct tipc_group *group;
+ u32 node;
+ u32 port;
+ u32 instance;
+ enum mbr_state state;
+ u16 advertised;
+ u16 window;
+ u16 bc_rcv_nxt;
+ u16 bc_syncpt;
+ u16 bc_acked;
+};
+
+/* struct tipc_group - per socket state for a communication group
+ * @members:    rb-tree of known members, keyed by (node << 32 | port)
+ * @small_win:  members whose send window is below ADV_IDLE, smallest first
+ * @pending:    members in state MBR_PENDING, oldest first
+ * @active:     members in state MBR_ACTIVE, oldest first
+ * @dests:      list of nodes hosting at least one member
+ * @net:        network namespace the group belongs to
+ * @subid:      id of the kernel topology subscription tracking the group
+ * @type:       group (service type) identity
+ * @instance:   own member instance
+ * @scope:      visibility scope given in the join request
+ * @portid:     port id of the owning socket
+ * @member_cnt: number of entries in @members
+ * @active_cnt: number of members in ACTIVE, RECLAIMING or REMITTED state
+ * @max_active: limit for @active_cnt, recalculated with the rcvbuf limit
+ * @bc_snd_nxt: sequence number of the next broadcast to be sent
+ * @bc_ackers:  number of acknowledges still expected for the last broadcast
+ * @open:       caller owned flag telling if the group is open for sending
+ * @loopback:   TIPC_GROUP_LOOPBACK was set in the join request
+ * @events:     TIPC_GROUP_MEMBER_EVTS was set in the join request
+ */
+struct tipc_group {
+ struct rb_root members;
+ struct list_head small_win;
+ struct list_head pending;
+ struct list_head active;
+ struct tipc_nlist dests;
+ struct net *net;
+ int subid;
+ u32 type;
+ u32 instance;
+ u32 scope;
+ u32 portid;
+ u16 member_cnt;
+ u16 active_cnt;
+ u16 max_active;
+ u16 bc_snd_nxt;
+ u16 bc_ackers;
+ bool *open;
+ bool loopback;
+ bool events;
+};
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq);
+
+/* tipc_group_open() - take a member off the small window list, if present
+ * @m: member to remove
+ * @wakeup: set to true if the member was on the list, otherwise to false
+ *
+ * When the member is removed, the group's 'open' flag is raised as well.
+ */
+static void tipc_group_open(struct tipc_member *m, bool *wakeup)
+{
+	bool was_listed = !list_empty(&m->small_win);
+
+	*wakeup = was_listed;
+	if (was_listed) {
+		list_del_init(&m->small_win);
+		*m->group->open = true;
+	}
+}
+
+/* tipc_group_decr_active() - release the active slot held by a member
+ * @grp: group owning the member
+ * @m: member about to leave its current state
+ *
+ * Only members in ACTIVE, RECLAIMING or REMITTED state occupy a slot.
+ */
+static void tipc_group_decr_active(struct tipc_group *grp,
+				   struct tipc_member *m)
+{
+	switch (m->state) {
+	case MBR_ACTIVE:
+	case MBR_RECLAIMING:
+	case MBR_REMITTED:
+		grp->active_cnt--;
+		break;
+	default:
+		break;
+	}
+}
+
+/* tipc_group_rcvbuf_limit() - calculate receive buffer limit for the group
+ * @grp: group to calculate for; its 'max_active' field is updated
+ *
+ * Return: limit in bytes, based on member count plus one
+ */
+static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
+{
+	int mcnt = grp->member_cnt + 1;
+	int max_active = mcnt / 8;
+	int blocks;
+
+	/* Limit simultaneous reception from other members to [16, 64] */
+	if (max_active > 64)
+		max_active = 64;
+	if (max_active < 16)
+		max_active = 16;
+	grp->max_active = max_active;
+
+	/* Reserve blocks for active members, then for the idle remainder */
+	blocks = max_active * ADV_ACTIVE;
+	blocks += (mcnt - max_active) * ADV_IDLE;
+
+	/* Scale to bytes, considering worst-case truesize/msgsize ratio */
+	return blocks * FLOWCTL_BLK_SZ * 4;
+}
+
+/* tipc_group_bc_snd_nxt() - read the group's next broadcast send seqno
+ */
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
+{
+ return grp->bc_snd_nxt;
+}
+
+/* tipc_group_is_receiver() - check if we may send to this member
+ * @m: member to check, may be NULL
+ *
+ * Return: false for NULL and for members in JOINING or LEAVING state
+ */
+static bool tipc_group_is_receiver(struct tipc_member *m)
+{
+	if (!m)
+		return false;
+	return !(m->state == MBR_JOINING || m->state == MBR_LEAVING);
+}
+
+/* tipc_group_is_sender() - check if we accept messages from this member
+ * @m: member to check, may be NULL
+ *
+ * Return: false for NULL and for members in JOINING or PUBLISHED state
+ */
+static bool tipc_group_is_sender(struct tipc_member *m)
+{
+	if (!m)
+		return false;
+	return !(m->state == MBR_JOINING || m->state == MBR_PUBLISHED);
+}
+
+/* tipc_group_exclude() - get the port to exclude from group transmissions
+ * @grp: group to query
+ *
+ * Return: own port id when loopback is disabled, otherwise 0
+ */
+u32 tipc_group_exclude(struct tipc_group *grp)
+{
+	return grp->loopback ? 0 : grp->portid;
+}
+
+/* tipc_group_create() - allocate a group and subscribe for its members
+ * @net: network namespace
+ * @portid: port id of the socket creating the group
+ * @mreq: join request holding type, instance, scope and flags
+ * @group_is_open: caller owned flag; kept as the group's 'open' indicator
+ *                 and cleared here
+ *
+ * Return: the new group, or NULL if allocation or subscription failed
+ */
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+				     struct tipc_group_req *mreq,
+				     bool *group_is_open)
+{
+	u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS;
+	struct tipc_group *grp;
+
+	/* Subscription scope follows the scope of the join request */
+	if (mreq->scope != TIPC_NODE_SCOPE)
+		filter |= TIPC_SUB_CLUSTER_SCOPE;
+	else
+		filter |= TIPC_SUB_NODE_SCOPE;
+
+	grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
+	if (!grp)
+		return NULL;
+
+	tipc_nlist_init(&grp->dests, tipc_own_addr(net));
+	INIT_LIST_HEAD(&grp->small_win);
+	INIT_LIST_HEAD(&grp->active);
+	INIT_LIST_HEAD(&grp->pending);
+	grp->members = RB_ROOT;
+	grp->net = net;
+	grp->portid = portid;
+	grp->type = mreq->type;
+	grp->instance = mreq->instance;
+	grp->scope = mreq->scope;
+	grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
+	grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
+	grp->open = group_is_open;
+	*grp->open = false;
+
+	/* A non-zero return value means the subscription succeeded */
+	if (!tipc_topsrv_kern_subscr(net, portid, mreq->type, 0, ~0,
+				     filter, &grp->subid)) {
+		kfree(grp);
+		return NULL;
+	}
+	return grp;
+}
+
+/* tipc_group_join() - announce ourselves to all currently known members
+ * @net: network namespace used for transmitting
+ * @grp: group being joined
+ * @sk_rcvbuf: output; receives the recalculated receive buffer limit
+ *
+ * Queues a GRP_JOIN_MSG towards every member in the tree, re-evaluates each
+ * member's position in the small window list and sends the queued messages.
+ */
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ __skb_queue_head_init(&xmitq);
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
+ tipc_group_update_member(m, 0);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+}
+
+/* tipc_group_delete() - leave the group and release all its resources
+ * @net: network namespace used for transmitting and unsubscribing
+ * @grp: group to delete; freed on return
+ *
+ * Queues a GRP_LEAVE_MSG towards every member, purges each member's deferred
+ * queue and frees the member. The members are freed while walking the tree
+ * in post-order, without erasing them from it. Finally the LEAVE messages
+ * are sent, the destination list is purged and the topology subscription is
+ * cancelled.
+ */
+void tipc_group_delete(struct net *net, struct tipc_group *grp)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ __skb_queue_head_init(&xmitq);
+
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+ __skb_queue_purge(&m->deferredq);
+ list_del(&m->list);
+ kfree(m);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ tipc_nlist_purge(&grp->dests);
+ tipc_topsrv_kern_unsubscr(net, grp->subid);
+ kfree(grp);
+}
+
+/* tipc_group_find_member() - look up a member by node address and port
+ * @grp: group to search
+ * @node: node address of the wanted member
+ * @port: port number of the wanted member
+ *
+ * The tree is ordered on the 64-bit key (node << 32 | port).
+ *
+ * Return: the matching member regardless of its state, or NULL if not found
+ */
+static struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct rb_node *n = grp->members.rb_node;
+ u64 nkey, key = (u64)node << 32 | port;
+ struct tipc_member *m;
+
+ while (n) {
+ m = container_of(n, struct tipc_member, tree_node);
+ nkey = (u64)m->node << 32 | m->port;
+ if (key < nkey)
+ n = n->rb_left;
+ else if (key > nkey)
+ n = n->rb_right;
+ else
+ return m;
+ }
+ return NULL;
+}
+
+/* tipc_group_find_dest() - look up a member we are allowed to send to
+ * @grp: group to search
+ * @node: node address of the wanted member
+ * @port: port number of the wanted member
+ *
+ * Return: the member if it exists and is a valid receiver, otherwise NULL
+ */
+static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
+						u32 node, u32 port)
+{
+	struct tipc_member *m = tipc_group_find_member(grp, node, port);
+
+	/* The receiver check also rejects a NULL member */
+	return tipc_group_is_receiver(m) ? m : NULL;
+}
+
+/* tipc_group_find_node() - find any member residing on a given node
+ * @grp: group to search
+ * @node: node address to look for
+ *
+ * Return: first member, in tree order, on that node; NULL if there is none
+ */
+static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
+						u32 node)
+{
+	struct rb_node *n = rb_first(&grp->members);
+
+	while (n) {
+		struct tipc_member *m;
+
+		m = container_of(n, struct tipc_member, tree_node);
+		if (m->node == node)
+			return m;
+		n = rb_next(n);
+	}
+	return NULL;
+}
+
+/* tipc_group_add_to_tree() - insert a member into the group's member tree
+ * @grp: group owning the tree
+ * @m: member to insert; its node and port must already be set
+ *
+ * The tree is ordered on the 64-bit key (node << 32 | port).
+ *
+ * Return: 0 on success, -EEXIST if a member with the same key is present
+ */
+static int tipc_group_add_to_tree(struct tipc_group *grp,
+				  struct tipc_member *m)
+{
+	u64 nkey, key = (u64)m->node << 32 | m->port;
+	struct rb_node **n, *parent = NULL;
+	struct tipc_member *tmp;
+
+	n = &grp->members.rb_node;
+	while (*n) {
+		/* Remember the node we descend from; it becomes the parent */
+		parent = *n;
+		tmp = container_of(parent, struct tipc_member, tree_node);
+		nkey = (u64)tmp->node << 32 | tmp->port;
+		if (key < nkey)
+			n = &(*n)->rb_left;
+		else if (key > nkey)
+			n = &(*n)->rb_right;
+		else
+			return -EEXIST;
+	}
+	rb_link_node(&m->tree_node, parent, n);
+	rb_insert_color(&m->tree_node, &grp->members);
+	return 0;
+}
+
+/* tipc_group_create_member() - allocate a member and add it to the group
+ * @grp: group to add the member to
+ * @node: node address of the member
+ * @port: port number of the member
+ * @instance: service instance of the member
+ * @state: initial state, one of enum mbr_state
+ *
+ * The member's acked broadcast seqno starts at the one preceding the group's
+ * next send seqno. On success the member count is incremented and the
+ * member's node is added to the group's destination list.
+ *
+ * Return: the new member, or NULL if allocation failed or a member with the
+ * same node and port already exists
+ */
+static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
+ u32 node, u32 port,
+ u32 instance, int state)
+{
+ struct tipc_member *m;
+ int ret;
+
+ m = kzalloc(sizeof(*m), GFP_ATOMIC);
+ if (!m)
+ return NULL;
+ INIT_LIST_HEAD(&m->list);
+ INIT_LIST_HEAD(&m->small_win);
+ __skb_queue_head_init(&m->deferredq);
+ m->group = grp;
+ m->node = node;
+ m->port = port;
+ m->instance = instance;
+ m->bc_acked = grp->bc_snd_nxt - 1;
+ ret = tipc_group_add_to_tree(grp, m);
+ if (ret < 0) {
+ kfree(m);
+ return NULL;
+ }
+ grp->member_cnt++;
+ tipc_nlist_add(&grp->dests, m->node);
+ m->state = state;
+ return m;
+}
+
+/* tipc_group_add_member() - add a member in state MBR_PUBLISHED
+ * @grp: group to add the member to
+ * @node: node address of the member
+ * @port: port number of the member
+ * @instance: service instance of the member
+ *
+ * Failure to create the member is silently ignored.
+ */
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+ u32 port, u32 instance)
+{
+ tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED);
+}
+
+/* tipc_group_delete_member() - remove a member from the group and free it
+ * @grp: group owning the member
+ * @m: member to delete; freed on return
+ *
+ * Adjusts member, acker and active counters, unlinks the member from all
+ * lists and drops its node from the destination list if no other member
+ * remains on that node. The member's deferred queue is not purged here.
+ */
+static void tipc_group_delete_member(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ rb_erase(&m->tree_node, &grp->members);
+ grp->member_cnt--;
+
+ /* Check if we were waiting for replicast ack from this member */
+ if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
+ grp->bc_ackers--;
+
+ list_del_init(&m->list);
+ list_del_init(&m->small_win);
+ tipc_group_decr_active(grp, m);
+
+ /* If last member on a node, remove node from dest list */
+ if (!tipc_group_find_node(grp, m->node))
+ tipc_nlist_del(&grp->dests, m->node);
+
+ kfree(m);
+}
+
+/* tipc_group_dests() - get the group's list of destination nodes
+ */
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
+{
+ return &grp->dests;
+}
+
+/* tipc_group_self() - report own identity within the group
+ * @grp: group to query
+ * @seq: output; type is the group type, lower and upper are own instance
+ * @scope: output; scope of the group
+ */
+void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq,
+ int *scope)
+{
+ seq->type = grp->type;
+ seq->lower = grp->instance;
+ seq->upper = grp->instance;
+ *scope = grp->scope;
+}
+
+/* tipc_group_update_member() - consume send window towards a member
+ * @m: member a message is being sent to
+ * @len: number of window blocks consumed; may be 0 to only re-sort
+ *
+ * Does nothing unless the member is a valid receiver. If the remaining
+ * window drops below ADV_IDLE the member is (re)inserted into the group's
+ * small window list, which is kept ordered by ascending window.
+ */
+void tipc_group_update_member(struct tipc_member *m, int len)
+{
+ struct tipc_group *grp = m->group;
+ struct tipc_member *_m, *tmp;
+
+ if (!tipc_group_is_receiver(m))
+ return;
+
+ m->window -= len;
+
+ if (m->window >= ADV_IDLE)
+ return;
+
+ list_del_init(&m->small_win);
+
+ /* Sort member into small_window members' list */
+ list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) {
+ if (_m->window > m->window)
+ break;
+ }
+ /* Insert in front of the first entry with a larger window. This relies
+ * on the iterator referring to the list head position when the loop
+ * ran to completion, which makes the insertion land at the tail.
+ */
+ list_add_tail(&m->small_win, &_m->small_win);
+}
+
+/* tipc_group_update_bc_members() - account for a broadcast just sent
+ * @grp: group the broadcast was sent in
+ * @len: number of window blocks the message consumes per receiver
+ * @ack: true if receivers were requested to acknowledge the message
+ *
+ * Consumes @len blocks of send window for every receiving member, resets
+ * each receiver's acked seqno to the one preceding this broadcast and
+ * advances the group's next send seqno.
+ */
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
+{
+ u16 prev = grp->bc_snd_nxt - 1;
+ struct tipc_member *m;
+ struct rb_node *n;
+ u16 ackers = 0;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (tipc_group_is_receiver(m)) {
+ tipc_group_update_member(m, len);
+ m->bc_acked = prev;
+ ackers++;
+ }
+ }
+
+ /* Mark number of acknowledges to expect, if any */
+ if (ack)
+ grp->bc_ackers = ackers;
+ grp->bc_snd_nxt++;
+}
+
+/* tipc_group_cong() - check if sending to a member would exceed its window
+ * @grp: group the destination belongs to
+ * @dnode: node address of the destination member
+ * @dport: port number of the destination member
+ * @len: number of window blocks the message needs
+ * @mbr: output; the destination member, or NULL if it is no valid receiver
+ *
+ * When congested, the group is marked as not open. Unless our own
+ * advertised window towards that member is already at its state's nominal
+ * value, a GRP_ADV_MSG is sent immediately.
+ *
+ * Return: true if the member's send window is smaller than @len
+ */
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **mbr)
+{
+ struct sk_buff_head xmitq;
+ struct tipc_member *m;
+ int adv, state;
+
+ m = tipc_group_find_dest(grp, dnode, dport);
+ if (!tipc_group_is_receiver(m)) {
+ *mbr = NULL;
+ return false;
+ }
+ *mbr = m;
+
+ if (m->window >= len)
+ return false;
+
+ *grp->open = false;
+
+ /* If not fully advertised, do it now to prevent mutual blocking */
+ adv = m->advertised;
+ state = m->state;
+ if (state == MBR_JOINED && adv == ADV_IDLE)
+ return true;
+ if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
+ return true;
+ if (state == MBR_PENDING && adv == ADV_IDLE)
+ return true;
+ __skb_queue_head_init(&xmitq);
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
+ tipc_node_distr_xmit(grp->net, &xmitq);
+ return true;
+}
+
+/* tipc_group_bc_cong() - check if a broadcast of given size must wait
+ * @grp: group to broadcast in
+ * @len: number of window blocks the message needs
+ *
+ * Only the first entry of the small window list is examined, since that
+ * list is kept sorted with the smallest window first.
+ *
+ * Return: true if acknowledges are still outstanding or if the member with
+ * the smallest send window cannot take @len blocks
+ */
+bool tipc_group_bc_cong(struct tipc_group *grp, int len)
+{
+ struct tipc_member *m = NULL;
+
+ /* If prev bcast was replicast, reject until all receivers have acked */
+ if (grp->bc_ackers) {
+ *grp->open = false;
+ return true;
+ }
+ if (list_empty(&grp->small_win))
+ return false;
+
+ m = list_first_entry(&grp->small_win, struct tipc_member, small_win);
+ if (m->window >= len)
+ return false;
+
+ return tipc_group_cong(grp, m->node, m->port, len, &m);
+}
+
+/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
+ * @skb: message buffer to insert
+ * @defq: deferred queue of the sending member
+ *
+ * Broadcast and multicast messages are inserted in front of the first
+ * queued message carrying a higher broadcast seqno. All other message
+ * types, and broadcasts with no such successor, are appended to the tail.
+ */
+static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
+{
+ struct tipc_msg *_hdr, *hdr = buf_msg(skb);
+ u16 bc_seqno = msg_grp_bc_seqno(hdr);
+ struct sk_buff *_skb, *tmp;
+ int mtyp = msg_type(hdr);
+
+ /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
+ if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
+ skb_queue_walk_safe(defq, _skb, tmp) {
+ _hdr = buf_msg(_skb);
+ if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
+ continue;
+ __skb_queue_before(defq, _skb, skb);
+ return;
+ }
+ /* Bcast was not bypassed, - add to tail */
+ }
+ /* Unicasts are never bypassed, - always add to tail */
+ __skb_queue_tail(defq, skb);
+}
+
+/* tipc_group_filter_msg() - determine if we should accept arriving message
+ * @grp: group the receiving socket belongs to
+ * @inputq: queue holding the arriving message at its head; accepted
+ *          messages are appended back to its tail
+ * @xmitq: queue for protocol messages generated as a result
+ *
+ * Takes one message off @inputq. It is dropped if it is not a group
+ * message, if its origin is not a valid sending member, or if its broadcast
+ * seqno is older than the next one expected from that member. Otherwise it
+ * is sorted into the member's deferred queue, and all messages at the head
+ * of that queue which are now in sequence are processed.
+ */
+void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ bool ack, deliver, update, leave = false;
+ struct sk_buff_head *defq;
+ struct tipc_member *m;
+ struct tipc_msg *hdr;
+ u32 node, port;
+ int mtyp, blks;
+
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ port = msg_origport(hdr);
+
+ if (!msg_in_group(hdr))
+ goto drop;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!tipc_group_is_sender(m))
+ goto drop;
+
+ if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ defq = &m->deferredq;
+ tipc_group_sort_msg(skb, defq);
+
+ while ((skb = skb_peek(defq))) {
+ hdr = buf_msg(skb);
+ mtyp = msg_type(hdr);
+ blks = msg_blocks(hdr);
+ deliver = true;
+ ack = false;
+ update = false;
+
+ /* Stop at the first message that is ahead of sequence */
+ if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ break;
+
+ /* Decide what to do with message */
+ switch (mtyp) {
+ case TIPC_GRP_MCAST_MSG:
+ /* Not for our instance: consume it without delivering */
+ if (msg_nameinst(hdr) != grp->instance) {
+ update = true;
+ deliver = false;
+ }
+ fallthrough;
+ case TIPC_GRP_BCAST_MSG:
+ m->bc_rcv_nxt++;
+ ack = msg_grp_bc_ack_req(hdr);
+ break;
+ case TIPC_GRP_UCAST_MSG:
+ break;
+ case TIPC_GRP_MEMBER_EVT:
+ if (m->state == MBR_LEAVING)
+ leave = true;
+ if (!grp->events)
+ deliver = false;
+ break;
+ default:
+ break;
+ }
+
+ /* Execute decisions */
+ __skb_dequeue(defq);
+ if (deliver)
+ __skb_queue_tail(inputq, skb);
+ else
+ kfree_skb(skb);
+
+ if (ack)
+ tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
+
+ /* Member is gone: drop what is left and stop using it */
+ if (leave) {
+ __skb_queue_purge(defq);
+ tipc_group_delete_member(grp, m);
+ break;
+ }
+ if (!update)
+ continue;
+
+ tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
+ }
+ return;
+drop:
+ kfree_skb(skb);
+}
+
+/* tipc_group_update_rcv_win() - account for blocks consumed from a sender
+ * @grp: group the receiving socket belongs to
+ * @blks: number of advertised blocks the consumed message occupied
+ * @node: node address of the sending member
+ * @port: port number of the sending member
+ * @xmitq: queue for protocol messages generated as a result
+ *
+ * Reduces the window advertised to the sender and, depending on the
+ * sender's state, promotes it to active, renews its advertisement, or
+ * completes a pending reclaim. The decisions in the MBR_JOINED case are
+ * based on the active count sampled at entry, not on the updated one.
+ */
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq)
+{
+ struct list_head *active = &grp->active;
+ int max_active = grp->max_active;
+ int reclaim_limit = max_active * 3 / 4;
+ int active_cnt = grp->active_cnt;
+ struct tipc_member *m, *rm, *pm;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!m)
+ return;
+
+ m->advertised -= blks;
+
+ switch (m->state) {
+ case MBR_JOINED:
+ /* First, decide if member can go active */
+ if (active_cnt <= max_active) {
+ m->state = MBR_ACTIVE;
+ list_add_tail(&m->list, active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ } else {
+ m->state = MBR_PENDING;
+ list_add_tail(&m->list, &grp->pending);
+ }
+
+ if (active_cnt < reclaim_limit)
+ break;
+
+ /* Reclaim from oldest active member, if possible */
+ if (!list_empty(active)) {
+ rm = list_first_entry(active, struct tipc_member, list);
+ rm->state = MBR_RECLAIMING;
+ list_del_init(&rm->list);
+ tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
+ break;
+ }
+ /* Nobody to reclaim from; - revert oldest pending to JOINED */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ list_del_init(&pm->list);
+ pm->state = MBR_JOINED;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ break;
+ case MBR_ACTIVE:
+ /* Most recently used member goes last in the active list */
+ if (!list_is_last(&m->list, &grp->active))
+ list_move_tail(&m->list, &grp->active);
+ /* Re-advertise only when a quarter of the window is used up */
+ if (m->advertised > (ADV_ACTIVE * 3 / 4))
+ break;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ break;
+ case MBR_REMITTED:
+ /* Wait until the in-flight messages have been consumed */
+ if (m->advertised > ADV_IDLE)
+ break;
+ m->state = MBR_JOINED;
+ grp->active_cnt--;
+ if (m->advertised < ADV_IDLE) {
+ pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ }
+
+ if (list_empty(&grp->pending))
+ return;
+
+ /* Set oldest pending member to active and advertise */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ break;
+ case MBR_RECLAIMING:
+ case MBR_JOINING:
+ case MBR_LEAVING:
+ default:
+ break;
+ }
+}
+
+/* tipc_group_create_event() - build a membership event message
+ * @grp: group the event concerns
+ * @m: member that was published or withdrawn
+ * @event: event type to report, e.g. TIPC_PUBLISHED or TIPC_WITHDRAWN
+ * @seqno: broadcast seqno to stamp the message with
+ * @inputq: queue the message is appended to
+ *
+ * The message is silently skipped if no buffer can be allocated.
+ */
+static void tipc_group_create_event(struct tipc_group *grp,
+				    struct tipc_member *m,
+				    u32 event, u16 seqno,
+				    struct sk_buff_head *inputq)
+{
+	u32 own_node = tipc_own_addr(grp->net);
+	u32 inst = m->instance;
+	struct tipc_event evt;
+	struct tipc_msg *hdr;
+	struct sk_buff *skb;
+
+	skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT,
+			      GROUP_H_SIZE, sizeof(evt), own_node, m->node,
+			      grp->portid, m->port, 0);
+	if (!skb)
+		return;
+
+	/* Clear the whole event first, then fill in the member's identity */
+	memset(&evt, 0, sizeof(evt));
+	evt.event = event;
+	evt.found_lower = inst;
+	evt.found_upper = inst;
+	evt.port.ref = m->port;
+	evt.port.node = m->node;
+	evt.s.seq.type = grp->type;
+	evt.s.seq.lower = inst;
+	evt.s.seq.upper = inst;
+
+	hdr = buf_msg(skb);
+	msg_set_nametype(hdr, grp->type);
+	msg_set_grp_evt(hdr, event);
+	msg_set_dest_droppable(hdr, true);
+	msg_set_grp_bc_seqno(hdr, seqno);
+	memcpy(msg_data(hdr), &evt, sizeof(evt));
+	TIPC_SKB_CB(skb)->orig_member = inst;
+	__skb_queue_tail(inputq, skb);
+}
+
+/* tipc_group_proto_xmit() - build a group protocol message for a member
+ * @grp: group the member belongs to
+ * @m: destination member
+ * @mtyp: protocol message type, GRP_*_MSG
+ * @xmitq: queue the message is appended to
+ *
+ * For JOIN and ADV messages the advertised window is topped up to the
+ * nominal value of the member's state (ADV_ACTIVE for ACTIVE, ADV_IDLE for
+ * JOINED and PENDING, no change otherwise) and the added amount is carried
+ * in the message. Nothing is queued if no buffer can be allocated.
+ */
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ int adv = 0;
+
+ skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
+ m->node, tipc_own_addr(grp->net),
+ m->port, grp->portid, 0);
+ if (!skb)
+ return;
+
+ if (m->state == MBR_ACTIVE)
+ adv = ADV_ACTIVE - m->advertised;
+ else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
+ adv = ADV_IDLE - m->advertised;
+
+ hdr = buf_msg(skb);
+
+ if (mtyp == GRP_JOIN_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_LEAVE_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ } else if (mtyp == GRP_ADV_MSG) {
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_ACK_MSG) {
+ msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
+ } else if (mtyp == GRP_REMIT_MSG) {
+ msg_set_grp_remitted(hdr, m->window);
+ }
+ msg_set_dest_droppable(hdr, true);
+ __skb_queue_tail(xmitq, skb);
+}
+
+/* tipc_group_proto_rcv() - receive and handle a group protocol message
+ * @grp: group of the receiving socket, may be NULL
+ * @usr_wakeup: set to true when a blocked sender should be woken up; some
+ *              paths also reset it to false via tipc_group_open()
+ * @hdr: header of the received GROUP_PROTOCOL message
+ * @inputq: queue for membership events to be delivered to the user
+ * @xmitq: queue for protocol messages generated as a result
+ *
+ * Messages from other nodes are ignored for node scope groups. Apart from
+ * JOIN, which may create the member, all message types are ignored when the
+ * originating member is unknown.
+ */
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
+			  struct tipc_msg *hdr, struct sk_buff_head *inputq,
+			  struct sk_buff_head *xmitq)
+{
+	u32 node = msg_orignode(hdr);
+	u32 port = msg_origport(hdr);
+	struct tipc_member *m, *pm;
+	u16 remitted, in_flight;
+
+	if (!grp)
+		return;
+
+	if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net))
+		return;
+
+	m = tipc_group_find_member(grp, node, port);
+
+	switch (msg_type(hdr)) {
+	case GRP_JOIN_MSG:
+		if (!m)
+			m = tipc_group_create_member(grp, node, port,
+						     0, MBR_JOINING);
+		if (!m)
+			return;
+		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+		m->bc_rcv_nxt = m->bc_syncpt;
+		m->window += msg_adv_win(hdr);
+
+		/* Wait until PUBLISH event is received if necessary */
+		if (m->state != MBR_PUBLISHED)
+			return;
+
+		/* Member can be taken into service */
+		m->state = MBR_JOINED;
+		tipc_group_open(m, usr_wakeup);
+		tipc_group_update_member(m, 0);
+		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+		tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+					m->bc_syncpt, inputq);
+		return;
+	case GRP_LEAVE_MSG:
+		if (!m)
+			return;
+		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+		list_del_init(&m->list);
+		tipc_group_open(m, usr_wakeup);
+		tipc_group_decr_active(grp, m);
+		m->state = MBR_LEAVING;
+		tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+					m->bc_syncpt, inputq);
+		return;
+	case GRP_ADV_MSG:
+		if (!m)
+			return;
+		m->window += msg_adv_win(hdr);
+		tipc_group_open(m, usr_wakeup);
+		return;
+	case GRP_ACK_MSG:
+		if (!m)
+			return;
+		m->bc_acked = msg_grp_bc_acked(hdr);
+
+		/* An ack nobody is waiting for must not be counted; the
+		 * counter is unsigned and would wrap, leaving the group
+		 * congested for broadcast
+		 */
+		if (!grp->bc_ackers)
+			return;
+		if (--grp->bc_ackers)
+			return;
+
+		/* Last expected ack has arrived, - reopen the group */
+		list_del_init(&m->small_win);
+		*m->group->open = true;
+		*usr_wakeup = true;
+		tipc_group_update_member(m, 0);
+		return;
+	case GRP_RECLAIM_MSG:
+		if (!m)
+			return;
+		/* Report remaining window, then fall back to the idle one */
+		tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
+		m->window = ADV_IDLE;
+		tipc_group_open(m, usr_wakeup);
+		return;
+	case GRP_REMIT_MSG:
+		if (!m || m->state != MBR_RECLAIMING)
+			return;
+
+		remitted = msg_grp_remitted(hdr);
+
+		/* Messages preceding the REMIT still in receive queue */
+		if (m->advertised > remitted) {
+			m->state = MBR_REMITTED;
+			in_flight = m->advertised - remitted;
+			m->advertised = ADV_IDLE + in_flight;
+			return;
+		}
+		/* This should never happen */
+		if (m->advertised < remitted)
+			pr_warn_ratelimited("Unexpected REMIT msg\n");
+
+		/* All messages preceding the REMIT have been read */
+		m->state = MBR_JOINED;
+		grp->active_cnt--;
+		m->advertised = ADV_IDLE;
+
+		/* Set oldest pending member to active and advertise */
+		if (list_empty(&grp->pending))
+			return;
+		pm = list_first_entry(&grp->pending, struct tipc_member, list);
+		pm->state = MBR_ACTIVE;
+		list_move_tail(&pm->list, &grp->active);
+		grp->active_cnt++;
+		if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
+			tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+		return;
+	default:
+		pr_warn("Received unknown GROUP_PROTO message\n");
+	}
+}
+
+/* tipc_group_member_evt() - receive and handle a member up/down event
+ * @grp: group of the receiving socket, may be NULL
+ * @usr_wakeup: written through tipc_group_open(); true when a blocked
+ *              sender should be woken up
+ * @sk_rcvbuf: output; receives the recalculated receive buffer limit
+ * @hdr: header of the message carrying a struct tipc_event as data
+ * @inputq: queue for membership events to be delivered to the user
+ * @xmitq: queue for protocol messages generated as a result
+ *
+ * Events about our own port are ignored unless loopback is enabled. In that
+ * case, and when @grp is NULL, @sk_rcvbuf is left untouched.
+ */
+void tipc_group_member_evt(struct tipc_group *grp,
+ bool *usr_wakeup,
+ int *sk_rcvbuf,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_event *evt = (void *)msg_data(hdr);
+ u32 instance = evt->found_lower;
+ u32 node = evt->port.node;
+ u32 port = evt->port.ref;
+ int event = evt->event;
+ struct tipc_member *m;
+ struct net *net;
+ u32 self;
+
+ if (!grp)
+ return;
+
+ net = grp->net;
+ self = tipc_own_addr(net);
+ if (!grp->loopback && node == self && port == grp->portid)
+ return;
+
+ m = tipc_group_find_member(grp, node, port);
+
+ switch (event) {
+ case TIPC_PUBLISHED:
+ /* Send and wait for arrival of JOIN message if necessary */
+ if (!m) {
+ m = tipc_group_create_member(grp, node, port, instance,
+ MBR_PUBLISHED);
+ if (!m)
+ break;
+ tipc_group_update_member(m, 0);
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ break;
+ }
+
+ /* Only a member whose JOIN arrived first is completed here */
+ if (m->state != MBR_JOINING)
+ break;
+
+ /* Member can be taken into service */
+ m->instance = instance;
+ m->state = MBR_JOINED;
+ tipc_group_open(m, usr_wakeup);
+ tipc_group_update_member(m, 0);
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+ m->bc_syncpt, inputq);
+ break;
+ case TIPC_WITHDRAWN:
+ if (!m)
+ break;
+
+ /* Must run before the state changes, since it inspects it */
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ list_del_init(&m->list);
+ tipc_group_open(m, usr_wakeup);
+
+ /* Only send event if no LEAVE message can be expected */
+ if (!tipc_node_is_up(net, node))
+ tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+ m->bc_rcv_nxt, inputq);
+ break;
+ default:
+ break;
+ }
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+}
+
+/* tipc_group_fill_sock_diag() - add group info to a socket diag message
+ * @grp: group to report
+ * @skb: netlink message buffer being filled
+ *
+ * Adds a TIPC_NLA_SOCK_GROUP nest holding the group's type, instance and
+ * next broadcast send seqno, plus flags for scope and open state. If any
+ * attribute does not fit, the whole nest is cancelled.
+ *
+ * Return: 0 on success, -EMSGSIZE if the attributes did not fit in @skb
+ */
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
+{
+	struct nlattr *group = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_GROUP);
+
+	if (!group)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
+			grp->type) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE,
+			grp->instance) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT,
+			grp->bc_snd_nxt))
+		goto group_msg_cancel;
+
+	if (grp->scope == TIPC_NODE_SCOPE)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE))
+			goto group_msg_cancel;
+
+	if (grp->scope == TIPC_CLUSTER_SCOPE)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE))
+			goto group_msg_cancel;
+
+	if (*grp->open)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN))
+			goto group_msg_cancel;
+
+	nla_nest_end(skb, group);
+	return 0;
+
+group_msg_cancel:
+	nla_nest_cancel(skb, group);
+	/* Same error as when the nest itself does not fit, not a bare -1 */
+	return -EMSGSIZE;
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 000000000000..ea4c3be64c78
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,77 @@
+/*
+ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * Copyright (c) 2020, Red Hat Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_GROUP_H
+#define _TIPC_GROUP_H
+
+#include "core.h"
+
+struct tipc_group;
+struct tipc_member;
+struct tipc_msg;
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq,
+ bool *group_is_open);
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf);
+void tipc_group_delete(struct net *net, struct tipc_group *grp);
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+ u32 port, u32 instance);
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
+void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq,
+ int *scope);
+u32 tipc_group_exclude(struct tipc_group *grp);
+void tipc_group_filter_msg(struct tipc_group *grp,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
+ int *sk_rcvbuf, struct tipc_msg *hdr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **m);
+bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq);
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
+void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb);
+#endif
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
deleted file mode 100644
index b36f0fcd9bdf..000000000000
--- a/net/tipc/handler.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * net/tipc/handler.c: TIPC signal handling
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-
-struct queue_item {
- struct list_head next_signal;
- void (*handler) (unsigned long);
- unsigned long data;
-};
-
-static struct kmem_cache *tipc_queue_item_cache;
-static struct list_head signal_queue_head;
-static DEFINE_SPINLOCK(qitem_lock);
-static int handler_enabled __read_mostly;
-
-static void process_signal_queue(unsigned long dummy);
-
-static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0);
-
-
-unsigned int tipc_k_signal(Handler routine, unsigned long argument)
-{
- struct queue_item *item;
-
- if (!handler_enabled) {
- pr_err("Signal request ignored by handler\n");
- return -ENOPROTOOPT;
- }
-
- spin_lock_bh(&qitem_lock);
- item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC);
- if (!item) {
- pr_err("Signal queue out of memory\n");
- spin_unlock_bh(&qitem_lock);
- return -ENOMEM;
- }
- item->handler = routine;
- item->data = argument;
- list_add_tail(&item->next_signal, &signal_queue_head);
- spin_unlock_bh(&qitem_lock);
- tasklet_schedule(&tipc_tasklet);
- return 0;
-}
-
-static void process_signal_queue(unsigned long dummy)
-{
- struct queue_item *__volatile__ item;
- struct list_head *l, *n;
-
- spin_lock_bh(&qitem_lock);
- list_for_each_safe(l, n, &signal_queue_head) {
- item = list_entry(l, struct queue_item, next_signal);
- list_del(&item->next_signal);
- spin_unlock_bh(&qitem_lock);
- item->handler(item->data);
- spin_lock_bh(&qitem_lock);
- kmem_cache_free(tipc_queue_item_cache, item);
- }
- spin_unlock_bh(&qitem_lock);
-}
-
-int tipc_handler_start(void)
-{
- tipc_queue_item_cache =
- kmem_cache_create("tipc_queue_items", sizeof(struct queue_item),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!tipc_queue_item_cache)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&signal_queue_head);
- tasklet_enable(&tipc_tasklet);
- handler_enabled = 1;
- return 0;
-}
-
-void tipc_handler_stop(void)
-{
- struct list_head *l, *n;
- struct queue_item *item;
-
- if (!handler_enabled)
- return;
-
- handler_enabled = 0;
- tasklet_kill(&tipc_tasklet);
-
- spin_lock_bh(&qitem_lock);
- list_for_each_safe(l, n, &signal_queue_head) {
- item = list_entry(l, struct queue_item, next_signal);
- list_del(&item->next_signal);
- kmem_cache_free(tipc_queue_item_cache, item);
- }
- spin_unlock_bh(&qitem_lock);
-
- kmem_cache_destroy(tipc_queue_item_cache);
-}
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 9934a32bfa87..b9ad0434c3cd 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -42,252 +42,11 @@
#include "core.h"
#include "bearer.h"
-#define MAX_IB_BEARERS MAX_BEARERS
+#define TIPC_MAX_IB_LINK_WIN 500
-/**
- * struct ib_bearer - Infiniband bearer data structure
- * @bearer: ptr to associated "generic" bearer structure
- * @dev: ptr to associated Infiniband network device
- * @tipc_packet_type: used in binding TIPC to Infiniband driver
- * @cleanup: work item used when disabling bearer
- */
-
-struct ib_bearer {
- struct tipc_bearer *bearer;
- struct net_device *dev;
- struct packet_type tipc_packet_type;
- struct work_struct setup;
- struct work_struct cleanup;
-};
-
-static struct tipc_media ib_media_info;
-static struct ib_bearer ib_bearers[MAX_IB_BEARERS];
-static int ib_started;
-
-/**
- * ib_media_addr_set - initialize Infiniband media address structure
- *
- * Media-dependent "value" field stores MAC address in first 6 bytes
- * and zeroes out the remaining bytes.
- */
-static void ib_media_addr_set(const struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *a, char *mac)
-{
- BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN);
- memcpy(a->value, mac, INFINIBAND_ALEN);
- a->media_id = TIPC_MEDIA_TYPE_IB;
- a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN);
-}
-
-/**
- * send_msg - send a TIPC message out over an InfiniBand interface
- */
-static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *dest)
-{
- struct sk_buff *clone;
- struct net_device *dev;
- int delta;
-
- clone = skb_clone(buf, GFP_ATOMIC);
- if (!clone)
- return 0;
-
- dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev;
- delta = dev->hard_header_len - skb_headroom(buf);
-
- if ((delta > 0) &&
- pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(clone);
- return 0;
- }
-
- skb_reset_network_header(clone);
- clone->dev = dev;
- clone->protocol = htons(ETH_P_TIPC);
- dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
- dev->dev_addr, clone->len);
- dev_queue_xmit(clone);
- return 0;
-}
-
-/**
- * recv_msg - handle incoming TIPC message from an InfiniBand interface
- *
- * Accept only packets explicitly sent to this node, or broadcast packets;
- * ignores packets sent using InfiniBand multicast, and traffic sent to other
- * nodes (which can happen if interface is running in promiscuous mode).
- */
-static int recv_msg(struct sk_buff *buf, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
-{
- struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv;
-
- if (!net_eq(dev_net(dev), &init_net)) {
- kfree_skb(buf);
- return 0;
- }
-
- if (likely(ib_ptr->bearer)) {
- if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
- buf->next = NULL;
- tipc_recv_msg(buf, ib_ptr->bearer);
- return 0;
- }
- }
- kfree_skb(buf);
- return 0;
-}
-
-/**
- * setup_bearer - setup association between InfiniBand bearer and interface
- */
-static void setup_bearer(struct work_struct *work)
-{
- struct ib_bearer *ib_ptr =
- container_of(work, struct ib_bearer, setup);
-
- dev_add_pack(&ib_ptr->tipc_packet_type);
-}
-
-/**
- * enable_bearer - attach TIPC bearer to an InfiniBand interface
- */
-static int enable_bearer(struct tipc_bearer *tb_ptr)
-{
- struct net_device *dev;
- struct ib_bearer *ib_ptr = &ib_bearers[0];
- struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
- char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
- int pending_dev = 0;
-
- /* Find unused InfiniBand bearer structure */
- while (ib_ptr->dev) {
- if (!ib_ptr->bearer)
- pending_dev++;
- if (++ib_ptr == stop)
- return pending_dev ? -EAGAIN : -EDQUOT;
- }
-
- /* Find device with specified name */
- dev = dev_get_by_name(&init_net, driver_name);
- if (!dev)
- return -ENODEV;
-
- /* Create InfiniBand bearer for device */
- ib_ptr->dev = dev;
- ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
- ib_ptr->tipc_packet_type.dev = dev;
- ib_ptr->tipc_packet_type.func = recv_msg;
- ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr;
- INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list));
- INIT_WORK(&ib_ptr->setup, setup_bearer);
- schedule_work(&ib_ptr->setup);
-
- /* Associate TIPC bearer with InfiniBand bearer */
- ib_ptr->bearer = tb_ptr;
- tb_ptr->usr_handle = (void *)ib_ptr;
- memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
- memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN);
- tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB;
- tb_ptr->bcast_addr.broadcast = 1;
- tb_ptr->mtu = dev->mtu;
- tb_ptr->blocked = 0;
- ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
- return 0;
-}
-
-/**
- * cleanup_bearer - break association between InfiniBand bearer and interface
- *
- * This routine must be invoked from a work queue because it can sleep.
- */
-static void cleanup_bearer(struct work_struct *work)
-{
- struct ib_bearer *ib_ptr =
- container_of(work, struct ib_bearer, cleanup);
-
- dev_remove_pack(&ib_ptr->tipc_packet_type);
- dev_put(ib_ptr->dev);
- ib_ptr->dev = NULL;
-}
-
-/**
- * disable_bearer - detach TIPC bearer from an InfiniBand interface
- *
- * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away,
- * then get worker thread to complete bearer cleanup. (Can't do cleanup
- * here because cleanup code needs to sleep and caller holds spinlocks.)
- */
-static void disable_bearer(struct tipc_bearer *tb_ptr)
-{
- struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle;
-
- ib_ptr->bearer = NULL;
- INIT_WORK(&ib_ptr->cleanup, cleanup_bearer);
- schedule_work(&ib_ptr->cleanup);
-}
-
-/**
- * recv_notification - handle device updates from OS
- *
- * Change the state of the InfiniBand bearer (if any) associated with the
- * specified device.
- */
-static int recv_notification(struct notifier_block *nb, unsigned long evt,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ib_bearer *ib_ptr = &ib_bearers[0];
- struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- while ((ib_ptr->dev != dev)) {
- if (++ib_ptr == stop)
- return NOTIFY_DONE; /* couldn't find device */
- }
- if (!ib_ptr->bearer)
- return NOTIFY_DONE; /* bearer had been disabled */
-
- ib_ptr->bearer->mtu = dev->mtu;
-
- switch (evt) {
- case NETDEV_CHANGE:
- if (netif_carrier_ok(dev))
- tipc_continue(ib_ptr->bearer);
- else
- tipc_block_bearer(ib_ptr->bearer->name);
- break;
- case NETDEV_UP:
- tipc_continue(ib_ptr->bearer);
- break;
- case NETDEV_DOWN:
- tipc_block_bearer(ib_ptr->bearer->name);
- break;
- case NETDEV_CHANGEMTU:
- case NETDEV_CHANGEADDR:
- tipc_block_bearer(ib_ptr->bearer->name);
- tipc_continue(ib_ptr->bearer);
- break;
- case NETDEV_UNREGISTER:
- case NETDEV_CHANGENAME:
- tipc_disable_bearer(ib_ptr->bearer->name);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block notifier = {
- .notifier_call = recv_notification,
- .priority = 0,
-};
-
-/**
- * ib_addr2str - convert InfiniBand address to string
- */
-static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
+/* Convert InfiniBand address (media address format) to string */
+static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
+ int str_size)
{
if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */
return 1;
@@ -297,76 +56,49 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
return 0;
}
-/**
- * ib_addr2msg - convert InfiniBand address format to message header format
- */
-static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area)
+/* Convert from media address format to discovery message addr format */
+static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
{
- memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
- msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB;
- memcpy(msg_area, a->value, INFINIBAND_ALEN);
+ memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
+ memcpy(msg, addr->value, INFINIBAND_ALEN);
return 0;
}
-/**
- * ib_msg2addr - convert message header address format to InfiniBand format
- */
-static int ib_msg2addr(const struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *a, char *msg_area)
+/* Convert raw InfiniBand address format to media addr format */
+static int tipc_ib_raw2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ const char *msg)
{
- ib_media_addr_set(tb_ptr, a, msg_area);
+ memset(addr, 0, sizeof(*addr));
+ memcpy(addr->value, msg, INFINIBAND_ALEN);
+ addr->media_id = TIPC_MEDIA_TYPE_IB;
+ addr->broadcast = !memcmp(msg, b->bcast_addr.value,
+ INFINIBAND_ALEN);
return 0;
}
-/*
- * InfiniBand media registration info
- */
-static struct tipc_media ib_media_info = {
- .send_msg = send_msg,
- .enable_bearer = enable_bearer,
- .disable_bearer = disable_bearer,
- .addr2str = ib_addr2str,
- .addr2msg = ib_addr2msg,
- .msg2addr = ib_msg2addr,
+/* Convert discovery msg addr format to InfiniBand media addr format */
+static int tipc_ib_msg2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg)
+{
+ return tipc_ib_raw2addr(b, addr, msg);
+}
+
+/* InfiniBand media registration info */
+struct tipc_media ib_media_info = {
+ .send_msg = tipc_l2_send_msg,
+ .enable_media = tipc_enable_l2_media,
+ .disable_media = tipc_disable_l2_media,
+ .addr2str = tipc_ib_addr2str,
+ .addr2msg = tipc_ib_addr2msg,
+ .msg2addr = tipc_ib_msg2addr,
+ .raw2addr = tipc_ib_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_MAX_IB_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_IB,
+ .hwaddr_len = INFINIBAND_ALEN,
.name = "ib"
};
-
-/**
- * tipc_ib_media_start - activate InfiniBand bearer support
- *
- * Register InfiniBand media type with TIPC bearer code. Also register
- * with OS for notifications about device state changes.
- */
-int tipc_ib_media_start(void)
-{
- int res;
-
- if (ib_started)
- return -EINVAL;
-
- res = tipc_register_media(&ib_media_info);
- if (res)
- return res;
-
- res = register_netdevice_notifier(&notifier);
- if (!res)
- ib_started = 1;
- return res;
-}
-
-/**
- * tipc_ib_media_stop - deactivate InfiniBand bearer support
- */
-void tipc_ib_media_stop(void)
-{
- if (!ib_started)
- return;
-
- flush_scheduled_work();
- unregister_netdevice_notifier(&notifier);
- ib_started = 0;
-}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0cc3d9015c5d..931f55f781a1 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,7 +1,7 @@
/*
* net/tipc/link.c: TIPC link code
*
- * Copyright (c) 1996-2007, 2012, Ericsson AB
+ * Copyright (c) 1996-2007, 2012-2016, Ericsson AB
* Copyright (c) 2004-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -35,2925 +35,2952 @@
*/
#include "core.h"
+#include "subscr.h"
#include "link.h"
-#include "port.h"
+#include "bcast.h"
+#include "socket.h"
#include "name_distr.h"
#include "discover.h"
-#include "config.h"
+#include "netlink.h"
+#include "monitor.h"
+#include "trace.h"
+#include "crypto.h"
#include <linux/pkt_sched.h>
-/*
- * Error message prefixes
- */
-static const char *link_co_err = "Link changeover error, ";
-static const char *link_rst_msg = "Resetting link ";
-static const char *link_unk_evt = "Unknown link event ";
+struct tipc_stats {
+ u32 sent_pkts;
+ u32 recv_pkts;
+ u32 sent_states;
+ u32 recv_states;
+ u32 sent_probes;
+ u32 recv_probes;
+ u32 sent_nacks;
+ u32 recv_nacks;
+ u32 sent_acks;
+ u32 sent_bundled;
+ u32 sent_bundles;
+ u32 recv_bundled;
+ u32 recv_bundles;
+ u32 retransmitted;
+ u32 sent_fragmented;
+ u32 sent_fragments;
+ u32 recv_fragmented;
+ u32 recv_fragments;
+ u32 link_congs; /* # port sends blocked by congestion */
+ u32 deferred_recv;
+ u32 duplicates;
+ u32 max_queue_sz; /* send queue size high water mark */
+ u32 accu_queue_sz; /* used for send queue size profiling */
+ u32 queue_sz_counts; /* used for send queue size profiling */
+ u32 msg_length_counts; /* used for message length profiling */
+ u32 msg_lengths_total; /* used for message length profiling */
+ u32 msg_length_profile[7]; /* used for msg. length profiling */
+};
-/*
- * Out-of-range value for link session numbers
+/**
+ * struct tipc_link - TIPC link data structure
+ * @addr: network address of link's peer node
+ * @name: link name character string
+ * @net: pointer to namespace struct
+ * @peer_session: link session # being used by peer end of link
+ * @peer_bearer_id: bearer id used by link's peer endpoint
+ * @bearer_id: local bearer id used by link
+ * @tolerance: minimum link continuity loss needed to reset link [in ms]
+ * @abort_limit: # of unacknowledged continuity probes needed to reset link
+ * @state: current state of link FSM
+ * @peer_caps: bitmap describing capabilities of peer node
+ * @silent_intv_cnt: # of timer intervals without any reception from peer
+ * @priority: current link priority
+ * @net_plane: current link network plane ('A' through 'H')
+ * @mon_state: cookie with information needed by link monitor
+ * @mtu: current maximum packet size for this link
+ * @advertised_mtu: advertised own mtu when link is being established
+ * @backlogq: queue for messages waiting to be sent
+ * @ackers: # of peers that need to ack each packet before it can be released
+ * @acked: seq # of last packet acked by a certain peer. Used for broadcast.
+ * @rcv_nxt: next sequence number to expect for inbound messages
+ * @inputq: buffer queue for messages to be delivered upwards
+ * @namedq: buffer queue for name table messages to be delivered upwards
+ * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
+ * @reasm_buf: head of partially reassembled inbound message fragments
+ * @stats: collects statistics regarding link activity
+ * @session: session to be used by link
+ * @snd_nxt_state: next send seq number
+ * @rcv_nxt_state: next rcv seq number
+ * @in_session: have received ACTIVATE_MSG from peer
+ * @active: link is active
+ * @if_name: associated interface name
+ * @rst_cnt: link reset counter
+ * @drop_point: seq number for failover handling (FIXME)
+ * @failover_reasm_skb: saved failover msg ptr (FIXME)
+ * @failover_deferdq: deferred message queue for failover processing (FIXME)
+ * @transmq: the link's transmit queue
+ * @backlog: link's backlog by priority (importance)
+ * @snd_nxt: next sequence number to be used
+ * @rcv_unacked: # messages read by user, but not yet acked back to peer
+ * @deferdq: deferred receive queue
+ * @window: sliding window size for congestion handling
+ * @min_win: minimal send window to be used by link
+ * @ssthresh: slow start threshold for congestion handling
+ * @max_win: maximal send window to be used by link
+ * @cong_acks: congestion acks for congestion avoidance (FIXME)
+ * @checkpoint: seq number for congestion window size handling
+ * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message
+ * @last_gap: last gap ack blocks for bcast (FIXME)
+ * @last_ga: ptr to gap ack blocks
+ * @bc_rcvlink: the peer specific link used for broadcast reception
+ * @bc_sndlink: the namespace global link used for broadcast sending
+ * @nack_state: bcast nack state
+ * @bc_peer_is_up: peer has acked the bcast init msg
*/
-#define INVALID_SESSION 0x10000
+struct tipc_link {
+ u32 addr;
+ char name[TIPC_MAX_LINK_NAME];
+ struct net *net;
+
+ /* Management and link supervision data */
+ u16 peer_session;
+ u16 session;
+ u16 snd_nxt_state;
+ u16 rcv_nxt_state;
+ u32 peer_bearer_id;
+ u32 bearer_id;
+ u32 tolerance;
+ u32 abort_limit;
+ u32 state;
+ u16 peer_caps;
+ bool in_session;
+ bool active;
+ u32 silent_intv_cnt;
+ char if_name[TIPC_MAX_IF_NAME];
+ u32 priority;
+ char net_plane;
+ struct tipc_mon_state mon_state;
+ u16 rst_cnt;
+
+ /* Failover/synch */
+ u16 drop_point;
+ struct sk_buff *failover_reasm_skb;
+ struct sk_buff_head failover_deferdq;
+
+ /* Max packet negotiation */
+ u16 mtu;
+ u16 advertised_mtu;
+
+ /* Sending */
+ struct sk_buff_head transmq;
+ struct sk_buff_head backlogq;
+ struct {
+ u16 len;
+ u16 limit;
+ struct sk_buff *target_bskb;
+ } backlog[5];
+ u16 snd_nxt;
+
+ /* Reception */
+ u16 rcv_nxt;
+ u32 rcv_unacked;
+ struct sk_buff_head deferdq;
+ struct sk_buff_head *inputq;
+ struct sk_buff_head *namedq;
+
+ /* Congestion handling */
+ struct sk_buff_head wakeupq;
+ u16 window;
+ u16 min_win;
+ u16 ssthresh;
+ u16 max_win;
+ u16 cong_acks;
+ u16 checkpoint;
+
+ /* Fragmentation/reassembly */
+ struct sk_buff *reasm_buf;
+ struct sk_buff *reasm_tnlmsg;
+
+ /* Broadcast */
+ u16 ackers;
+ u16 acked;
+ u16 last_gap;
+ struct tipc_gap_ack_blks *last_ga;
+ struct tipc_link *bc_rcvlink;
+ struct tipc_link *bc_sndlink;
+ u8 nack_state;
+ bool bc_peer_is_up;
+
+ /* Statistics */
+ struct tipc_stats stats;
+};
/*
- * Link state events:
+ * Error message prefixes
*/
-#define STARTING_EVT 856384768 /* link processing trigger */
-#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */
-#define TIMEOUT_EVT 560817u /* link timer expired */
+static const char *link_co_err = "Link tunneling error, ";
+static const char *link_rst_msg = "Resetting link ";
-/*
- * The following two 'message types' is really just implementation
- * data conveniently stored in the message header.
- * They must not be considered part of the protocol
+/* Send states for broadcast NACKs
*/
-#define OPEN_MSG 0
-#define CLOSED_MSG 1
+enum {
+ BC_NACK_SND_CONDITIONAL,
+ BC_NACK_SND_UNCONDITIONAL,
+ BC_NACK_SND_SUPPRESS,
+};
-/*
- * State value stored in 'exp_msg_count'
- */
-#define START_CHANGEOVER 100000u
+#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10))
+#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
-/**
- * struct tipc_link_name - deconstructed link name
- * @addr_local: network address of node at this end
- * @if_local: name of interface at this end
- * @addr_peer: network address of node at far end
- * @if_peer: name of interface at far end
+/* Link FSM states:
*/
-struct tipc_link_name {
- u32 addr_local;
- char if_local[TIPC_MAX_IF_NAME];
- u32 addr_peer;
- char if_peer[TIPC_MAX_IF_NAME];
+enum {
+ LINK_ESTABLISHED = 0xe,
+ LINK_ESTABLISHING = 0xe << 4,
+ LINK_RESET = 0x1 << 8,
+ LINK_RESETTING = 0x2 << 12,
+ LINK_PEER_RESET = 0xd << 16,
+ LINK_FAILINGOVER = 0xf << 20,
+ LINK_SYNCHING = 0xc << 24
};
-static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
- struct sk_buff *buf);
-static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf);
-static int link_recv_changeover_msg(struct tipc_link **l_ptr,
- struct sk_buff **buf);
-static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance);
-static int link_send_sections_long(struct tipc_port *sender,
- struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len,
- u32 destnode);
-static void link_state_event(struct tipc_link *l_ptr, u32 event);
-static void link_reset_statistics(struct tipc_link *l_ptr);
-static void link_print(struct tipc_link *l_ptr, const char *str);
-static void link_start(struct tipc_link *l_ptr);
-static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
-static void tipc_link_send_sync(struct tipc_link *l);
-static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf);
-
+static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
+ struct sk_buff_head *xmitq);
+static void link_print(struct tipc_link *l, const char *str);
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+static void tipc_link_build_bc_init_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr);
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc);
+static void tipc_link_update_cwin(struct tipc_link *l, int released,
+ bool retransmitted);
/*
- * Simple link routines
+ * Simple non-static link routines (i.e. referenced outside this file)
*/
-static unsigned int align(unsigned int i)
+bool tipc_link_is_up(struct tipc_link *l)
{
- return (i + 3) & ~3u;
+ return l->state & (LINK_ESTABLISHED | LINK_SYNCHING);
}
-static void link_init_max_pkt(struct tipc_link *l_ptr)
+bool tipc_link_peer_is_down(struct tipc_link *l)
{
- u32 max_pkt;
-
- max_pkt = (l_ptr->b_ptr->mtu & ~3);
- if (max_pkt > MAX_MSG_SIZE)
- max_pkt = MAX_MSG_SIZE;
-
- l_ptr->max_pkt_target = max_pkt;
- if (l_ptr->max_pkt_target < MAX_PKT_DEFAULT)
- l_ptr->max_pkt = l_ptr->max_pkt_target;
- else
- l_ptr->max_pkt = MAX_PKT_DEFAULT;
+ return l->state == LINK_PEER_RESET;
+}
- l_ptr->max_pkt_probes = 0;
+bool tipc_link_is_reset(struct tipc_link *l)
+{
+ return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING);
}
-static u32 link_next_sent(struct tipc_link *l_ptr)
+bool tipc_link_is_establishing(struct tipc_link *l)
{
- if (l_ptr->next_out)
- return buf_seqno(l_ptr->next_out);
- return mod(l_ptr->next_out_no);
+ return l->state == LINK_ESTABLISHING;
}
-static u32 link_last_sent(struct tipc_link *l_ptr)
+bool tipc_link_is_synching(struct tipc_link *l)
{
- return mod(link_next_sent(l_ptr) - 1);
+ return l->state == LINK_SYNCHING;
}
-/*
- * Simple non-static link routines (i.e. referenced outside this file)
- */
-int tipc_link_is_up(struct tipc_link *l_ptr)
+bool tipc_link_is_failingover(struct tipc_link *l)
{
- if (!l_ptr)
- return 0;
- return link_working_working(l_ptr) || link_working_unknown(l_ptr);
+ return l->state == LINK_FAILINGOVER;
}
-int tipc_link_is_active(struct tipc_link *l_ptr)
+bool tipc_link_is_blocked(struct tipc_link *l)
{
- return (l_ptr->owner->active_links[0] == l_ptr) ||
- (l_ptr->owner->active_links[1] == l_ptr);
+ return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER);
}
-/**
- * link_name_validate - validate & (optionally) deconstruct tipc_link name
- * @name: ptr to link name string
- * @name_parts: ptr to area for link name components (or NULL if not needed)
- *
- * Returns 1 if link name is valid, otherwise 0.
- */
-static int link_name_validate(const char *name,
- struct tipc_link_name *name_parts)
-{
- char name_copy[TIPC_MAX_LINK_NAME];
- char *addr_local;
- char *if_local;
- char *addr_peer;
- char *if_peer;
- char dummy;
- u32 z_local, c_local, n_local;
- u32 z_peer, c_peer, n_peer;
- u32 if_local_len;
- u32 if_peer_len;
-
- /* copy link name & ensure length is OK */
- name_copy[TIPC_MAX_LINK_NAME - 1] = 0;
- /* need above in case non-Posix strncpy() doesn't pad with nulls */
- strncpy(name_copy, name, TIPC_MAX_LINK_NAME);
- if (name_copy[TIPC_MAX_LINK_NAME - 1] != 0)
- return 0;
+static bool link_is_bc_sndlink(struct tipc_link *l)
+{
+ return !l->bc_sndlink;
+}
- /* ensure all component parts of link name are present */
- addr_local = name_copy;
- if_local = strchr(addr_local, ':');
- if (if_local == NULL)
- return 0;
- *(if_local++) = 0;
- addr_peer = strchr(if_local, '-');
- if (addr_peer == NULL)
- return 0;
- *(addr_peer++) = 0;
- if_local_len = addr_peer - if_local;
- if_peer = strchr(addr_peer, ':');
- if (if_peer == NULL)
- return 0;
- *(if_peer++) = 0;
- if_peer_len = strlen(if_peer) + 1;
-
- /* validate component parts of link name */
- if ((sscanf(addr_local, "%u.%u.%u%c",
- &z_local, &c_local, &n_local, &dummy) != 3) ||
- (sscanf(addr_peer, "%u.%u.%u%c",
- &z_peer, &c_peer, &n_peer, &dummy) != 3) ||
- (z_local > 255) || (c_local > 4095) || (n_local > 4095) ||
- (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) ||
- (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) ||
- (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME))
- return 0;
+static bool link_is_bc_rcvlink(struct tipc_link *l)
+{
+ return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l));
+}
- /* return link name components, if necessary */
- if (name_parts) {
- name_parts->addr_local = tipc_addr(z_local, c_local, n_local);
- strcpy(name_parts->if_local, if_local);
- name_parts->addr_peer = tipc_addr(z_peer, c_peer, n_peer);
- strcpy(name_parts->if_peer, if_peer);
- }
- return 1;
+void tipc_link_set_active(struct tipc_link *l, bool active)
+{
+ l->active = active;
}
-/**
- * link_timeout - handle expiration of link timer
- * @l_ptr: pointer to link
- *
- * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict
- * with tipc_link_delete(). (There is no risk that the node will be deleted by
- * another thread because tipc_link_delete() always cancels the link timer before
- * tipc_node_delete() is called.)
- */
-static void link_timeout(struct tipc_link *l_ptr)
+u32 tipc_link_id(struct tipc_link *l)
{
- tipc_node_lock(l_ptr->owner);
+ return l->peer_bearer_id << 16 | l->bearer_id;
+}
- /* update counters used in statistical profiling of send traffic */
- l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size;
- l_ptr->stats.queue_sz_counts++;
+int tipc_link_min_win(struct tipc_link *l)
+{
+ return l->min_win;
+}
- if (l_ptr->first_out) {
- struct tipc_msg *msg = buf_msg(l_ptr->first_out);
- u32 length = msg_size(msg);
+int tipc_link_max_win(struct tipc_link *l)
+{
+ return l->max_win;
+}
- if ((msg_user(msg) == MSG_FRAGMENTER) &&
- (msg_type(msg) == FIRST_FRAGMENT)) {
- length = msg_size(msg_get_wrapped(msg));
- }
- if (length) {
- l_ptr->stats.msg_lengths_total += length;
- l_ptr->stats.msg_length_counts++;
- if (length <= 64)
- l_ptr->stats.msg_length_profile[0]++;
- else if (length <= 256)
- l_ptr->stats.msg_length_profile[1]++;
- else if (length <= 1024)
- l_ptr->stats.msg_length_profile[2]++;
- else if (length <= 4096)
- l_ptr->stats.msg_length_profile[3]++;
- else if (length <= 16384)
- l_ptr->stats.msg_length_profile[4]++;
- else if (length <= 32768)
- l_ptr->stats.msg_length_profile[5]++;
- else
- l_ptr->stats.msg_length_profile[6]++;
- }
- }
+int tipc_link_prio(struct tipc_link *l)
+{
+ return l->priority;
+}
- /* do all other link processing performed on a periodic basis */
+unsigned long tipc_link_tolerance(struct tipc_link *l)
+{
+ return l->tolerance;
+}
- link_state_event(l_ptr, TIMEOUT_EVT);
+struct sk_buff_head *tipc_link_inputq(struct tipc_link *l)
+{
+ return l->inputq;
+}
- if (l_ptr->next_out)
- tipc_link_push_queue(l_ptr);
+char tipc_link_plane(struct tipc_link *l)
+{
+ return l->net_plane;
+}
- tipc_node_unlock(l_ptr->owner);
+struct net *tipc_link_net(struct tipc_link *l)
+{
+ return l->net;
}
-static void link_set_timer(struct tipc_link *l_ptr, u32 time)
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
{
- k_start_timer(&l_ptr->timer, time);
+ l->peer_caps = capabilities;
}
-/**
- * tipc_link_create - create a new link
- * @n_ptr: pointer to associated node
- * @b_ptr: pointer to associated bearer
- * @media_addr: media address to use when sending messages over link
- *
- * Returns pointer to link.
- */
-struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr)
+void tipc_link_add_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *uc_l,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link *l_ptr;
- struct tipc_msg *msg;
- char *if_name;
- char addr_string[16];
- u32 peer = n_ptr->addr;
+ struct tipc_link *rcv_l = uc_l->bc_rcvlink;
- if (n_ptr->link_cnt >= 2) {
- tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_err("Attempt to establish third link to %s\n", addr_string);
- return NULL;
- }
+ snd_l->ackers++;
+ rcv_l->acked = snd_l->snd_nxt - 1;
+ snd_l->state = LINK_ESTABLISHED;
+ tipc_link_build_bc_init_msg(uc_l, xmitq);
+}
- if (n_ptr->links[b_ptr->identity]) {
- tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_err("Attempt to establish second link on <%s> to %s\n",
- b_ptr->name, addr_string);
- return NULL;
- }
+void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *rcv_l,
+ struct sk_buff_head *xmitq)
+{
+ u16 ack = snd_l->snd_nxt - 1;
- l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
- if (!l_ptr) {
- pr_warn("Link creation failed, no memory\n");
- return NULL;
+ snd_l->ackers--;
+ rcv_l->bc_peer_is_up = true;
+ rcv_l->state = LINK_ESTABLISHED;
+ tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL);
+ trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
+ tipc_link_reset(rcv_l);
+ rcv_l->state = LINK_RESET;
+ if (!snd_l->ackers) {
+ trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
+ tipc_link_reset(snd_l);
+ snd_l->state = LINK_RESET;
+ __skb_queue_purge(xmitq);
}
-
- l_ptr->addr = peer;
- if_name = strchr(b_ptr->name, ':') + 1;
- sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
- tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
- tipc_node(tipc_own_addr),
- if_name,
- tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
- /* note: peer i/f name is updated by reset/activate message */
- memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
- l_ptr->owner = n_ptr;
- l_ptr->checkpoint = 1;
- l_ptr->peer_session = INVALID_SESSION;
- l_ptr->b_ptr = b_ptr;
- link_set_supervision_props(l_ptr, b_ptr->tolerance);
- l_ptr->state = RESET_UNKNOWN;
-
- l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
- msg = l_ptr->pmsg;
- tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr);
- msg_set_size(msg, sizeof(l_ptr->proto_msg));
- msg_set_session(msg, (tipc_random & 0xffff));
- msg_set_bearer_id(msg, b_ptr->identity);
- strcpy((char *)msg_data(msg), if_name);
-
- l_ptr->priority = b_ptr->priority;
- tipc_link_set_queue_limits(l_ptr, b_ptr->window);
-
- link_init_max_pkt(l_ptr);
-
- l_ptr->next_out_no = 1;
- INIT_LIST_HEAD(&l_ptr->waiting_ports);
-
- link_reset_statistics(l_ptr);
-
- tipc_node_attach_link(n_ptr, l_ptr);
-
- k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
- list_add_tail(&l_ptr->link_list, &b_ptr->links);
- tipc_k_signal((Handler)link_start, (unsigned long)l_ptr);
-
- return l_ptr;
}
-/**
- * tipc_link_delete - delete a link
- * @l_ptr: pointer to link
- *
- * Note: 'tipc_net_lock' is write_locked, bearer is locked.
- * This routine must not grab the node lock until after link timer cancellation
- * to avoid a potential deadlock situation.
- */
-void tipc_link_delete(struct tipc_link *l_ptr)
+int tipc_link_bc_peers(struct tipc_link *l)
{
- if (!l_ptr) {
- pr_err("Attempt to delete non-existent link\n");
- return;
- }
+ return l->ackers;
+}
- k_cancel_timer(&l_ptr->timer);
+static u16 link_bc_rcv_gap(struct tipc_link *l)
+{
+ struct sk_buff *skb = skb_peek(&l->deferdq);
+ u16 gap = 0;
- tipc_node_lock(l_ptr->owner);
- tipc_link_reset(l_ptr);
- tipc_node_detach_link(l_ptr->owner, l_ptr);
- tipc_link_stop(l_ptr);
- list_del_init(&l_ptr->link_list);
- tipc_node_unlock(l_ptr->owner);
- k_term_timer(&l_ptr->timer);
- kfree(l_ptr);
+ if (more(l->snd_nxt, l->rcv_nxt))
+ gap = l->snd_nxt - l->rcv_nxt;
+ if (skb)
+ gap = buf_seqno(skb) - l->rcv_nxt;
+ return gap;
}
-static void link_start(struct tipc_link *l_ptr)
+void tipc_link_set_mtu(struct tipc_link *l, int mtu)
{
- tipc_node_lock(l_ptr->owner);
- link_state_event(l_ptr, STARTING_EVT);
- tipc_node_unlock(l_ptr->owner);
+ l->mtu = mtu;
}
-/**
- * link_schedule_port - schedule port for deferred sending
- * @l_ptr: pointer to link
- * @origport: reference to sending port
- * @sz: amount of data to be sent
- *
- * Schedules port for renewed sending of messages after link congestion
- * has abated.
- */
-static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz)
-{
- struct tipc_port *p_ptr;
-
- spin_lock_bh(&tipc_port_list_lock);
- p_ptr = tipc_port_lock(origport);
- if (p_ptr) {
- if (!p_ptr->wakeup)
- goto exit;
- if (!list_empty(&p_ptr->wait_list))
- goto exit;
- p_ptr->congested = 1;
- p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
- list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
- l_ptr->stats.link_congs++;
-exit:
- tipc_port_unlock(p_ptr);
- }
- spin_unlock_bh(&tipc_port_list_lock);
- return -ELINKCONG;
+int tipc_link_mtu(struct tipc_link *l)
+{
+ return l->mtu;
}
-void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all)
+int tipc_link_mss(struct tipc_link *l)
{
- struct tipc_port *p_ptr;
- struct tipc_port *temp_p_ptr;
- int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size;
-
- if (all)
- win = 100000;
- if (win <= 0)
- return;
- if (!spin_trylock_bh(&tipc_port_list_lock))
- return;
- if (link_congested(l_ptr))
- goto exit;
- list_for_each_entry_safe(p_ptr, temp_p_ptr, &l_ptr->waiting_ports,
- wait_list) {
- if (win <= 0)
- break;
- list_del_init(&p_ptr->wait_list);
- spin_lock_bh(p_ptr->lock);
- p_ptr->congested = 0;
- p_ptr->wakeup(p_ptr);
- win -= p_ptr->waiting_pkts;
- spin_unlock_bh(p_ptr->lock);
- }
-
-exit:
- spin_unlock_bh(&tipc_port_list_lock);
+#ifdef CONFIG_TIPC_CRYPTO
+ return l->mtu - INT_H_SIZE - EMSG_OVERHEAD;
+#else
+ return l->mtu - INT_H_SIZE;
+#endif
}
-/**
- * link_release_outqueue - purge link's outbound message queue
- * @l_ptr: pointer to link
- */
-static void link_release_outqueue(struct tipc_link *l_ptr)
+u16 tipc_link_rcv_nxt(struct tipc_link *l)
{
- struct sk_buff *buf = l_ptr->first_out;
- struct sk_buff *next;
+ return l->rcv_nxt;
+}
- while (buf) {
- next = buf->next;
- kfree_skb(buf);
- buf = next;
- }
- l_ptr->first_out = NULL;
- l_ptr->out_queue_size = 0;
+u16 tipc_link_acked(struct tipc_link *l)
+{
+ return l->acked;
}
-/**
- * tipc_link_reset_fragments - purge link's inbound message fragments queue
- * @l_ptr: pointer to link
- */
-void tipc_link_reset_fragments(struct tipc_link *l_ptr)
+char *tipc_link_name(struct tipc_link *l)
{
- struct sk_buff *buf = l_ptr->defragm_buf;
- struct sk_buff *next;
+ return l->name;
+}
- while (buf) {
- next = buf->next;
- kfree_skb(buf);
- buf = next;
- }
- l_ptr->defragm_buf = NULL;
+u32 tipc_link_state(struct tipc_link *l)
+{
+ return l->state;
}
/**
- * tipc_link_stop - purge all inbound and outbound messages associated with link
- * @l_ptr: pointer to link
+ * tipc_link_create - create a new link
+ * @net: pointer to associated network namespace
+ * @if_name: associated interface name
+ * @bearer_id: id (index) of associated bearer
+ * @tolerance: link tolerance to be used by link
+ * @net_plane: network plane (A,B,c..) this link belongs to
+ * @mtu: mtu to be advertised by link
+ * @priority: priority to be used by link
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
+ * @session: session to be used by link
+ * @peer: node id of peer node
+ * @peer_caps: bitmap describing peer node capabilities
+ * @bc_sndlink: the namespace global link used for broadcast sending
+ * @bc_rcvlink: the peer specific link used for broadcast reception
+ * @inputq: queue to put messages ready for delivery
+ * @namedq: queue to put binding table update messages ready for delivery
+ * @link: return value, pointer to put the created link
+ * @self: local unicast link id
+ * @peer_id: 128-bit ID of peer
+ *
+ * Return: true if link was created, otherwise false
*/
-void tipc_link_stop(struct tipc_link *l_ptr)
-{
- struct sk_buff *buf;
- struct sk_buff *next;
-
- buf = l_ptr->oldest_deferred_in;
- while (buf) {
- next = buf->next;
- kfree_skb(buf);
- buf = next;
- }
-
- buf = l_ptr->first_out;
- while (buf) {
- next = buf->next;
- kfree_skb(buf);
- buf = next;
- }
-
- tipc_link_reset_fragments(l_ptr);
+bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
+ int tolerance, char net_plane, u32 mtu, int priority,
+ u32 min_win, u32 max_win, u32 session, u32 self,
+ u32 peer, u8 *peer_id, u16 peer_caps,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link *bc_rcvlink,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
+ struct tipc_link **link)
+{
+ char peer_str[NODE_ID_STR_LEN] = {0,};
+ char self_str[NODE_ID_STR_LEN] = {0,};
+ struct tipc_link *l;
- kfree_skb(l_ptr->proto_msg_queue);
- l_ptr->proto_msg_queue = NULL;
+ l = kzalloc(sizeof(*l), GFP_ATOMIC);
+ if (!l)
+ return false;
+ *link = l;
+ l->session = session;
+
+ /* Set link name for unicast links only */
+ if (peer_id) {
+ if (tipc_nodeid2string(self_str, tipc_own_id(net)) > NODE_ID_LEN)
+ sprintf(self_str, "%x", self);
+ if (tipc_nodeid2string(peer_str, peer_id) > NODE_ID_LEN)
+ sprintf(peer_str, "%x", peer);
+ }
+ /* Peer i/f name will be completed by reset/activate message */
+ snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown",
+ self_str, if_name, peer_str);
+
+ strcpy(l->if_name, if_name);
+ l->addr = peer;
+ l->peer_caps = peer_caps;
+ l->net = net;
+ l->in_session = false;
+ l->bearer_id = bearer_id;
+ l->tolerance = tolerance;
+ if (bc_rcvlink)
+ bc_rcvlink->tolerance = tolerance;
+ l->net_plane = net_plane;
+ l->advertised_mtu = mtu;
+ l->mtu = mtu;
+ l->priority = priority;
+ tipc_link_set_queue_limits(l, min_win, max_win);
+ l->ackers = 1;
+ l->bc_sndlink = bc_sndlink;
+ l->bc_rcvlink = bc_rcvlink;
+ l->inputq = inputq;
+ l->namedq = namedq;
+ l->state = LINK_RESETTING;
+ __skb_queue_head_init(&l->transmq);
+ __skb_queue_head_init(&l->backlogq);
+ __skb_queue_head_init(&l->deferdq);
+ __skb_queue_head_init(&l->failover_deferdq);
+ skb_queue_head_init(&l->wakeupq);
+ skb_queue_head_init(l->inputq);
+ return true;
}
-void tipc_link_reset(struct tipc_link *l_ptr)
+/**
+ * tipc_link_bc_create - create new link to be used for broadcast
+ * @net: pointer to associated network namespace
+ * @mtu: mtu to be used initially if no peers
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
+ * @inputq: queue to put messages ready for delivery
+ * @namedq: queue to put binding table update messages ready for delivery
+ * @link: return value, pointer to put the created link
+ * @ownnode: identity of own node
+ * @peer: node id of peer node
+ * @peer_id: 128-bit ID of peer
+ * @peer_caps: bitmap describing peer node capabilities
+ * @bc_sndlink: the namespace global link used for broadcast sending
+ *
+ * Return: true if link was created, otherwise false
+ */
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
+ int mtu, u32 min_win, u32 max_win, u16 peer_caps,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link **link)
{
- struct sk_buff *buf;
- u32 prev_state = l_ptr->state;
- u32 checkpoint = l_ptr->next_in_no;
- int was_active_link = tipc_link_is_active(l_ptr);
-
- msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff));
-
- /* Link is down, accept any session */
- l_ptr->peer_session = INVALID_SESSION;
-
- /* Prepare for max packet size negotiation */
- link_init_max_pkt(l_ptr);
-
- l_ptr->state = RESET_UNKNOWN;
-
- if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET))
- return;
+ struct tipc_link *l;
- tipc_node_link_down(l_ptr->owner, l_ptr);
- tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
+ if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win,
+ max_win, 0, ownnode, peer, NULL, peer_caps,
+ bc_sndlink, NULL, inputq, namedq, link))
+ return false;
- if (was_active_link && tipc_node_active_links(l_ptr->owner) &&
- l_ptr->owner->permit_changeover) {
- l_ptr->reset_checkpoint = checkpoint;
- l_ptr->exp_msg_count = START_CHANGEOVER;
- }
+ l = *link;
+ if (peer_id) {
+ char peer_str[NODE_ID_STR_LEN] = {0,};
- /* Clean up all queues: */
- link_release_outqueue(l_ptr);
- kfree_skb(l_ptr->proto_msg_queue);
- l_ptr->proto_msg_queue = NULL;
- buf = l_ptr->oldest_deferred_in;
- while (buf) {
- struct sk_buff *next = buf->next;
- kfree_skb(buf);
- buf = next;
+ if (tipc_nodeid2string(peer_str, peer_id) > NODE_ID_LEN)
+ sprintf(peer_str, "%x", peer);
+ /* Broadcast receiver link name: "broadcast-link:<peer>" */
+ snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name,
+ peer_str);
+ } else {
+ strcpy(l->name, tipc_bclink_name);
}
- if (!list_empty(&l_ptr->waiting_ports))
- tipc_link_wakeup_ports(l_ptr, 1);
+ trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
+ tipc_link_reset(l);
+ l->state = LINK_RESET;
+ l->ackers = 0;
+ l->bc_rcvlink = l;
- l_ptr->retransm_queue_head = 0;
- l_ptr->retransm_queue_size = 0;
- l_ptr->last_out = NULL;
- l_ptr->first_out = NULL;
- l_ptr->next_out = NULL;
- l_ptr->unacked_window = 0;
- l_ptr->checkpoint = 1;
- l_ptr->next_out_no = 1;
- l_ptr->deferred_inqueue_sz = 0;
- l_ptr->oldest_deferred_in = NULL;
- l_ptr->newest_deferred_in = NULL;
- l_ptr->fsm_msg_cnt = 0;
- l_ptr->stale_count = 0;
- link_reset_statistics(l_ptr);
-}
+ /* Broadcast send link is always up */
+ if (link_is_bc_sndlink(l))
+ l->state = LINK_ESTABLISHED;
+ /* Disable replicast if even a single peer doesn't support it */
+ if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST))
+ tipc_bcast_toggle_rcast(net, false);
-static void link_activate(struct tipc_link *l_ptr)
-{
- l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
- tipc_node_link_up(l_ptr->owner, l_ptr);
- tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
+ return true;
}
/**
- * link_state_event - link finite state machine
- * @l_ptr: pointer to link
- * @event: state machine event to process
+ * tipc_link_fsm_evt - link finite state machine
+ * @l: pointer to link
+ * @evt: state machine event to be processed
*/
-static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
+int tipc_link_fsm_evt(struct tipc_link *l, int evt)
{
- struct tipc_link *other;
- u32 cont_intv = l_ptr->continuity_interval;
-
- if (!l_ptr->started && (event != STARTING_EVT))
- return; /* Not yet. */
-
- if (link_blocked(l_ptr)) {
- if (event == TIMEOUT_EVT)
- link_set_timer(l_ptr, cont_intv);
- return; /* Changeover going on */
- }
+ int rc = 0;
+ int old_state = l->state;
- switch (l_ptr->state) {
- case WORKING_WORKING:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
+ switch (l->state) {
+ case LINK_RESETTING:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_PEER_RESET;
break;
- case TIMEOUT_EVT:
- if (l_ptr->next_in_no != l_ptr->checkpoint) {
- l_ptr->checkpoint = l_ptr->next_in_no;
- if (tipc_bclink_acks_missing(l_ptr->owner)) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- } else if (l_ptr->max_pkt < l_ptr->max_pkt_target) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 1, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- }
- link_set_timer(l_ptr, cont_intv);
- break;
- }
- l_ptr->state = WORKING_UNKNOWN;
- l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv / 4);
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_FAILURE_EVT:
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILOVER_END_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case LINK_RESET:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_ESTABLISHING;
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ l->state = LINK_FAILINGOVER;
break;
- case RESET_MSG:
- pr_info("%s<%s>, requested by peer\n", link_rst_msg,
- l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_RESET;
- l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
+ case LINK_FAILURE_EVT:
+ case LINK_RESET_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILOVER_END_EVT:
break;
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
default:
- pr_err("%s%u in WW state\n", link_unk_evt, event);
+ goto illegal_evt;
}
break;
- case WORKING_UNKNOWN:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
- link_set_timer(l_ptr, cont_intv);
+ case LINK_PEER_RESET:
+ switch (evt) {
+ case LINK_RESET_EVT:
+ l->state = LINK_ESTABLISHING;
break;
- case RESET_MSG:
- pr_info("%s<%s>, requested by peer while probing\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_RESET;
- l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
+ case LINK_PEER_RESET_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILURE_EVT:
break;
- case TIMEOUT_EVT:
- if (l_ptr->next_in_no != l_ptr->checkpoint) {
- l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
- l_ptr->checkpoint = l_ptr->next_in_no;
- if (tipc_bclink_acks_missing(l_ptr->owner)) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- }
- link_set_timer(l_ptr, cont_intv);
- } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 1, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv / 4);
- } else { /* Link has failed */
- pr_warn("%s<%s>, peer not responding\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_UNKNOWN;
- l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, RESET_MSG,
- 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
- }
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case LINK_FAILINGOVER:
+ switch (evt) {
+ case LINK_FAILOVER_END_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_PEER_RESET_EVT:
+ case LINK_RESET_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILURE_EVT:
break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
default:
- pr_err("%s%u in WU state\n", link_unk_evt, event);
+ goto illegal_evt;
}
break;
- case RESET_UNKNOWN:
- switch (event) {
- case TRAFFIC_MSG_EVT:
+ case LINK_ESTABLISHING:
+ switch (evt) {
+ case LINK_ESTABLISH_EVT:
+ l->state = LINK_ESTABLISHED;
break;
- case ACTIVATE_MSG:
- other = l_ptr->owner->active_links[0];
- if (other && link_working_unknown(other))
- break;
- l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
- link_activate(l_ptr);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- if (l_ptr->owner->working_links == 1)
- tipc_link_send_sync(l_ptr);
- link_set_timer(l_ptr, cont_intv);
+ case LINK_FAILOVER_BEGIN_EVT:
+ l->state = LINK_FAILINGOVER;
break;
- case RESET_MSG:
- l_ptr->state = RESET_RESET;
- l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
break;
- case STARTING_EVT:
- l_ptr->started = 1;
- /* fall through */
- case TIMEOUT_EVT:
- tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
+ case LINK_FAILURE_EVT:
+ case LINK_PEER_RESET_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
break;
+ case LINK_SYNCH_END_EVT:
default:
- pr_err("%s%u in RU state\n", link_unk_evt, event);
+ goto illegal_evt;
}
break;
- case RESET_RESET:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- other = l_ptr->owner->active_links[0];
- if (other && link_working_unknown(other))
- break;
- l_ptr->state = WORKING_WORKING;
- l_ptr->fsm_msg_cnt = 0;
- link_activate(l_ptr);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- if (l_ptr->owner->working_links == 1)
- tipc_link_send_sync(l_ptr);
- link_set_timer(l_ptr, cont_intv);
+ case LINK_ESTABLISHED:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_PEER_RESET;
+ rc |= TIPC_LINK_DOWN_EVT;
break;
- case RESET_MSG:
+ case LINK_FAILURE_EVT:
+ l->state = LINK_RESETTING;
+ rc |= TIPC_LINK_DOWN_EVT;
break;
- case TIMEOUT_EVT:
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
- l_ptr->fsm_msg_cnt++;
- link_set_timer(l_ptr, cont_intv);
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
break;
+ case LINK_ESTABLISH_EVT:
+ case LINK_SYNCH_END_EVT:
+ break;
+ case LINK_SYNCH_BEGIN_EVT:
+ l->state = LINK_SYNCHING;
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
default:
- pr_err("%s%u in RR state\n", link_unk_evt, event);
+ goto illegal_evt;
+ }
+ break;
+ case LINK_SYNCHING:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_PEER_RESET;
+ rc |= TIPC_LINK_DOWN_EVT;
+ break;
+ case LINK_FAILURE_EVT:
+ l->state = LINK_RESETTING;
+ rc |= TIPC_LINK_DOWN_EVT;
+ break;
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_ESTABLISH_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ break;
+ case LINK_SYNCH_END_EVT:
+ l->state = LINK_ESTABLISHED;
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
}
break;
default:
- pr_err("Unknown link state %u/%u\n", l_ptr->state, event);
+ pr_err("Unknown FSM state %x in %s\n", l->state, l->name);
}
+ trace_tipc_link_fsm(l->name, old_state, l->state, evt);
+ return rc;
+illegal_evt:
+ pr_err("Illegal FSM event %x in state %x on link %s\n",
+ evt, l->state, l->name);
+ trace_tipc_link_fsm(l->name, old_state, l->state, evt);
+ return rc;
}
-/*
- * link_bundle_buf(): Append contents of a buffer to
- * the tail of an existing one.
+/* link_profile_stats - update statistical profiling of traffic
*/
-static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler,
- struct sk_buff *buf)
+static void link_profile_stats(struct tipc_link *l)
{
- struct tipc_msg *bundler_msg = buf_msg(bundler);
- struct tipc_msg *msg = buf_msg(buf);
- u32 size = msg_size(msg);
- u32 bundle_size = msg_size(bundler_msg);
- u32 to_pos = align(bundle_size);
- u32 pad = to_pos - bundle_size;
-
- if (msg_user(bundler_msg) != MSG_BUNDLER)
- return 0;
- if (msg_type(bundler_msg) != OPEN_MSG)
- return 0;
- if (skb_tailroom(bundler) < (pad + size))
- return 0;
- if (l_ptr->max_pkt < (to_pos + size))
- return 0;
+ struct sk_buff *skb;
+ struct tipc_msg *msg;
+ int length;
- skb_put(bundler, pad + size);
- skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
- msg_set_size(bundler_msg, to_pos + size);
- msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
- kfree_skb(buf);
- l_ptr->stats.sent_bundled++;
- return 1;
-}
+ /* Update counters used in statistical profiling of send traffic */
+ l->stats.accu_queue_sz += skb_queue_len(&l->transmq);
+ l->stats.queue_sz_counts++;
-static void link_add_to_outqueue(struct tipc_link *l_ptr,
- struct sk_buff *buf,
- struct tipc_msg *msg)
-{
- u32 ack = mod(l_ptr->next_in_no - 1);
- u32 seqno = mod(l_ptr->next_out_no++);
-
- msg_set_word(msg, 2, ((ack << 16) | seqno));
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- buf->next = NULL;
- if (l_ptr->first_out) {
- l_ptr->last_out->next = buf;
- l_ptr->last_out = buf;
- } else
- l_ptr->first_out = l_ptr->last_out = buf;
+ skb = skb_peek(&l->transmq);
+ if (!skb)
+ return;
+ msg = buf_msg(skb);
+ length = msg_size(msg);
- l_ptr->out_queue_size++;
- if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
- l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
+ if (msg_user(msg) == MSG_FRAGMENTER) {
+ if (msg_type(msg) != FIRST_FRAGMENT)
+ return;
+ length = msg_size(msg_inner_hdr(msg));
+ }
+ l->stats.msg_lengths_total += length;
+ l->stats.msg_length_counts++;
+ if (length <= 64)
+ l->stats.msg_length_profile[0]++;
+ else if (length <= 256)
+ l->stats.msg_length_profile[1]++;
+ else if (length <= 1024)
+ l->stats.msg_length_profile[2]++;
+ else if (length <= 4096)
+ l->stats.msg_length_profile[3]++;
+ else if (length <= 16384)
+ l->stats.msg_length_profile[4]++;
+ else if (length <= 32768)
+ l->stats.msg_length_profile[5]++;
+ else
+ l->stats.msg_length_profile[6]++;
}
-static void link_add_chain_to_outqueue(struct tipc_link *l_ptr,
- struct sk_buff *buf_chain,
- u32 long_msgno)
+/**
+ * tipc_link_too_silent - check if link is "too silent"
+ * @l: tipc link to be checked
+ *
+ * Return: true if the link 'silent_intv_cnt' is about to reach the
+ * 'abort_limit' value, otherwise false
+ */
+bool tipc_link_too_silent(struct tipc_link *l)
{
- struct sk_buff *buf;
- struct tipc_msg *msg;
-
- if (!l_ptr->next_out)
- l_ptr->next_out = buf_chain;
- while (buf_chain) {
- buf = buf_chain;
- buf_chain = buf_chain->next;
-
- msg = buf_msg(buf);
- msg_set_long_msgno(msg, long_msgno);
- link_add_to_outqueue(l_ptr, buf, msg);
- }
+ return (l->silent_intv_cnt + 2 > l->abort_limit);
}
-/*
- * tipc_link_send_buf() is the 'full path' for messages, called from
- * inside TIPC when the 'fast path' in tipc_send_buf
- * has failed, and from link_send()
+/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
-int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
-{
- struct tipc_msg *msg = buf_msg(buf);
- u32 size = msg_size(msg);
- u32 dsz = msg_data_sz(msg);
- u32 queue_size = l_ptr->out_queue_size;
- u32 imp = tipc_msg_tot_importance(msg);
- u32 queue_limit = l_ptr->queue_limit[imp];
- u32 max_packet = l_ptr->max_pkt;
-
- /* Match msg importance against queue limits: */
- if (unlikely(queue_size >= queue_limit)) {
- if (imp <= TIPC_CRITICAL_IMPORTANCE) {
- link_schedule_port(l_ptr, msg_origport(msg), size);
- kfree_skb(buf);
- return -ELINKCONG;
- }
- kfree_skb(buf);
- if (imp > CONN_MANAGER) {
- pr_warn("%s<%s>, send queue full", link_rst_msg,
- l_ptr->name);
- tipc_link_reset(l_ptr);
+int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+ int mtyp = 0;
+ int rc = 0;
+ bool state = false;
+ bool probe = false;
+ bool setup = false;
+ u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
+ u16 bc_acked = l->bc_rcvlink->acked;
+ struct tipc_mon_state *mstate = &l->mon_state;
+
+ trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " ");
+ trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " ");
+ switch (l->state) {
+ case LINK_ESTABLISHED:
+ case LINK_SYNCHING:
+ mtyp = STATE_MSG;
+ link_profile_stats(l);
+ tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id);
+ if (mstate->reset || (l->silent_intv_cnt > l->abort_limit))
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ state = bc_acked != bc_snt;
+ state |= l->bc_rcvlink->rcv_unacked;
+ state |= l->rcv_unacked;
+ state |= !skb_queue_empty(&l->transmq);
+ probe = mstate->probing;
+ probe |= l->silent_intv_cnt;
+ if (probe || mstate->monitoring)
+ l->silent_intv_cnt++;
+ probe |= !skb_queue_empty(&l->deferdq);
+ if (l->snd_nxt == l->checkpoint) {
+ tipc_link_update_cwin(l, 0, 0);
+ probe = true;
}
- return dsz;
+ l->checkpoint = l->snd_nxt;
+ break;
+ case LINK_RESET:
+ setup = l->rst_cnt++ <= 4;
+ setup |= !(l->rst_cnt % 16);
+ mtyp = RESET_MSG;
+ break;
+ case LINK_ESTABLISHING:
+ setup = true;
+ mtyp = ACTIVATE_MSG;
+ break;
+ case LINK_PEER_RESET:
+ case LINK_RESETTING:
+ case LINK_FAILINGOVER:
+ break;
+ default:
+ break;
}
- /* Fragmentation needed ? */
- if (size > max_packet)
- return link_send_long_buf(l_ptr, buf);
+ if (state || probe || setup)
+ tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
- /* Packet can be queued or sent. */
- if (likely(!tipc_bearer_blocked(l_ptr->b_ptr) &&
- !link_congested(l_ptr))) {
- link_add_to_outqueue(l_ptr, buf, msg);
-
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
- l_ptr->unacked_window = 0;
- return dsz;
- }
- /* Congestion: can message be bundled ? */
- if ((msg_user(msg) != CHANGEOVER_PROTOCOL) &&
- (msg_user(msg) != MSG_FRAGMENTER)) {
-
- /* Try adding message to an existing bundle */
- if (l_ptr->next_out &&
- link_bundle_buf(l_ptr, l_ptr->last_out, buf))
- return dsz;
-
- /* Try creating a new bundle */
- if (size <= max_packet * 2 / 3) {
- struct sk_buff *bundler = tipc_buf_acquire(max_packet);
- struct tipc_msg bundler_hdr;
-
- if (bundler) {
- tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
- INT_H_SIZE, l_ptr->addr);
- skb_copy_to_linear_data(bundler, &bundler_hdr,
- INT_H_SIZE);
- skb_trim(bundler, INT_H_SIZE);
- link_bundle_buf(l_ptr, bundler, buf);
- buf = bundler;
- msg = buf_msg(buf);
- l_ptr->stats.sent_bundles++;
- }
- }
- }
- if (!l_ptr->next_out)
- l_ptr->next_out = buf;
- link_add_to_outqueue(l_ptr, buf, msg);
- return dsz;
+ return rc;
}
-/*
- * tipc_link_send(): same as tipc_link_send_buf(), but the link to use has
- * not been selected yet, and the the owner node is not locked
- * Called by TIPC internal users, e.g. the name distributor
+/**
+ * link_schedule_user - schedule a message sender for wakeup after congestion
+ * @l: congested link
+ * @hdr: header of message that is being sent
+ * Create pseudo msg to send back to user when congestion abates
*/
-int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
-{
- struct tipc_link *l_ptr;
- struct tipc_node *n_ptr;
- int res = -ELINKCONG;
-
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(dest);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[selector & 1];
- if (l_ptr)
- res = tipc_link_send_buf(l_ptr, buf);
- else
- kfree_skb(buf);
- tipc_node_unlock(n_ptr);
- } else {
- kfree_skb(buf);
- }
- read_unlock_bh(&tipc_net_lock);
- return res;
+static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ u32 dnode = tipc_own_addr(l->net);
+ u32 dport = msg_origport(hdr);
+ struct sk_buff *skb;
+
+ /* Create and schedule wakeup pseudo message */
+ skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
+ dnode, l->addr, dport, 0, 0);
+ if (!skb)
+ return -ENOBUFS;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
+ skb_queue_tail(&l->wakeupq, skb);
+ l->stats.link_congs++;
+ trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!");
+ return -ELINKCONG;
}
-/*
- * tipc_link_send_sync - synchronize broadcast link endpoints.
- *
- * Give a newly added peer node the sequence number where it should
- * start receiving and acking broadcast packets.
- *
- * Called with node locked
+/**
+ * link_prepare_wakeup - prepare users for wakeup after congestion
+ * @l: congested link
+ * Wake up a number of waiting users, as permitted by available space
+ * in the send queue
*/
-static void tipc_link_send_sync(struct tipc_link *l)
+static void link_prepare_wakeup(struct tipc_link *l)
{
- struct sk_buff *buf;
- struct tipc_msg *msg;
+ struct sk_buff_head *wakeupq = &l->wakeupq;
+ struct sk_buff_head *inputq = l->inputq;
+ struct sk_buff *skb, *tmp;
+ struct sk_buff_head tmpq;
+ int avail[5] = {0,};
+ int imp = 0;
- buf = tipc_buf_acquire(INT_H_SIZE);
- if (!buf)
- return;
+ __skb_queue_head_init(&tmpq);
+
+ for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
+ avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
+
+ skb_queue_walk_safe(wakeupq, skb, tmp) {
+ imp = TIPC_SKB_CB(skb)->chain_imp;
+ if (avail[imp] <= 0)
+ continue;
+ avail[imp]--;
+ __skb_unlink(skb, wakeupq);
+ __skb_queue_tail(&tmpq, skb);
+ }
+
+ spin_lock_bh(&inputq->lock);
+ skb_queue_splice_tail(&tmpq, inputq);
+ spin_unlock_bh(&inputq->lock);
- msg = buf_msg(buf);
- tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, l->addr);
- msg_set_last_bcast(msg, l->owner->bclink.acked);
- link_add_chain_to_outqueue(l, buf, 0);
- tipc_link_push_queue(l);
}
-/*
- * tipc_link_recv_sync - synchronize broadcast link endpoints.
- * Receive the sequence number where we should start receiving and
- * acking broadcast packets from a newly added peer node, and open
- * up for reception of such packets.
- *
- * Called with node locked
+/**
+ * tipc_link_set_skb_retransmit_time - set the time at which retransmission of
+ * the given skb should be next attempted
+ * @skb: skb to set a future retransmission time for
+ * @l: link the skb will be transmitted on
*/
-static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
+static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb,
+ struct tipc_link *l)
{
- struct tipc_msg *msg = buf_msg(buf);
-
- n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg);
- n->bclink.recv_permitted = true;
- kfree_skb(buf);
+ /* The value is stored in the skb control block (nxt_retr): broadcast
+ * send links use TIPC_BC_RETR_LIM, every other link TIPC_UC_RETR_TIME
+ */
+ if (link_is_bc_sndlink(l))
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ else
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+}
+
+/* tipc_link_reset - clear the session state and all queues of a link
+ * @l: link to reset
+ *
+ * Entries still waiting on the wakeup queue are handed over to the input
+ * queue; the transmit, deferred, backlog and failover-deferred queues are
+ * purged; reassembly buffers and the saved Gap ACK record are freed; and
+ * sequence numbers, counters, monitor state and statistics are
+ * reinitialized.
+ */
+void tipc_link_reset(struct tipc_link *l)
+{
+ struct sk_buff_head list;
+ u32 imp;
+
+ __skb_queue_head_init(&list);
+
+ l->in_session = false;
+ /* Force re-synch of peer session number before establishing */
+ l->peer_session--;
+ l->session++;
+ l->mtu = l->advertised_mtu;
+
+ /* Detach the wakeup queue via a private list so that the wakeup and
+ * input queue locks are taken one after the other, never nested
+ */
+ spin_lock_bh(&l->wakeupq.lock);
+ skb_queue_splice_init(&l->wakeupq, &list);
+ spin_unlock_bh(&l->wakeupq.lock);
+
+ spin_lock_bh(&l->inputq->lock);
+ skb_queue_splice_init(&list, l->inputq);
+ spin_unlock_bh(&l->inputq->lock);
+
+ __skb_queue_purge(&l->transmq);
+ __skb_queue_purge(&l->deferdq);
+ __skb_queue_purge(&l->backlogq);
+ __skb_queue_purge(&l->failover_deferdq);
+ /* Per-importance backlog accounting must match the now empty backlogq */
+ for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) {
+ l->backlog[imp].len = 0;
+ l->backlog[imp].target_bskb = NULL;
+ }
+ kfree_skb(l->reasm_buf);
+ kfree_skb(l->reasm_tnlmsg);
+ kfree_skb(l->failover_reasm_skb);
+ l->reasm_buf = NULL;
+ l->reasm_tnlmsg = NULL;
+ l->failover_reasm_skb = NULL;
+ l->rcv_unacked = 0;
+ l->snd_nxt = 1;
+ l->rcv_nxt = 1;
+ l->snd_nxt_state = 1;
+ l->rcv_nxt_state = 1;
+ l->acked = 0;
+ l->last_gap = 0;
+ kfree(l->last_ga);
+ l->last_ga = NULL;
+ l->silent_intv_cnt = 0;
+ l->rst_cnt = 0;
+ l->bc_peer_is_up = false;
+ memset(&l->mon_state, 0, sizeof(l->mon_state));
+ tipc_link_reset_stats(l);
}
-/*
- * tipc_link_send_names - send name table entries to new neighbor
+/**
+ * tipc_link_xmit(): enqueue buffer list according to queue situation
+ * @l: link to use
+ * @list: chain of buffers containing message
+ * @xmitq: returned list of packets to be sent by caller
*
- * Send routine for bulk delivery of name table messages when contact
- * with a new neighbor occurs. No link congestion checking is performed
- * because name table messages *must* be delivered. The messages must be
- * small enough not to require fragmentation.
- * Called without any locks held.
+ * Consumes the buffer chain.
+ * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
*/
-void tipc_link_send_names(struct list_head *message_list, u32 dest)
-{
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct sk_buff *buf;
- struct sk_buff *temp_buf;
-
- if (list_empty(message_list))
- return;
+int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head *backlogq = &l->backlogq;
+ struct sk_buff_head *transmq = &l->transmq;
+ struct sk_buff *skb, *_skb;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ u16 ack = l->rcv_nxt - 1;
+ u16 seqno = l->snd_nxt;
+ int pkt_cnt = skb_queue_len(list);
+ unsigned int mss = tipc_link_mss(l);
+ unsigned int cwin = l->window;
+ unsigned int mtu = l->mtu;
+ struct tipc_msg *hdr;
+ bool new_bundle;
+ int rc = 0;
+ int imp;
+
+ if (pkt_cnt <= 0)
+ return 0;
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(dest);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[0];
- if (l_ptr) {
- /* convert circular list to linear list */
- ((struct sk_buff *)message_list->prev)->next = NULL;
- link_add_chain_to_outqueue(l_ptr,
- (struct sk_buff *)message_list->next, 0);
- tipc_link_push_queue(l_ptr);
- INIT_LIST_HEAD(message_list);
+ /* Only the first buffer of the chain is checked against the link MTU */
+ hdr = buf_msg(skb_peek(list));
+ if (unlikely(msg_size(hdr) > mtu)) {
+ pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
+ skb_queue_len(list), msg_user(hdr),
+ msg_type(hdr), msg_size(hdr), mtu);
+ __skb_queue_purge(list);
+ return -EMSGSIZE;
+ }
+
+ imp = msg_importance(hdr);
+ /* Allow oversubscription of one data msg per source at congestion */
+ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
+ if (imp == TIPC_SYSTEM_IMPORTANCE) {
+ pr_warn("%s<%s>, link overflow\n", link_rst_msg, l->name);
+ __skb_queue_purge(list);
+ return -ENOBUFS;
}
- tipc_node_unlock(n_ptr);
- }
- read_unlock_bh(&tipc_net_lock);
-
- /* discard the messages if they couldn't be sent */
- list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
- list_del((struct list_head *)buf);
- kfree_skb(buf);
- }
-}
-
-/*
- * link_send_buf_fast: Entry for data messages where the
- * destination link is known and the header is complete,
- * inclusive total message length. Very time critical.
- * Link is locked. Returns user data length.
- */
-static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
- u32 *used_max_pkt)
-{
- struct tipc_msg *msg = buf_msg(buf);
- int res = msg_data_sz(msg);
-
- if (likely(!link_congested(l_ptr))) {
- if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
- if (likely(!tipc_bearer_blocked(l_ptr->b_ptr))) {
- link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_send(l_ptr->b_ptr, buf,
- &l_ptr->media_addr);
- l_ptr->unacked_window = 0;
- return res;
+ /* The chain is still queued below; rc from the scheduling call is
+ * what the caller gets back
+ */
+ rc = link_schedule_user(l, hdr);
+ }
+
+ if (pkt_cnt > 1) {
+ l->stats.sent_fragmented++;
+ l->stats.sent_fragments += pkt_cnt;
+ }
+
+ /* Prepare each packet for sending, and add to relevant queue: */
+ while ((skb = __skb_dequeue(list))) {
+ /* Room in the send window: keep the original on transmq and
+ * pass a clone to the caller through xmitq
+ */
+ if (likely(skb_queue_len(transmq) < cwin)) {
+ hdr = buf_msg(skb);
+ msg_set_seqno(hdr, seqno);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb) {
+ kfree_skb(skb);
+ __skb_queue_purge(list);
+ /* Packets queued earlier in this call already
+ * carry seqnos below 'seqno'; record that so
+ * those numbers are never handed out again
+ */
+ l->snd_nxt = seqno;
+ return -ENOBUFS;
}
- } else
- *used_max_pkt = l_ptr->max_pkt;
+ __skb_queue_tail(transmq, skb);
+ tipc_link_set_skb_retransmit_time(skb, l);
+ __skb_queue_tail(xmitq, _skb);
+ TIPC_SKB_CB(skb)->ackers = l->ackers;
+ l->rcv_unacked = 0;
+ l->stats.sent_pkts++;
+ seqno++;
+ continue;
+ }
+ /* Window full: try to bundle into the current target buffer of
+ * this importance level before falling back to plain backlog
+ */
+ if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb,
+ mss, l->addr, &new_bundle)) {
+ if (skb) {
+ /* Keep a ref. to the skb for next try */
+ l->backlog[imp].target_bskb = skb;
+ l->backlog[imp].len++;
+ __skb_queue_tail(backlogq, skb);
+ } else {
+ if (new_bundle) {
+ l->stats.sent_bundles++;
+ l->stats.sent_bundled++;
+ }
+ l->stats.sent_bundled++;
+ }
+ continue;
+ }
+ /* No bundling possible: move this packet and the whole remainder
+ * of the chain to the backlog; the emptied list ends the loop
+ */
+ l->backlog[imp].target_bskb = NULL;
+ l->backlog[imp].len += (1 + skb_queue_len(list));
+ __skb_queue_tail(backlogq, skb);
+ skb_queue_splice_tail_init(list, backlogq);
}
- return tipc_link_send_buf(l_ptr, buf); /* All other cases */
+ l->snd_nxt = seqno;
+ return rc;
}
-/*
- * tipc_link_send_sections_fast: Entry for messages where the
- * destination processor is known and the header is complete,
- * except for total message length.
- * Returns user data length or errno.
- */
-int tipc_link_send_sections_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- const u32 num_sect, unsigned int total_len,
- u32 destaddr)
-{
- struct tipc_msg *hdr = &sender->phdr;
- struct tipc_link *l_ptr;
- struct sk_buff *buf;
- struct tipc_node *node;
- int res;
- u32 selector = msg_origport(hdr) & 1;
-
-again:
- /*
- * Try building message using port's max_pkt hint.
- * (Must not hold any locks while building message.)
- */
- res = tipc_msg_build(hdr, msg_sect, num_sect, total_len,
- sender->max_pkt, &buf);
- /* Exit if build request was invalid */
- if (unlikely(res < 0))
- return res;
-
- read_lock_bh(&tipc_net_lock);
- node = tipc_node_find(destaddr);
- if (likely(node)) {
- tipc_node_lock(node);
- l_ptr = node->active_links[selector];
- if (likely(l_ptr)) {
- if (likely(buf)) {
- res = link_send_buf_fast(l_ptr, buf,
- &sender->max_pkt);
-exit:
- tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
- return res;
- }
-
- /* Exit if link (or bearer) is congested */
- if (link_congested(l_ptr) ||
- tipc_bearer_blocked(l_ptr->b_ptr)) {
- res = link_schedule_port(l_ptr,
- sender->ref, res);
- goto exit;
- }
+/* tipc_link_update_cwin - adjust the link send window
+ * @l: link whose window and slow start threshold are updated
+ * @released: count added to the ack counter and, in slow start, to the window
+ * (presumably packets just released from transmq - confirm at call site)
+ * @retransmitted: true selects the fast recovery branch
+ */
+static void tipc_link_update_cwin(struct tipc_link *l, int released,
+ bool retransmitted)
+{
+ int bklog_len = skb_queue_len(&l->backlogq);
+ struct sk_buff_head *txq = &l->transmq;
+ int txq_len = skb_queue_len(txq);
+ u16 cwin = l->window;
- /*
- * Message size exceeds max_pkt hint; update hint,
- * then re-try fast path or fragment the message
- */
- sender->max_pkt = l_ptr->max_pkt;
- tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
+ /* Enter fast recovery: threshold = max(window / 2, 300), window capped to it */
+ if (unlikely(retransmitted)) {
+ l->ssthresh = max_t(u16, l->window / 2, 300);
+ l->window = min_t(u16, l->ssthresh, l->window);
+ return;
+ }
+ /* Enter slow start: nothing was released, restart from the minimum window */
+ if (unlikely(!released)) {
+ l->ssthresh = max_t(u16, l->window / 2, 300);
+ l->window = l->min_win;
+ return;
+ }
+ /* Don't increase window if no pressure on the transmit queue */
+ if (txq_len + bklog_len < cwin)
+ return;
+ /* Don't increase window if there are holes in the transmit queue */
+ if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len)
+ return;
- if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
- goto again;
+ l->cong_acks += released;
- return link_send_sections_long(sender, msg_sect,
- num_sect, total_len,
- destaddr);
- }
- tipc_node_unlock(node);
+ /* Slow start: grow by 'released', bounded by max_win */
+ if (cwin <= l->ssthresh) {
+ l->window = min_t(u16, cwin + released, l->max_win);
+ return;
}
- read_unlock_bh(&tipc_net_lock);
+ /* Congestion avoidance: grow by one once a full window has been acked */
+ if (l->cong_acks < cwin)
+ return;
+ l->window = min_t(u16, ++cwin, l->max_win);
+ l->cong_acks = 0;
+}
+
+/* tipc_link_advance_backlog - move backlog packets into the send window
+ * @l: link whose backlog queue is drained
+ * @xmitq: queue receiving a clone of every packet moved to transmq
+ *
+ * Packets are moved from the head of backlogq to the tail of transmq for as
+ * long as transmq holds fewer packets than the link window. Stops early when
+ * the backlog is empty or a clone cannot be allocated.
+ */
+static void tipc_link_advance_backlog(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
+{
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ struct sk_buff_head *txq = &l->transmq;
+ struct sk_buff *skb, *_skb;
+ u16 ack = l->rcv_nxt - 1;
+ u16 seqno = l->snd_nxt;
+ struct tipc_msg *hdr;
+ u16 cwin = l->window;
+ u32 imp;
- /* Couldn't find a link to the destination node */
- if (buf)
- return tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
- if (res >= 0)
- return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NODE);
- return res;
+ while (skb_queue_len(txq) < cwin) {
+ /* Peek first: the packet stays on the backlog if cloning fails */
+ skb = skb_peek(&l->backlogq);
+ if (!skb)
+ break;
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ break;
+ __skb_dequeue(&l->backlogq);
+ hdr = buf_msg(skb);
+ imp = msg_importance(hdr);
+ l->backlog[imp].len--;
+ /* The packet is leaving the backlog, so it can no longer be the
+ * bundling target of its importance level
+ */
+ if (unlikely(skb == l->backlog[imp].target_bskb))
+ l->backlog[imp].target_bskb = NULL;
+ __skb_queue_tail(&l->transmq, skb);
+ tipc_link_set_skb_retransmit_time(skb, l);
+
+ __skb_queue_tail(xmitq, _skb);
+ TIPC_SKB_CB(skb)->ackers = l->ackers;
+ msg_set_seqno(hdr, seqno);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
+ l->rcv_unacked = 0;
+ l->stats.sent_pkts++;
+ seqno++;
+ }
+ l->snd_nxt = seqno;
}
-/*
- * link_send_sections_long(): Entry for long messages where the
- * destination node is known and the header is complete,
- * inclusive total message length.
- * Link and bearer congestion status have been checked to be ok,
- * and are ignored if they change.
- *
- * Note that fragments do not use the full link MTU so that they won't have
- * to undergo refragmentation if link changeover causes them to be sent
- * over another link with an additional tunnel header added as prefix.
- * (Refragmentation will still occur if the other link has a smaller MTU.)
+/**
+ * link_retransmit_failure() - Detect repeated retransmit failures
+ * @l: tipc link sender
+ * @r: tipc link receiver (= l in case of unicast)
+ * @rc: returned code
*
- * Returns user data length or errno.
+ * Return: true if a repeated retransmit failure is detected, otherwise
+ * false
*/
-static int link_send_sections_long(struct tipc_port *sender,
- struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len,
- u32 destaddr)
-{
- struct tipc_link *l_ptr;
- struct tipc_node *node;
- struct tipc_msg *hdr = &sender->phdr;
- u32 dsz = total_len;
- u32 max_pkt, fragm_sz, rest;
- struct tipc_msg fragm_hdr;
- struct sk_buff *buf, *buf_chain, *prev;
- u32 fragm_crs, fragm_rest, hsz, sect_rest;
- const unchar *sect_crs;
- int curr_sect;
- u32 fragm_no;
- int res = 0;
-
-again:
- fragm_no = 1;
- max_pkt = sender->max_pkt - INT_H_SIZE;
- /* leave room for tunnel header in case of link changeover */
- fragm_sz = max_pkt - INT_H_SIZE;
- /* leave room for fragmentation header in each fragment */
- rest = dsz;
- fragm_crs = 0;
- fragm_rest = 0;
- sect_rest = 0;
- sect_crs = NULL;
- curr_sect = -1;
-
- /* Prepare reusable fragment header */
- tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
- INT_H_SIZE, msg_destnode(hdr));
- msg_set_size(&fragm_hdr, max_pkt);
- msg_set_fragm_no(&fragm_hdr, 1);
-
- /* Prepare header of first fragment */
- buf_chain = buf = tipc_buf_acquire(max_pkt);
- if (!buf)
- return -ENOMEM;
- buf->next = NULL;
- skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
- hsz = msg_hdr_sz(hdr);
- skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
-
- /* Chop up message */
- fragm_crs = INT_H_SIZE + hsz;
- fragm_rest = fragm_sz - hsz;
-
- do { /* For all sections */
- u32 sz;
-
- if (!sect_rest) {
- sect_rest = msg_sect[++curr_sect].iov_len;
- sect_crs = (const unchar *)msg_sect[curr_sect].iov_base;
- }
-
- if (sect_rest < fragm_rest)
- sz = sect_rest;
- else
- sz = fragm_rest;
-
- if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) {
- res = -EFAULT;
-error:
- for (; buf_chain; buf_chain = buf) {
- buf = buf_chain->next;
- kfree_skb(buf_chain);
- }
- return res;
- }
- sect_crs += sz;
- sect_rest -= sz;
- fragm_crs += sz;
- fragm_rest -= sz;
- rest -= sz;
-
- if (!fragm_rest && rest) {
-
- /* Initiate new fragment: */
- if (rest <= fragm_sz) {
- fragm_sz = rest;
- msg_set_type(&fragm_hdr, LAST_FRAGMENT);
- } else {
- msg_set_type(&fragm_hdr, FRAGMENT);
- }
- msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
- msg_set_fragm_no(&fragm_hdr, ++fragm_no);
- prev = buf;
- buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
- if (!buf) {
- res = -ENOMEM;
- goto error;
- }
-
- buf->next = NULL;
- prev->next = buf;
- skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
- fragm_crs = INT_H_SIZE;
- fragm_rest = fragm_sz;
- }
- } while (rest > 0);
-
- /*
- * Now we have a buffer chain. Select a link and check
- * that packet size is still OK
- */
- node = tipc_node_find(destaddr);
- if (likely(node)) {
- tipc_node_lock(node);
- l_ptr = node->active_links[sender->ref & 1];
- if (!l_ptr) {
- tipc_node_unlock(node);
- goto reject;
- }
- if (l_ptr->max_pkt < max_pkt) {
- sender->max_pkt = l_ptr->max_pkt;
- tipc_node_unlock(node);
- for (; buf_chain; buf_chain = buf) {
- buf = buf_chain->next;
- kfree_skb(buf_chain);
- }
- goto again;
- }
+static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
+ int *rc)
+{
+ /* Only the packet at the head of the transmit queue is examined */
+ struct sk_buff *skb = skb_peek(&l->transmq);
+ struct tipc_msg *hdr;
+
+ if (!skb)
+ return false;
+
+ /* The head packet has never been retransmitted */
+ if (!TIPC_SKB_CB(skb)->retr_cnt)
+ return false;
+
+ /* Not a failure until more than 10 x the receiver's tolerance (taken
+ * as milliseconds) has passed since the packet's retr_stamp
+ */
+ if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
+ msecs_to_jiffies(r->tolerance * 10)))
+ return false;
+
+ hdr = buf_msg(skb);
+ /* On the broadcast send link, proceed only if this receiver's acked
+ * seqno is still below the head packet's seqno
+ */
+ if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
+ return false;
+
+ pr_warn("Retransmission failure on link <%s>\n", l->name);
+ link_print(l, "State of link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+ pr_info("sqno %u, prev: %x, dest: %x\n",
+ msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
+ pr_info("retr_stamp %d, retr_cnt %d\n",
+ jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
+ TIPC_SKB_CB(skb)->retr_cnt);
+
+ trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+ trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+ trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+
+ /* Broadcast: mark the receiver link as reset and report link down;
+ * otherwise feed a failure event to this link's state machine
+ */
+ if (link_is_bc_sndlink(l)) {
+ r->state = LINK_RESET;
+ *rc |= TIPC_LINK_DOWN_EVT;
} else {
-reject:
- for (; buf_chain; buf_chain = buf) {
- buf = buf_chain->next;
- kfree_skb(buf_chain);
- }
- return tipc_port_reject_sections(sender, hdr, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NODE);
+ *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
- /* Append chain of fragments to send queue & send them */
- l_ptr->long_msg_seq_no++;
- link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no);
- l_ptr->stats.sent_fragments += fragm_no;
- l_ptr->stats.sent_fragmented++;
- tipc_link_push_queue(l_ptr);
- tipc_node_unlock(node);
- return dsz;
+ return true;
}
-/*
- * tipc_link_push_packet: Push one unsent packet to the media
+/* tipc_data_input - deliver data and name distr msgs to upper layer
+ *
+ * Consumes buffer if message is of right type
+ * Node lock must be held
*/
-u32 tipc_link_push_packet(struct tipc_link *l_ptr)
-{
- struct sk_buff *buf = l_ptr->first_out;
- u32 r_q_size = l_ptr->retransm_queue_size;
- u32 r_q_head = l_ptr->retransm_queue_head;
-
- /* Step to position where retransmission failed, if any, */
- /* consider that buffers may have been released in meantime */
- if (r_q_size && buf) {
- u32 last = lesser(mod(r_q_head + r_q_size),
- link_last_sent(l_ptr));
- u32 first = buf_seqno(buf);
-
- while (buf && less(first, r_q_head)) {
- first = mod(first + 1);
- buf = buf->next;
+static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *inputq)
+{
+ /* Group and multicast traffic goes to the broadcast receive link's queue */
+ struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
+ struct tipc_msg *hdr = buf_msg(skb);
+
+ switch (msg_user(hdr)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
+ skb_queue_tail(mc_inputq, skb);
+ return true;
+ }
+ fallthrough;
+ case CONN_MANAGER:
+ skb_queue_tail(inputq, skb);
+ return true;
+ case GROUP_PROTOCOL:
+ skb_queue_tail(mc_inputq, skb);
+ return true;
+ case NAME_DISTRIBUTOR:
+ /* Marks the broadcast receive link established as a side effect */
+ l->bc_rcvlink->state = LINK_ESTABLISHED;
+ skb_queue_tail(l->namedq, skb);
+ return true;
+ /* These users are not consumed here: the buffer is left untouched and
+ * false tells the caller to process it
+ */
+ case MSG_BUNDLER:
+ case TUNNEL_PROTOCOL:
+ case MSG_FRAGMENTER:
+ case BCAST_PROTOCOL:
+ return false;
+#ifdef CONFIG_TIPC_CRYPTO
+ case MSG_CRYPTO:
+ /* Accepted only with key exchange enabled and the skb flagged as
+ * decrypted; otherwise dropped by the default case below
+ */
+ if (sysctl_tipc_key_exchange_enabled &&
+ TIPC_SKB_CB(skb)->decrypted) {
+ tipc_crypto_msg_rcv(l->net, skb);
+ return true;
}
- l_ptr->retransm_queue_head = r_q_head = first;
- l_ptr->retransm_queue_size = r_q_size = mod(last - first);
+ fallthrough;
+#endif
+ default:
+ /* Unknown user: the buffer is freed here, so it still counts as consumed */
+ pr_warn("Dropping received illegal msg type\n");
+ kfree_skb(skb);
+ return true;
}
+}
- /* Continue retransmission now, if there is anything: */
- if (r_q_size && buf) {
- msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
- msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
- l_ptr->retransm_queue_head = mod(++r_q_head);
- l_ptr->retransm_queue_size = --r_q_size;
- l_ptr->stats.retransmitted++;
+/* tipc_link_input - process packet that has passed link protocol check
+ *
+ * Consumes buffer
+ */
+static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff **reasm_skb)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct sk_buff *iskb;
+ struct sk_buff_head tmpq;
+ int usr = msg_user(hdr);
+ int pos = 0;
+
+ if (usr == MSG_BUNDLER) {
+ /* Unpack every bundled message into a private queue, then move
+ * the whole batch to inputq in one splice. The return value of
+ * tipc_data_input() is ignored for the inner messages.
+ */
+ skb_queue_head_init(&tmpq);
+ l->stats.recv_bundles++;
+ l->stats.recv_bundled += msg_msgcnt(hdr);
+ while (tipc_msg_extract(skb, &iskb, &pos))
+ tipc_data_input(l, iskb, &tmpq);
+ tipc_skb_queue_splice_tail(&tmpq, inputq);
return 0;
+ } else if (usr == MSG_FRAGMENTER) {
+ l->stats.recv_fragments++;
+ /* A true return means reassembly is complete and skb now refers
+ * to the whole message; otherwise a NULL *reasm_skb on a link
+ * that is not a broadcast receive link is treated as a failure
+ */
+ if (tipc_buf_append(reasm_skb, &skb)) {
+ l->stats.recv_fragmented++;
+ tipc_data_input(l, skb, inputq);
+ } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) {
+ pr_warn_ratelimited("Unable to build fragment list\n");
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+ return 0;
+ } else if (usr == BCAST_PROTOCOL) {
+ /* Handled under the broadcast lock; the buffer is freed below */
+ tipc_bcast_lock(l->net);
+ tipc_link_bc_init_rcv(l->bc_rcvlink, hdr);
+ tipc_bcast_unlock(l->net);
}
- /* Send deferred protocol message, if any: */
- buf = l_ptr->proto_msg_queue;
- if (buf) {
- msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
- msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
- l_ptr->unacked_window = 0;
- kfree_skb(buf);
- l_ptr->proto_msg_queue = NULL;
+ kfree_skb(skb);
+ return 0;
+}
+
+/* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the
+ * inner message along with the ones in the old link's
+ * deferdq
+ * @l: tunnel link
+ * @skb: TUNNEL_PROTOCOL message
+ * @inputq: queue to put messages ready for delivery
+ */
+static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *inputq)
+{
+ struct sk_buff **reasm_skb = &l->failover_reasm_skb;
+ struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg;
+ struct sk_buff_head *fdefq = &l->failover_deferdq;
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct sk_buff *iskb;
+ int ipos = 0;
+ int rc = 0;
+ u16 seqno;
+
+ /* SYNCH_MSG tunnel packets are dropped without further processing */
+ if (msg_type(hdr) == SYNCH_MSG) {
+ kfree_skb(skb);
return 0;
}
- /* Send one deferred data message, if send window not full: */
- buf = l_ptr->next_out;
- if (buf) {
- struct tipc_msg *msg = buf_msg(buf);
- u32 next = msg_seqno(msg);
- u32 first = buf_seqno(l_ptr->first_out);
-
- if (mod(next - first) < l_ptr->queue_limit[0]) {
- msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
- if (msg_user(msg) == MSG_BUNDLER)
- msg_set_type(msg, CLOSED_MSG);
- l_ptr->next_out = buf->next;
+ /* Not a fragment? */
+ if (likely(!msg_nof_fragms(hdr))) {
+ /* NOTE(review): skb is not freed on this error path - presumably
+ * tipc_msg_extract() releases it on failure; confirm in msg.c
+ */
+ if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) {
+ pr_warn_ratelimited("Unable to extract msg, defq: %d\n",
+ skb_queue_len(fdefq));
return 0;
}
+ kfree_skb(skb);
+ } else {
+ /* Set fragment type for buf_append */
+ if (msg_fragm_no(hdr) == 1)
+ msg_set_type(hdr, FIRST_FRAGMENT);
+ else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr))
+ msg_set_type(hdr, FRAGMENT);
+ else
+ msg_set_type(hdr, LAST_FRAGMENT);
+
+ if (!tipc_buf_append(reasm_tnlmsg, &skb)) {
+ /* Successful but non-complete reassembly? */
+ if (*reasm_tnlmsg || link_is_bc_rcvlink(l))
+ return 0;
+ pr_warn_ratelimited("Unable to reassemble tunnel msg\n");
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+ /* Reassembly complete: skb now refers to the inner message */
+ iskb = skb;
}
- return 1;
-}
-
-/*
- * push_queue(): push out the unsent messages of a link where
- * congestion has abated. Node is locked
- */
-void tipc_link_push_queue(struct tipc_link *l_ptr)
-{
- u32 res;
-
- if (tipc_bearer_blocked(l_ptr->b_ptr))
- return;
do {
- res = tipc_link_push_packet(l_ptr);
- } while (!res);
-}
-
-static void link_reset_all(unsigned long addr)
-{
- struct tipc_node *n_ptr;
- char addr_string[16];
- u32 i;
-
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find((u32)addr);
- if (!n_ptr) {
- read_unlock_bh(&tipc_net_lock);
- return; /* node no longer exists */
- }
+ seqno = buf_seqno(iskb);
+ /* Below the drop point: discard */
+ if (unlikely(less(seqno, l->drop_point))) {
+ kfree_skb(iskb);
+ continue;
+ }
+ /* Not the drop point itself: park it, sorted, on the
+ * failover deferred queue
+ */
+ if (unlikely(seqno != l->drop_point)) {
+ __tipc_skb_queue_sorted(fdefq, seqno, iskb);
+ continue;
+ }
- tipc_node_lock(n_ptr);
+ /* Exactly the drop point: deliver it, then let the loop
+ * condition fetch the next deferred packet at the new drop
+ * point, if any
+ */
+ l->drop_point++;
+ if (!tipc_data_input(l, iskb, inputq))
+ rc |= tipc_link_input(l, iskb, inputq, reasm_skb);
+ if (unlikely(rc))
+ break;
+ } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
- pr_warn("Resetting all links to %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+ return rc;
+}
- for (i = 0; i < MAX_BEARERS; i++) {
- if (n_ptr->links[i]) {
- link_print(n_ptr->links[i], "Resetting link\n");
- tipc_link_reset(n_ptr->links[i]);
+/**
+ * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG
+ * @ga: returned pointer to the Gap ACK blocks if any
+ * @l: the tipc link
+ * @hdr: the PROTOCOL/STATE_MSG header
+ * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0)
+ *
+ * Return: the total Gap ACK blocks size
+ */
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc)
+{
+ struct tipc_gap_ack_blks *p;
+ u16 sz = 0;
+
+ /* Does peer support the Gap ACK blocks feature? */
+ if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
+ p = (struct tipc_gap_ack_blks *)msg_data(hdr);
+ sz = ntohs(p->len);
+ /* Sanity check: len must match the size implied by both counters */
+ if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) {
+ /* Good, check if the desired type exists */
+ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
+ goto ok;
+ /* Backward compatible: peer might not support bc, but uc? */
+ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) {
+ if (p->ugack_cnt) {
+ /* The record in the message is modified in place */
+ p->bgack_cnt = 0;
+ goto ok;
+ }
}
}
+ /* Other cases: ignore! */
+ p = NULL;
- tipc_node_unlock(n_ptr);
- read_unlock_bh(&tipc_net_lock);
+ /* Note: sz is returned as read from the message even when *ga ends up
+ * NULL; it is 0 only when the peer lacks the capability
+ */
+ok:
+ *ga = p;
+ return sz;
}
-static void link_retransmit_failure(struct tipc_link *l_ptr,
- struct sk_buff *buf)
+/* __tipc_build_gap_ack_blks - fill Gap ACK entries from a link's deferdq
+ * @ga: Gap ACK record being built
+ * @l: link whose deferred queue is scanned
+ * @start_index: index in @ga->gacks of the first entry to write
+ *
+ * Return: number of entries written; 0 if the deferred queue is empty
+ */
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index)
{
- struct tipc_msg *msg = buf_msg(buf);
-
- pr_warn("Retransmission failure on link <%s>\n", l_ptr->name);
-
- if (l_ptr->addr) {
- /* Handle failure on standard link */
- link_print(l_ptr, "Resetting link\n");
- tipc_link_reset(l_ptr);
-
- } else {
- /* Handle failure on broadcast link */
- struct tipc_node *n_ptr;
- char addr_string[16];
-
- pr_info("Msg seq number: %u, ", msg_seqno(msg));
- pr_cont("Outstanding acks: %lu\n",
- (unsigned long) TIPC_SKB_CB(buf)->handle);
-
- n_ptr = tipc_bclink_retransmit_to();
- tipc_node_lock(n_ptr);
+ struct tipc_gap_ack *gacks = &ga->gacks[start_index];
+ struct sk_buff *skb = skb_peek(&l->deferdq);
+ u16 expect, seqno = 0;
+ u8 n = 0;
- tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_info("Broadcast link info for %s\n", addr_string);
- pr_info("Reception permitted: %d, Acked: %u\n",
- n_ptr->bclink.recv_permitted,
- n_ptr->bclink.acked);
- pr_info("Last in: %u, Oos state: %u, Last sent: %u\n",
- n_ptr->bclink.last_in,
- n_ptr->bclink.oos_state,
- n_ptr->bclink.last_sent);
-
- tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
-
- tipc_node_unlock(n_ptr);
+ if (!skb)
+ return 0;
- l_ptr->stale_count = 0;
+ expect = buf_seqno(skb);
+ skb_queue_walk(&l->deferdq, skb) {
+ seqno = buf_seqno(skb);
+ /* Hole before this packet: close the current block at
+ * expect - 1 and record how many seqnos are missing
+ */
+ if (unlikely(more(seqno, expect))) {
+ gacks[n].ack = htons(expect - 1);
+ gacks[n].gap = htons(seqno - expect);
+ /* Each caller gets at most half of MAX_GAP_ACK_BLKS */
+ if (++n >= MAX_GAP_ACK_BLKS / 2) {
+ pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n",
+ l->name, n,
+ skb_queue_len(&l->deferdq));
+ return n;
+ }
+ } else if (unlikely(less(seqno, expect))) {
+ pr_warn("Unexpected skb in deferdq!\n");
+ continue;
+ }
+ expect = seqno + 1;
}
-}
-void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
- u32 retransmits)
-{
- struct tipc_msg *msg;
-
- if (!buf)
- return;
-
- msg = buf_msg(buf);
+ /* last block: ends at the last seqno seen, with no gap after it */
+ gacks[n].ack = htons(seqno);
+ gacks[n].gap = 0;
+ n++;
+ return n;
+}
- if (tipc_bearer_blocked(l_ptr->b_ptr)) {
- if (l_ptr->retransm_queue_size == 0) {
- l_ptr->retransm_queue_head = msg_seqno(msg);
- l_ptr->retransm_queue_size = retransmits;
+/* tipc_build_gap_ack_blks - build Gap ACK blocks
+ * @l: tipc unicast link
+ * @hdr: the tipc message whose data area receives the Gap ACK blocks
+ *
+ * The function builds Gap ACK blocks for both the unicast & broadcast receiver
+ * links of a certain peer; the resulting buffer has the network data format
+ * found at the struct tipc_gap_ack_blks definition. The broadcast ack and
+ * broadcast gap fields of @hdr are set as well.
+ *
+ * Return: the total size of the Gap ACK record written into @hdr (nothing is
+ * allocated here)
+ */
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ struct tipc_link *bcl = l->bc_rcvlink;
+ struct tipc_gap_ack_blks *ga;
+ u16 len;
+
+ ga = (struct tipc_gap_ack_blks *)msg_data(hdr);
+
+ /* Start with broadcast link first; its entries begin at index 0 */
+ tipc_bcast_lock(bcl->net);
+ msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+ msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0);
+ tipc_bcast_unlock(bcl->net);
+
+ /* Now for unicast link, but an explicit NACK only (???)
+ * Unicast entries follow directly after the broadcast ones
+ */
+ ga->ugack_cnt = (msg_seq_gap(hdr)) ?
+ __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
+
+ /* Total len */
+ len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt));
+ ga->len = htons(len);
+ return len;
+}
+
+/* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing
+ * acked packets, also doing retransmissions if
+ * gaps found
+ * @l: tipc link with transmq queue to be advanced
+ * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast)
+ * @acked: seqno of last packet acked by peer without any gaps before
+ * @gap: # of gap packets
+ * @ga: buffer pointer to Gap ACK blocks from peer
+ * @xmitq: queue for accumulating the retransmitted packets if any
+ * @retransmitted: returned boolean value if a retransmission is really issued
+ * @rc: returned code, e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failure
+ * is detected (unlikely case)
+ *
+ * Return: the number of packets released from the link transmq
+ */
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc)
+{
+ struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL;
+ struct tipc_gap_ack *gacks = NULL;
+ struct sk_buff *skb, *_skb, *tmp;
+ struct tipc_msg *hdr;
+ u32 qlen = skb_queue_len(&l->transmq);
+ u16 nacked = acked, ngap = gap, gack_cnt = 0;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ u16 ack = l->rcv_nxt - 1;
+ u16 seqno, n = 0;
+ u16 end = r->acked, start = end, offset = r->last_gap;
+ u16 si = (last_ga) ? last_ga->start_index : 0;
+ bool is_uc = !link_is_bc_sndlink(l);
+ bool bc_has_acked = false;
+
+ trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq);
+
+ /* Determine Gap ACK blocks if any for the particular link */
+ if (ga && is_uc) {
+ /* Get the Gap ACKs, uc part (stored after the bc entries) */
+ gack_cnt = ga->ugack_cnt;
+ gacks = &ga->gacks[ga->bgack_cnt];
+ } else if (ga) {
+ /* Copy the Gap ACKs, bc part, for later renewal if needed */
+ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt),
+ GFP_ATOMIC);
+ if (likely(this_ga)) {
+ this_ga->start_index = 0;
+ /* Start with the bc Gap ACKs */
+ gack_cnt = this_ga->bgack_cnt;
+ gacks = &this_ga->gacks[0];
} else {
- pr_err("Unexpected retransmit on link %s (qsize=%d)\n",
- l_ptr->name, l_ptr->retransm_queue_size);
+ /* Hmm, we can get in trouble..., simply ignore it */
+ pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n");
}
- return;
- } else {
- /* Detect repeated retransmit failures on unblocked bearer */
- if (l_ptr->last_retransmitted == msg_seqno(msg)) {
- if (++l_ptr->stale_count > 100) {
- link_retransmit_failure(l_ptr, buf);
- return;
+ }
+
+ /* Advance the link transmq */
+ skb_queue_walk_safe(&l->transmq, skb, tmp) {
+ seqno = buf_seqno(skb);
+
+next_gap_ack:
+ if (less_eq(seqno, nacked)) {
+ /* Unicast: an acked packet is released right away */
+ if (is_uc)
+ goto release;
+ /* Skip packets peer has already acked */
+ if (!more(seqno, r->acked))
+ continue;
+ /* Get the next of last Gap ACK blocks */
+ while (more(seqno, end)) {
+ if (!last_ga || si >= last_ga->bgack_cnt)
+ break;
+ start = end + offset + 1;
+ end = ntohs(last_ga->gacks[si].ack);
+ offset = ntohs(last_ga->gacks[si].gap);
+ si++;
+ WARN_ONCE(more(start, end) ||
+ (!offset &&
+ si < last_ga->bgack_cnt) ||
+ si > MAX_GAP_ACK_BLKS,
+ "Corrupted Gap ACK: %d %d %d %d %d\n",
+ start, end, offset, si,
+ last_ga->bgack_cnt);
}
+ /* Check against the last Gap ACK block */
+ if (tipc_in_range(seqno, start, end))
+ continue;
+ /* Update/release the packet peer is acking */
+ bc_has_acked = true;
+ /* Broadcast: release only when no acker is left */
+ if (--TIPC_SKB_CB(skb)->ackers)
+ continue;
+release:
+ /* release skb */
+ __skb_unlink(skb, &l->transmq);
+ kfree_skb(skb);
+ } else if (less_eq(seqno, nacked + ngap)) {
+ /* First gap: check if repeated retrans failures? */
+ if (unlikely(seqno == acked + 1 &&
+ link_retransmit_failure(l, r, rc))) {
+ /* Ignore this bc Gap ACKs if any */
+ kfree(this_ga);
+ this_ga = NULL;
+ break;
+ }
+ /* Retransmit skb unless its next-retransmit limit says not yet */
+ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+ continue;
+ tipc_link_set_skb_retransmit_time(skb, l);
+ _skb = pskb_copy(skb, GFP_ATOMIC);
+ /* Copy failed: skip this one (nxt_retr was already updated above) */
+ if (!_skb)
+ continue;
+ hdr = buf_msg(_skb);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
+ _skb->priority = TC_PRIO_CONTROL;
+ __skb_queue_tail(xmitq, _skb);
+ l->stats.retransmitted++;
+ if (!is_uc)
+ r->stats.retransmitted++;
+ *retransmitted = true;
+ /* Increase actual retrans counter & mark first time */
+ if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ TIPC_SKB_CB(skb)->retr_stamp = jiffies;
} else {
- l_ptr->last_retransmitted = msg_seqno(msg);
- l_ptr->stale_count = 1;
+ /* retry with Gap ACK blocks if any: load the next block
+ * and re-evaluate the same packet against it
+ */
+ if (n >= gack_cnt)
+ break;
+ nacked = ntohs(gacks[n].ack);
+ ngap = ntohs(gacks[n].gap);
+ n++;
+ goto next_gap_ack;
}
}
- while (retransmits && (buf != l_ptr->next_out) && buf) {
- msg = buf_msg(buf);
- msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
- buf = buf->next;
- retransmits--;
- l_ptr->stats.retransmitted++;
+ /* Renew last Gap ACK blocks for bc if needed */
+ if (bc_has_acked) {
+ if (this_ga) {
+ /* Fresh copy from this message replaces the saved record */
+ kfree(last_ga);
+ r->last_ga = this_ga;
+ r->last_gap = gap;
+ } else if (last_ga) {
+ if (less(acked, start)) {
+ si--;
+ offset = start - acked - 1;
+ } else if (less(acked, end)) {
+ acked = end;
+ }
+ if (si < last_ga->bgack_cnt) {
+ last_ga->start_index = si;
+ r->last_gap = offset;
+ } else {
+ kfree(last_ga);
+ r->last_ga = NULL;
+ r->last_gap = 0;
+ }
+ } else {
+ r->last_gap = 0;
+ }
+ r->acked = acked;
+ } else {
+ /* Nothing newly acked on bc: the copy is not needed (kfree(NULL) is a no-op) */
+ kfree(this_ga);
}
- l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0;
+ /* Released count = transmq length on entry minus length now */
+ return qlen - skb_queue_len(&l->transmq);
}
-/**
- * link_insert_deferred_queue - insert deferred messages back into receive chain
+/* tipc_link_build_state_msg: prepare link state message for transmission
+ *
+ * Note that sending of broadcast ack is coordinated among nodes, to reduce
+ * risk of ack storms towards the sender
+ *
+ * @l: link to acknowledge on; a NULL link is accepted and yields 0
+ * @xmitq: queue that receives the STATE message in the unicast case
+ *
+ * Broadcast receive link: nothing is queued here. The ack is only requested
+ * when the low four bits of (rcv_nxt XOR own node address) are all set; in
+ * that case rcv_unacked is cleared, rcv_nxt is saved in snd_nxt and
+ * TIPC_LINK_SND_STATE is returned. Otherwise 0 is returned and the link is
+ * left untouched.
+ *
+ * Any other link: rcv_unacked is cleared, stats.sent_acks is incremented and
+ * a STATE_MSG is built through tipc_link_build_proto_msg(). Returns 0.
*/
-static struct sk_buff *link_insert_deferred_queue(struct tipc_link *l_ptr,
- struct sk_buff *buf)
+int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
{
- u32 seq_no;
+ if (!l)
+ return 0;
- if (l_ptr->oldest_deferred_in == NULL)
- return buf;
+ /* Broadcast ACK must be sent via a unicast link => defer to caller */
+ if (link_is_bc_rcvlink(l)) {
+ if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
+ return 0;
+ l->rcv_unacked = 0;
- seq_no = buf_seqno(l_ptr->oldest_deferred_in);
- if (seq_no == mod(l_ptr->next_in_no)) {
- l_ptr->newest_deferred_in->next = buf;
- buf = l_ptr->oldest_deferred_in;
- l_ptr->oldest_deferred_in = NULL;
- l_ptr->deferred_inqueue_sz = 0;
+ /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */
+ l->snd_nxt = l->rcv_nxt;
+ return TIPC_LINK_SND_STATE;
}
- return buf;
+ /* Unicast ACK */
+ l->rcv_unacked = 0;
+ l->stats.sent_acks++;
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
+ return 0;
}
-/**
- * link_recv_buf_validate - validate basic format of received message
- *
- * This routine ensures a TIPC message has an acceptable header, and at least
- * as much data as the header indicates it should. The routine also ensures
- * that the entire message header is stored in the main fragment of the message
- * buffer, to simplify future access to message header fields.
- *
- * Note: Having extra info present in the message header or data areas is OK.
- * TIPC will ignore the excess, under the assumption that it is optional info
- * introduced by a later release of the protocol.
+/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message
+ *
+ * @l: link endpoint the message is built for
+ * @xmitq: queue the message is appended to
+ *
+ * The message type is ACTIVATE_MSG when the link is in state
+ * LINK_ESTABLISHING and RESET_MSG in every other state. When the link is in
+ * state LINK_RESET the "peer stopping" flag is set in the packet found at
+ * the tail of @xmitq.
+ *
+ * NOTE(review): the tail of @xmitq is taken to be the message just built.
+ * tipc_link_build_proto_msg() can return without queuing anything (blocked
+ * link, allocation failure); the flag would then be set on whatever packet
+ * was already last on @xmitq. Confirm that callers make this harmless.
*/
-static int link_recv_buf_validate(struct sk_buff *buf)
+void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
{
- static u32 min_data_hdr_size[8] = {
- SHORT_H_SIZE, MCAST_H_SIZE, NAMED_H_SIZE, BASIC_H_SIZE,
- MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE
- };
+ int mtyp = RESET_MSG;
+ struct sk_buff *skb;
- struct tipc_msg *msg;
- u32 tipc_hdr[2];
- u32 size;
- u32 hdr_size;
- u32 min_hdr_size;
+ if (l->state == LINK_ESTABLISHING)
+ mtyp = ACTIVATE_MSG;
- if (unlikely(buf->len < MIN_H_SIZE))
- return 0;
+ tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
- msg = skb_header_pointer(buf, 0, sizeof(tipc_hdr), tipc_hdr);
- if (msg == NULL)
- return 0;
+ /* Inform peer that this endpoint is going down if applicable */
+ skb = skb_peek_tail(xmitq);
+ if (skb && (l->state == LINK_RESET))
+ msg_set_peer_stopping(buf_msg(skb), 1);
+}
- if (unlikely(msg_version(msg) != TIPC_VERSION))
+/* tipc_link_build_nack_msg: prepare link nack message for transmission
+ * Note that sending of broadcast NACK is coordinated among nodes, to
+ * reduce the risk of NACK storms towards the sender
+ *
+ * @l: link that just deferred an out-of-sequence packet
+ * @xmitq: queue that receives the STATE message, if one is built
+ *
+ * Every call increments stats.deferred_recv.
+ *
+ * Broadcast receive link: nothing is queued; TIPC_LINK_SND_STATE is
+ * returned when the low four bits of the updated deferred_recv counter
+ * equal the low four bits of the own node address, otherwise 0.
+ *
+ * Any other link: a STATE_MSG carrying the receive gap (sequence number of
+ * the first deferred packet minus rcv_nxt) is built when the deferred queue
+ * length is 3, 19, 35, ... Returns 0.
+ */
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
+{
+ u32 def_cnt = ++l->stats.deferred_recv;
+ struct sk_buff_head *dfq = &l->deferdq;
+ u32 defq_len = skb_queue_len(dfq);
+ int match1, match2;
+
+ if (link_is_bc_rcvlink(l)) {
+ match1 = def_cnt & 0xf;
+ match2 = tipc_own_addr(l->net) & 0xf;
+ if (match1 == match2)
+ return TIPC_LINK_SND_STATE;
return 0;
+ }
- size = msg_size(msg);
- hdr_size = msg_hdr_sz(msg);
- min_hdr_size = msg_isdata(msg) ?
- min_data_hdr_size[msg_type(msg)] : INT_H_SIZE;
-
- if (unlikely((hdr_size < min_hdr_size) ||
- (size < hdr_size) ||
- (buf->len < size) ||
- (size - hdr_size > TIPC_MAX_USER_MSG_SIZE)))
- return 0;
+ if (defq_len >= 3 && !((defq_len - 3) % 16)) {
+ u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
- return pskb_may_pull(buf, hdr_size);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0,
+ rcvgap, 0, 0, xmitq);
+ }
+ return 0;
}
-/**
- * tipc_recv_msg - process TIPC messages arriving from off-node
- * @head: pointer to message buffer chain
- * @tb_ptr: pointer to bearer message arrived on
- *
- * Invoked with no locks held. Bearer pointer must point to a valid bearer
- * structure (i.e. cannot be NULL), but bearer can be inactive.
+/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
+ * @l: the link that should handle the message
+ * @skb: TIPC packet
+ * @xmitq: queue to place packets to be sent after this call
+ *
+ * LINK_PROTOCOL messages are handed straight to tipc_link_proto_rcv() and
+ * its result is returned. Any other packet clears silent_intv_cnt and is
+ * then handled in a loop:
+ * - if the link is not up the packet is freed; TIPC_LINK_UP_EVT is
+ *   reported when the link state is LINK_ESTABLISHING
+ * - a packet with a sequence number below rcv_nxt or above
+ *   rcv_nxt + TIPC_MAX_LINK_WIN is counted as duplicate and freed
+ * - the ack carried by the packet is passed to tipc_link_advance_transmq()
+ * - an out-of-sequence packet is handed to __tipc_skb_queue_sorted() for
+ *   the deferred queue and tipc_link_build_nack_msg() is called
+ * - an in-sequence packet is delivered, after which the loop continues
+ *   with the next in-sequence packet taken from the deferred queue, if any
+ *
+ * If any packets were released from the transmit queue, the congestion
+ * window is updated, the backlog is advanced and waiting users are woken.
+ *
+ * Returns the event flags accumulated in the loop.
*/
-void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
-{
- read_lock_bh(&tipc_net_lock);
- while (head) {
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct sk_buff *crs;
- struct sk_buff *buf = head;
- struct tipc_msg *msg;
- u32 seq_no;
- u32 ackd;
- u32 released = 0;
- int type;
-
- head = head->next;
-
- /* Ensure bearer is still enabled */
- if (unlikely(!b_ptr->active))
- goto cont;
-
- /* Ensure message is well-formed */
- if (unlikely(!link_recv_buf_validate(buf)))
- goto cont;
-
- /* Ensure message data is a single contiguous unit */
- if (unlikely(skb_linearize(buf)))
- goto cont;
-
- /* Handle arrival of a non-unicast link message */
- msg = buf_msg(buf);
-
- if (unlikely(msg_non_seq(msg))) {
- if (msg_user(msg) == LINK_CONFIG)
- tipc_disc_recv_msg(buf, b_ptr);
- else
- tipc_bclink_recv_pkt(buf);
- continue;
- }
-
- /* Discard unicast link messages destined for another node */
- if (unlikely(!msg_short(msg) &&
- (msg_destnode(msg) != tipc_own_addr)))
- goto cont;
-
- /* Locate neighboring node that sent message */
- n_ptr = tipc_node_find(msg_prevnode(msg));
- if (unlikely(!n_ptr))
- goto cont;
- tipc_node_lock(n_ptr);
-
- /* Locate unicast link endpoint that should handle message */
- l_ptr = n_ptr->links[b_ptr->identity];
- if (unlikely(!l_ptr)) {
- tipc_node_unlock(n_ptr);
- goto cont;
- }
-
- /* Verify that communication with node is currently allowed */
- if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
- msg_user(msg) == LINK_PROTOCOL &&
- (msg_type(msg) == RESET_MSG ||
- msg_type(msg) == ACTIVATE_MSG) &&
- !msg_redundant_link(msg))
- n_ptr->block_setup &= ~WAIT_PEER_DOWN;
-
- if (n_ptr->block_setup) {
- tipc_node_unlock(n_ptr);
- goto cont;
- }
+int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head *defq = &l->deferdq;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u16 seqno, rcv_nxt, win_lim;
+ int released = 0;
+ int rc = 0;
- /* Validate message sequence number info */
- seq_no = msg_seqno(msg);
- ackd = msg_ack(msg);
+ /* Verify and update link state */
+ if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+ return tipc_link_proto_rcv(l, skb, xmitq);
- /* Release acked messages */
- if (n_ptr->bclink.recv_permitted)
- tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
+ /* Don't send probe at next timeout expiration */
+ l->silent_intv_cnt = 0;
- crs = l_ptr->first_out;
- while ((crs != l_ptr->next_out) &&
- less_eq(buf_seqno(crs), ackd)) {
- struct sk_buff *next = crs->next;
+ do {
+ hdr = buf_msg(skb);
+ seqno = msg_seqno(hdr);
+ rcv_nxt = l->rcv_nxt;
+ win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
+
+ if (unlikely(!tipc_link_is_up(l))) {
+ if (l->state == LINK_ESTABLISHING)
+ rc = TIPC_LINK_UP_EVT;
+ kfree_skb(skb);
+ break;
+ }
- kfree_skb(crs);
- crs = next;
- released++;
+ /* Drop if outside receive window */
+ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
+ l->stats.duplicates++;
+ kfree_skb(skb);
+ break;
}
- if (released) {
- l_ptr->first_out = crs;
- l_ptr->out_queue_size -= released;
+ released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0,
+ NULL, NULL, NULL, NULL);
+
+ /* Defer delivery if sequence gap */
+ if (unlikely(seqno != rcv_nxt)) {
+ if (!__tipc_skb_queue_sorted(defq, seqno, skb))
+ l->stats.duplicates++;
+ rc |= tipc_link_build_nack_msg(l, xmitq);
+ break;
}
- /* Try sending any messages link endpoint has pending */
- if (unlikely(l_ptr->next_out))
- tipc_link_push_queue(l_ptr);
- if (unlikely(!list_empty(&l_ptr->waiting_ports)))
- tipc_link_wakeup_ports(l_ptr, 0);
- if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) {
- l_ptr->stats.sent_acks++;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
- }
+ /* Deliver packet */
+ l->rcv_nxt++;
+ l->stats.recv_pkts++;
+
+ if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL))
+ rc |= tipc_link_tnl_rcv(l, skb, l->inputq);
+ else if (!tipc_data_input(l, skb, l->inputq))
+ rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf);
+ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
+ rc |= tipc_link_build_state_msg(l, xmitq);
+ if (unlikely(rc & ~TIPC_LINK_SND_STATE))
+ break;
+ } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt)));
+
+ /* Forward queues and wake up waiting users */
+ if (released) {
+ tipc_link_update_cwin(l, released, 0);
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+ }
+ return rc;
+}
+/* tipc_link_build_proto_msg(): build a LINK_PROTOCOL message and append it
+ * to @xmitq with priority TC_PRIO_CONTROL
+ * @l: link the message is built for
+ * @mtyp: message type; STATE_MSG gets the state specific fields, any other
+ *        type is handled as RESET_MSG/ACTIVATE_MSG
+ * @probe: value of the probe bit (STATE_MSG only)
+ * @probe_reply: together with @probe marks a STATE_MSG as keepalive
+ * @rcvgap: receive gap to report; recomputed from the first deferred packet
+ *          when @probe or @probe_reply is set and the deferred queue is
+ *          not empty
+ * @tolerance: link tolerance written to the header
+ * @priority: link priority written to the header
+ * @xmitq: queue the finished message is appended to
+ *
+ * Nothing is queued when the link is blocked, when a STATE_MSG is requested
+ * while the link is not up, or when no buffer can be allocated.
+ *
+ * NOTE(review): l->bc_rcvlink is dereferenced without a NULL check, so every
+ * caller is assumed to pass a link that has one - confirm.
+ * NOTE(review): the interface name is copied with strcpy() while the message
+ * size is fixed to INT_H_SIZE + TIPC_MAX_IF_NAME; this assumes l->if_name
+ * always fits into TIPC_MAX_IF_NAME bytes - confirm.
+ */
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_mon_state *mstate = &l->mon_state;
+ struct sk_buff_head *dfq = &l->deferdq;
+ struct tipc_link *bcl = l->bc_rcvlink;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ bool node_up = tipc_link_is_up(bcl);
+ u16 glen = 0, bc_rcvgap = 0;
+ int dlen = 0;
+ void *data;
+
+ /* Don't send protocol message during reset or link failover */
+ if (tipc_link_is_blocked(l))
+ return;
- /* Now (finally!) process the incoming message */
-protocol_check:
- if (likely(link_working_working(l_ptr))) {
- if (likely(seq_no == mod(l_ptr->next_in_no))) {
- l_ptr->next_in_no++;
- if (unlikely(l_ptr->oldest_deferred_in))
- head = link_insert_deferred_queue(l_ptr,
- head);
-deliver:
- if (likely(msg_isdata(msg))) {
- tipc_node_unlock(n_ptr);
- tipc_port_recv_msg(buf);
- continue;
- }
- switch (msg_user(msg)) {
- int ret;
- case MSG_BUNDLER:
- l_ptr->stats.recv_bundles++;
- l_ptr->stats.recv_bundled +=
- msg_msgcnt(msg);
- tipc_node_unlock(n_ptr);
- tipc_link_recv_bundle(buf);
- continue;
- case NAME_DISTRIBUTOR:
- n_ptr->bclink.recv_permitted = true;
- tipc_node_unlock(n_ptr);
- tipc_named_recv(buf);
- continue;
- case BCAST_PROTOCOL:
- tipc_link_recv_sync(n_ptr, buf);
- tipc_node_unlock(n_ptr);
- continue;
- case CONN_MANAGER:
- tipc_node_unlock(n_ptr);
- tipc_port_recv_proto_msg(buf);
- continue;
- case MSG_FRAGMENTER:
- l_ptr->stats.recv_fragments++;
- ret = tipc_link_recv_fragment(
- &l_ptr->defragm_buf,
- &buf, &msg);
- if (ret == 1) {
- l_ptr->stats.recv_fragmented++;
- goto deliver;
- }
- if (ret == -1)
- l_ptr->next_in_no--;
- break;
- case CHANGEOVER_PROTOCOL:
- type = msg_type(msg);
- if (link_recv_changeover_msg(&l_ptr,
- &buf)) {
- msg = buf_msg(buf);
- seq_no = msg_seqno(msg);
- if (type == ORIGINAL_MSG)
- goto deliver;
- goto protocol_check;
- }
- break;
- default:
- kfree_skb(buf);
- buf = NULL;
- break;
- }
- tipc_node_unlock(n_ptr);
- tipc_net_route_msg(buf);
- continue;
- }
- link_handle_out_of_seq_msg(l_ptr, buf);
- head = link_insert_deferred_queue(l_ptr, head);
- tipc_node_unlock(n_ptr);
- continue;
- }
+ if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
+ return;
- /* Link is not in state WORKING_WORKING */
- if (msg_user(msg) == LINK_PROTOCOL) {
- link_recv_proto_msg(l_ptr, buf);
- head = link_insert_deferred_queue(l_ptr, head);
- tipc_node_unlock(n_ptr);
- continue;
- }
+ if ((probe || probe_reply) && !skb_queue_empty(dfq))
+ rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
- /* Traffic message. Conditionally activate link */
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
+ skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
+ tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ,
+ l->addr, tipc_own_addr(l->net), 0, 0, 0);
+ if (!skb)
+ return;
- if (link_working_working(l_ptr)) {
- /* Re-insert buffer in front of queue */
- buf->next = head;
- head = buf;
- tipc_node_unlock(n_ptr);
- continue;
+ hdr = buf_msg(skb);
+ data = msg_data(hdr);
+ msg_set_session(hdr, l->session);
+ msg_set_bearer_id(hdr, l->bearer_id);
+ msg_set_net_plane(hdr, l->net_plane);
+ msg_set_next_sent(hdr, l->snd_nxt);
+ msg_set_ack(hdr, l->rcv_nxt - 1);
+ msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+ msg_set_bc_ack_invalid(hdr, !node_up);
+ msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
+ msg_set_link_tolerance(hdr, tolerance);
+ msg_set_linkprio(hdr, priority);
+ msg_set_redundant_link(hdr, node_up);
+ msg_set_seq_gap(hdr, 0);
+ msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
+
+ if (mtyp == STATE_MSG) {
+ if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
+ msg_set_seqno(hdr, l->snd_nxt_state++);
+ msg_set_seq_gap(hdr, rcvgap);
+ bc_rcvgap = link_bc_rcv_gap(bcl);
+ msg_set_bc_gap(hdr, bc_rcvgap);
+ msg_set_probe(hdr, probe);
+ msg_set_is_keepalive(hdr, probe || probe_reply);
+ if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
+ glen = tipc_build_gap_ack_blks(l, hdr);
+ tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
+ msg_set_size(hdr, INT_H_SIZE + glen + dlen);
+ skb_trim(skb, INT_H_SIZE + glen + dlen);
+ l->stats.sent_states++;
+ l->rcv_unacked = 0;
+ } else {
+ /* RESET_MSG or ACTIVATE_MSG */
+ if (mtyp == ACTIVATE_MSG) {
+ msg_set_dest_session_valid(hdr, 1);
+ msg_set_dest_session(hdr, l->peer_session);
}
- tipc_node_unlock(n_ptr);
-cont:
- kfree_skb(buf);
+ msg_set_max_pkt(hdr, l->advertised_mtu);
+ strcpy(data, l->if_name);
+ msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME);
+ skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME);
+ }
+ if (probe)
+ l->stats.sent_probes++;
+ if (rcvgap)
+ l->stats.sent_nacks++;
+ if (bc_rcvgap)
+ bcl->stats.sent_nacks++;
+ skb->priority = TC_PRIO_CONTROL;
+ __skb_queue_tail(xmitq, skb);
+ trace_tipc_proto_build(skb, false, l->name);
+}
+/* tipc_link_create_dummy_tnl_msg(): send a single dummy FAILOVER_MSG
+ * @l: link the tunnel packet is transmitted on
+ * @xmitq: queue handed to tipc_link_xmit() for the outgoing packet
+ *
+ * The tunnel packet announces a message count of 1 and wraps a bare
+ * BASIC_H_SIZE header, initialised with TIPC_LOW_IMPORTANCE and
+ * TIPC_DIRECT_MSG, whose error code is TIPC_ERR_NO_PORT. If the buffer
+ * cannot be allocated a warning is logged and nothing is sent.
+ */
+void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
+{
+ u32 onode = tipc_own_addr(l->net);
+ struct tipc_msg *hdr, *ihdr;
+ struct sk_buff_head tnlq;
+ struct sk_buff *skb;
+ u32 dnode = l->addr;
+
+ __skb_queue_head_init(&tnlq);
+ skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
+ INT_H_SIZE, BASIC_H_SIZE,
+ dnode, onode, 0, 0, 0);
+ if (!skb) {
+ pr_warn("%sunable to create tunnel packet\n", link_co_err);
+ return;
}
- read_unlock_bh(&tipc_net_lock);
+
+ hdr = buf_msg(skb);
+ msg_set_msgcnt(hdr, 1);
+ msg_set_bearer_id(hdr, l->peer_bearer_id);
+
+ ihdr = (struct tipc_msg *)msg_data(hdr);
+ tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
+ BASIC_H_SIZE, dnode);
+ msg_set_errcode(ihdr, TIPC_ERR_NO_PORT);
+ __skb_queue_tail(&tnlq, skb);
+ tipc_link_xmit(l, &tnlq, xmitq);
}
-/**
- * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue
- *
- * Returns increase in queue length (i.e. 0 or 1)
+/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
+ * with contents of the link's transmit and backlog queues.
+ * @l: link whose transmq and backlogq are tunnelled
+ * @tnl: link the tunnel packets are sent on; nothing is done if NULL
+ * @mtyp: tunnel message type; compared against SYNCH_MSG and FAILOVER_MSG
+ * @xmitq: queue handed to tipc_link_xmit() for the tunnel packets
+ *
+ * For FAILOVER_MSG the reassembly buffer and the deferred queue of @l are
+ * moved over to @tnl once the tunnel packets have been sent.
+ *
+ * NOTE(review): the early returns inside the wrapping loop (fragmentation
+ * failure, oversized packet, buffer allocation failure) leave the tunnel
+ * packets already collected on the local tnlq without purging them, which
+ * looks like a leak - confirm.
*/
-u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
- struct sk_buff *buf)
-{
- struct sk_buff *queue_buf;
- struct sk_buff **prev;
- u32 seq_no = buf_seqno(buf);
+void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ int mtyp, struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb, *tnlskb;
+ struct tipc_msg *hdr, tnlhdr;
+ struct sk_buff_head *queue = &l->transmq;
+ struct sk_buff_head tmpxq, tnlq, frags;
+ u16 pktlen, pktcnt, seqno = l->snd_nxt;
+ bool pktcnt_need_update = false;
+ u16 syncpt;
+ int rc;
+
+ if (!tnl)
+ return;
- buf->next = NULL;
+ __skb_queue_head_init(&tnlq);
+ /* Link Synching:
+ * From now on, send only one single ("dummy") SYNCH message
+ * to peer. The SYNCH message does not contain any data, just
+ * a header conveying the synch point to the peer.
+ */
+ if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+ tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG,
+ INT_H_SIZE, 0, l->addr,
+ tipc_own_addr(l->net),
+ 0, 0, 0);
+ if (!tnlskb) {
+ pr_warn("%sunable to create dummy SYNCH_MSG\n",
+ link_co_err);
+ return;
+ }
- /* Empty queue ? */
- if (*head == NULL) {
- *head = *tail = buf;
- return 1;
+ hdr = buf_msg(tnlskb);
+ syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
+ msg_set_syncpt(hdr, syncpt);
+ msg_set_bearer_id(hdr, l->peer_bearer_id);
+ __skb_queue_tail(&tnlq, tnlskb);
+ tipc_link_xmit(tnl, &tnlq, xmitq);
+ return;
}
- /* Last ? */
- if (less(buf_seqno(*tail), seq_no)) {
- (*tail)->next = buf;
- *tail = buf;
- return 1;
+ __skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&frags);
+ /* At least one packet required for safe algorithm => add dummy */
+ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
+ BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
+ 0, 0, TIPC_ERR_NO_PORT);
+ if (!skb) {
+ pr_warn("%sunable to create tunnel packet\n", link_co_err);
+ return;
}
+ __skb_queue_tail(&tnlq, skb);
+ tipc_link_xmit(l, &tnlq, &tmpxq);
+ __skb_queue_purge(&tmpxq);
- /* Locate insertion point in queue, then insert; discard if duplicate */
- prev = head;
- queue_buf = *head;
- for (;;) {
- u32 curr_seqno = buf_seqno(queue_buf);
-
- if (seq_no == curr_seqno) {
- kfree_skb(buf);
- return 0;
+ /* Initialize reusable tunnel packet header */
+ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
+ mtyp, INT_H_SIZE, l->addr);
+ if (mtyp == SYNCH_MSG)
+ pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq));
+ else
+ pktcnt = skb_queue_len(&l->transmq);
+ pktcnt += skb_queue_len(&l->backlogq);
+ msg_set_msgcnt(&tnlhdr, pktcnt);
+ msg_set_bearer_id(&tnlhdr, l->peer_bearer_id);
+tnl:
+ /* Wrap each packet into a tunnel packet */
+ skb_queue_walk(queue, skb) {
+ hdr = buf_msg(skb);
+ if (queue == &l->backlogq)
+ msg_set_seqno(hdr, seqno++);
+ pktlen = msg_size(hdr);
+
+ /* Tunnel link MTU is not large enough? This could be
+ * due to:
+ * 1) Link MTU has just changed or set differently;
+ * 2) Or FAILOVER on the top of a SYNCH message
+ *
+ * The 2nd case should not happen if peer supports
+ * TIPC_TUNNEL_ENHANCED
+ */
+ if (pktlen > tnl->mtu - INT_H_SIZE) {
+ if (mtyp == FAILOVER_MSG &&
+ (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+ rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu,
+ &frags);
+ if (rc) {
+ pr_warn("%sunable to frag msg: rc %d\n",
+ link_co_err, rc);
+ return;
+ }
+ pktcnt += skb_queue_len(&frags) - 1;
+ pktcnt_need_update = true;
+ skb_queue_splice_tail_init(&frags, &tnlq);
+ continue;
+ }
+ /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED
+ * => Just warn it and return!
+ */
+ pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n",
+ link_co_err, msg_user(hdr),
+ msg_type(hdr), msg_size(hdr));
+ return;
}
- if (less(seq_no, curr_seqno))
- break;
-
- prev = &queue_buf->next;
- queue_buf = queue_buf->next;
+ msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
+ tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
+ if (!tnlskb) {
+ pr_warn("%sunable to send packet\n", link_co_err);
+ return;
+ }
+ skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen);
+ __skb_queue_tail(&tnlq, tnlskb);
+ }
+ if (queue != &l->backlogq) {
+ queue = &l->backlogq;
+ goto tnl;
}
- buf->next = queue_buf;
- *prev = buf;
- return 1;
-}
+ if (pktcnt_need_update)
+ skb_queue_walk(&tnlq, skb) {
+ hdr = buf_msg(skb);
+ msg_set_msgcnt(hdr, pktcnt);
+ }
-/*
- * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet
- */
-static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
- struct sk_buff *buf)
-{
- u32 seq_no = buf_seqno(buf);
+ tipc_link_xmit(tnl, &tnlq, xmitq);
- if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) {
- link_recv_proto_msg(l_ptr, buf);
- return;
- }
+ if (mtyp == FAILOVER_MSG) {
+ struct sk_buff_head *fdefq = &tnl->failover_deferdq;
- /* Record OOS packet arrival (force mismatch on next timeout) */
- l_ptr->checkpoint--;
+ tnl->drop_point = l->rcv_nxt;
+ tnl->failover_reasm_skb = l->reasm_buf;
+ l->reasm_buf = NULL;
- /*
- * Discard packet if a duplicate; otherwise add it to deferred queue
- * and notify peer of gap as per protocol specification
- */
- if (less(seq_no, mod(l_ptr->next_in_no))) {
- l_ptr->stats.duplicates++;
- kfree_skb(buf);
- return;
+ /* Failover the link's deferdq */
+ if (unlikely(!skb_queue_empty(fdefq))) {
+ pr_warn("Link failover deferdq not empty: %d!\n",
+ skb_queue_len(fdefq));
+ __skb_queue_purge(fdefq);
+ }
+ skb_queue_splice_init(&l->deferdq, fdefq);
}
-
- if (tipc_link_defer_pkt(&l_ptr->oldest_deferred_in,
- &l_ptr->newest_deferred_in, buf)) {
- l_ptr->deferred_inqueue_sz++;
- l_ptr->stats.deferred_recv++;
- if ((l_ptr->deferred_inqueue_sz % 16) == 1)
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
- } else
- l_ptr->stats.duplicates++;
}
-/*
- * Send protocol message to the other endpoint.
+/**
+ * tipc_link_failover_prepare() - prepare tnl for link failover
+ *
+ * This is a special version of the precursor - tipc_link_tnl_prepare(),
+ * see the tipc_node_link_failover() for details
+ *
+ * A dummy tunnel message is sent on @tnl, its drop point is set to 1, its
+ * failover reassembly buffer is cleared and its failover deferred queue is
+ * purged (with a warning) if it is not empty.
+ *
+ * @l: failover link (not referenced in the function body)
+ * @tnl: tunnel link
+ * @xmitq: queue for messages to be transmitted
*/
-void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
- int probe_msg, u32 gap, u32 tolerance,
- u32 priority, u32 ack_mtu)
-{
- struct sk_buff *buf = NULL;
- struct tipc_msg *msg = l_ptr->pmsg;
- u32 msg_size = sizeof(l_ptr->proto_msg);
- int r_flag;
-
- /* Discard any previous message that was deferred due to congestion */
- if (l_ptr->proto_msg_queue) {
- kfree_skb(l_ptr->proto_msg_queue);
- l_ptr->proto_msg_queue = NULL;
- }
-
- if (link_blocked(l_ptr))
- return;
-
- /* Abort non-RESET send if communication with node is prohibited */
- if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG))
- return;
-
- /* Create protocol message with "out-of-sequence" sequence number */
- msg_set_type(msg, msg_typ);
- msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- msg_set_last_bcast(msg, tipc_bclink_get_last_sent());
+void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head *fdefq = &tnl->failover_deferdq;
- if (msg_typ == STATE_MSG) {
- u32 next_sent = mod(l_ptr->next_out_no);
+ tipc_link_create_dummy_tnl_msg(tnl, xmitq);
- if (!tipc_link_is_up(l_ptr))
- return;
- if (l_ptr->next_out)
- next_sent = buf_seqno(l_ptr->next_out);
- msg_set_next_sent(msg, next_sent);
- if (l_ptr->oldest_deferred_in) {
- u32 rec = buf_seqno(l_ptr->oldest_deferred_in);
- gap = mod(rec - mod(l_ptr->next_in_no));
- }
- msg_set_seq_gap(msg, gap);
- if (gap)
- l_ptr->stats.sent_nacks++;
- msg_set_link_tolerance(msg, tolerance);
- msg_set_linkprio(msg, priority);
- msg_set_max_pkt(msg, ack_mtu);
- msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
- msg_set_probe(msg, probe_msg != 0);
- if (probe_msg) {
- u32 mtu = l_ptr->max_pkt;
-
- if ((mtu < l_ptr->max_pkt_target) &&
- link_working_working(l_ptr) &&
- l_ptr->fsm_msg_cnt) {
- msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3;
- if (l_ptr->max_pkt_probes == 10) {
- l_ptr->max_pkt_target = (msg_size - 4);
- l_ptr->max_pkt_probes = 0;
- msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3;
- }
- l_ptr->max_pkt_probes++;
- }
+ /* This failover link endpoint was never established before,
+ * so it has not received anything from peer.
+ * Otherwise, it must be a normal failover situation or the
+ * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
+ * would have to start over from scratch instead.
+ */
+ tnl->drop_point = 1;
+ tnl->failover_reasm_skb = NULL;
- l_ptr->stats.sent_probes++;
- }
- l_ptr->stats.sent_states++;
- } else { /* RESET_MSG or ACTIVATE_MSG */
- msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1));
- msg_set_seq_gap(msg, 0);
- msg_set_next_sent(msg, 1);
- msg_set_probe(msg, 0);
- msg_set_link_tolerance(msg, l_ptr->tolerance);
- msg_set_linkprio(msg, l_ptr->priority);
- msg_set_max_pkt(msg, l_ptr->max_pkt_target);
+ /* Initiate the link's failover deferdq */
+ if (unlikely(!skb_queue_empty(fdefq))) {
+ pr_warn("Link failover deferdq not empty: %d!\n",
+ skb_queue_len(fdefq));
+ __skb_queue_purge(fdefq);
}
+}
- r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr));
- msg_set_redundant_link(msg, r_flag);
- msg_set_linkprio(msg, l_ptr->priority);
- msg_set_size(msg, msg_size);
-
- msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2)));
-
- buf = tipc_buf_acquire(msg_size);
- if (!buf)
- return;
+/* tipc_link_validate_msg(): validate message against current link state
+ * Returns true if message should be accepted, otherwise false
+ *
+ * Messages of any user other than LINK_PROTOCOL are always accepted.
+ * LINK_PROTOCOL messages are checked by type:
+ * - RESET_MSG: accepted when no session is established, otherwise only
+ *   when more(session, peer_session) holds
+ * - ACTIVATE_MSG: accepted when no session is established, otherwise only
+ *   when less(session, peer_session) does not hold
+ * - STATE_MSG: requires an established session with matching session
+ *   number, and a zero ack while the link is not up; if the peer supports
+ *   TIPC_LINK_PROTO_SEQNO the sequence number must in addition not be
+ *   less than rcv_nxt_state
+ * - any other type is rejected
+ */
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ u16 curr_session = l->peer_session;
+ u16 session = msg_session(hdr);
+ int mtyp = msg_type(hdr);
- skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
- buf->priority = TC_PRIO_CONTROL;
+ if (msg_user(hdr) != LINK_PROTOCOL)
+ return true;
- /* Defer message if bearer is already blocked */
- if (tipc_bearer_blocked(l_ptr->b_ptr)) {
- l_ptr->proto_msg_queue = buf;
- return;
+ switch (mtyp) {
+ case RESET_MSG:
+ if (!l->in_session)
+ return true;
+ /* Accept only RESET with new session number */
+ return more(session, curr_session);
+ case ACTIVATE_MSG:
+ if (!l->in_session)
+ return true;
+ /* Accept only ACTIVATE with new or current session number */
+ return !less(session, curr_session);
+ case STATE_MSG:
+ /* Accept only STATE with current session number */
+ if (!l->in_session)
+ return false;
+ if (session != curr_session)
+ return false;
+ /* Extra sanity check */
+ if (!tipc_link_is_up(l) && msg_ack(hdr))
+ return false;
+ if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO))
+ return true;
+ /* Accept only STATE with new sequence number */
+ return !less(msg_seqno(hdr), l->rcv_nxt_state);
+ default:
+ return false;
}
-
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
- l_ptr->unacked_window = 0;
- kfree_skb(buf);
}
-/*
- * Receive protocol message :
+/* tipc_link_proto_rcv(): receive link level protocol message :
* Note that network plane id propagates through the network, and may
- * change at any time. The node with lowest address rules
+ * change at any time. The node with lowest numerical id determines
+ * network plane
*/
-static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
-{
- u32 rec_gap = 0;
- u32 max_pkt_info;
- u32 max_pkt_ack;
- u32 msg_tol;
- struct tipc_msg *msg = buf_msg(buf);
+static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_gap_ack_blks *ga = NULL;
+ bool reply = msg_probe(hdr), retransmitted = false;
+ u32 dlen = msg_data_sz(hdr), glen = 0, msg_max;
+ u16 peers_snd_nxt = msg_next_sent(hdr);
+ u16 peers_tol = msg_link_tolerance(hdr);
+ u16 peers_prio = msg_linkprio(hdr);
+ u16 gap = msg_seq_gap(hdr);
+ u16 ack = msg_ack(hdr);
+ u16 rcv_nxt = l->rcv_nxt;
+ u16 rcvgap = 0;
+ int mtyp = msg_type(hdr);
+ int rc = 0, released;
+ char *if_name;
+ void *data;
+
+ trace_tipc_proto_rcv(skb, false, l->name);
+
+ if (dlen > U16_MAX)
+ goto exit;
- if (link_blocked(l_ptr))
+ if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
- /* record unnumbered packet arrival (force mismatch on next timeout) */
- l_ptr->checkpoint--;
+ if (tipc_own_addr(l->net) > msg_prevnode(hdr))
+ l->net_plane = msg_net_plane(hdr);
- if (l_ptr->b_ptr->net_plane != msg_net_plane(msg))
- if (tipc_own_addr > msg_prevnode(msg))
- l_ptr->b_ptr->net_plane = msg_net_plane(msg);
+ if (skb_linearize(skb))
+ goto exit;
- l_ptr->owner->permit_changeover = msg_redundant_link(msg);
+ hdr = buf_msg(skb);
+ data = msg_data(hdr);
- switch (msg_type(msg)) {
+ if (!tipc_link_validate_msg(l, hdr)) {
+ trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!");
+ trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!");
+ goto exit;
+ }
+ switch (mtyp) {
case RESET_MSG:
- if (!link_working_unknown(l_ptr) &&
- (l_ptr->peer_session != INVALID_SESSION)) {
- if (less_eq(msg_session(msg), l_ptr->peer_session))
- break; /* duplicate or old reset: ignore */
- }
+ case ACTIVATE_MSG:
+ msg_max = msg_max_pkt(hdr);
+ if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id))
+ break;
+ /* Complete own link name with peer's interface name */
+ if_name = strrchr(l->name, ':') + 1;
+ if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
+ break;
+ if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
+ break;
+ strscpy(if_name, data, TIPC_MAX_IF_NAME);
- if (!msg_redundant_link(msg) && (link_working_working(l_ptr) ||
- link_working_unknown(l_ptr))) {
- /*
- * peer has lost contact -- don't allow peer's links
- * to reactivate before we recognize loss & clean up
- */
- l_ptr->owner->block_setup = WAIT_NODE_DOWN;
+ /* Update own tolerance if peer indicates a non-zero value */
+ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ l->tolerance = peers_tol;
+ l->bc_rcvlink->tolerance = peers_tol;
}
+ /* Update own priority if peer's priority is higher */
+ if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
+ l->priority = peers_prio;
- link_state_event(l_ptr, RESET_MSG);
-
- /* fall thru' */
- case ACTIVATE_MSG:
- /* Update link settings according other endpoint's values */
- strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg));
-
- msg_tol = msg_link_tolerance(msg);
- if (msg_tol > l_ptr->tolerance)
- link_set_supervision_props(l_ptr, msg_tol);
-
- if (msg_linkprio(msg) > l_ptr->priority)
- l_ptr->priority = msg_linkprio(msg);
-
- max_pkt_info = msg_max_pkt(msg);
- if (max_pkt_info) {
- if (max_pkt_info < l_ptr->max_pkt_target)
- l_ptr->max_pkt_target = max_pkt_info;
- if (l_ptr->max_pkt > l_ptr->max_pkt_target)
- l_ptr->max_pkt = l_ptr->max_pkt_target;
- } else {
- l_ptr->max_pkt = l_ptr->max_pkt_target;
+ /* If peer is going down we want full re-establish cycle */
+ if (msg_peer_stopping(hdr)) {
+ rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ break;
}
- /* Synchronize broadcast link info, if not done previously */
- if (!tipc_node_is_up(l_ptr->owner)) {
- l_ptr->owner->bclink.last_sent =
- l_ptr->owner->bclink.last_in =
- msg_last_bcast(msg);
- l_ptr->owner->bclink.oos_state = 0;
+ /* If this endpoint was re-created while peer was ESTABLISHING
+ * it doesn't know current session number. Force re-synch.
+ */
+ if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) &&
+ l->session != msg_dest_session(hdr)) {
+ if (less(l->session, msg_dest_session(hdr)))
+ l->session = msg_dest_session(hdr) + 1;
+ break;
}
- l_ptr->peer_session = msg_session(msg);
- l_ptr->peer_bearer_id = msg_bearer_id(msg);
+ /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
+ if (mtyp == RESET_MSG || !tipc_link_is_up(l))
+ rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
+
+ /* ACTIVATE_MSG takes up link if it was already locally reset */
+ if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING)
+ rc = TIPC_LINK_UP_EVT;
- if (msg_type(msg) == ACTIVATE_MSG)
- link_state_event(l_ptr, ACTIVATE_MSG);
+ l->peer_session = msg_session(hdr);
+ l->in_session = true;
+ l->peer_bearer_id = msg_bearer_id(hdr);
+ if (l->mtu > msg_max)
+ l->mtu = msg_max;
break;
- case STATE_MSG:
- msg_tol = msg_link_tolerance(msg);
- if (msg_tol)
- link_set_supervision_props(l_ptr, msg_tol);
-
- if (msg_linkprio(msg) &&
- (msg_linkprio(msg) != l_ptr->priority)) {
- pr_warn("%s<%s>, priority change %u->%u\n",
- link_rst_msg, l_ptr->name, l_ptr->priority,
- msg_linkprio(msg));
- l_ptr->priority = msg_linkprio(msg);
- tipc_link_reset(l_ptr); /* Enforce change to take effect */
- break;
- }
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
- l_ptr->stats.recv_states++;
- if (link_reset_unknown(l_ptr))
+ case STATE_MSG:
+ /* Validate Gap ACK blocks, drop if invalid */
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+ if (glen > dlen)
break;
- if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) {
- rec_gap = mod(msg_next_sent(msg) -
- mod(l_ptr->next_in_no));
- }
+ l->rcv_nxt_state = msg_seqno(hdr) + 1;
- max_pkt_ack = msg_max_pkt(msg);
- if (max_pkt_ack > l_ptr->max_pkt) {
- l_ptr->max_pkt = max_pkt_ack;
- l_ptr->max_pkt_probes = 0;
+ /* Update own tolerance if peer indicates a non-zero value */
+ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ l->tolerance = peers_tol;
+ l->bc_rcvlink->tolerance = peers_tol;
}
-
- max_pkt_ack = 0;
- if (msg_probe(msg)) {
- l_ptr->stats.recv_probes++;
- if (msg_size(msg) > sizeof(l_ptr->proto_msg))
- max_pkt_ack = msg_size(msg);
+ /* Update own prio if peer indicates a different value */
+ if ((peers_prio != l->priority) &&
+ tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
+ l->priority = peers_prio;
+ rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
- /* Protocol message before retransmits, reduce loss risk */
- if (l_ptr->owner->bclink.recv_permitted)
- tipc_bclink_update_link_state(l_ptr->owner,
- msg_last_bcast(msg));
+ l->silent_intv_cnt = 0;
+ l->stats.recv_states++;
+ if (msg_probe(hdr))
+ l->stats.recv_probes++;
- if (rec_gap || (msg_probe(msg))) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, rec_gap, 0, 0, max_pkt_ack);
- }
- if (msg_seq_gap(msg)) {
- l_ptr->stats.recv_nacks++;
- tipc_link_retransmit(l_ptr, l_ptr->first_out,
- msg_seq_gap(msg));
+ if (!tipc_link_is_up(l)) {
+ if (l->state == LINK_ESTABLISHING)
+ rc = TIPC_LINK_UP_EVT;
+ break;
}
- break;
+
+ tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
+ &l->mon_state, l->bearer_id);
+
+ /* Send NACK if peer has sent pkts we haven't received yet */
+ if ((reply || msg_is_keepalive(hdr)) &&
+ more(peers_snd_nxt, rcv_nxt) &&
+ !tipc_link_is_synching(l) &&
+ skb_queue_empty(&l->deferdq))
+ rcvgap = peers_snd_nxt - l->rcv_nxt;
+ if (rcvgap || reply)
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
+ rcvgap, 0, 0, xmitq);
+
+ released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq,
+ &retransmitted, &rc);
+ if (gap)
+ l->stats.recv_nacks++;
+ if (released || retransmitted)
+ tipc_link_update_cwin(l, released, retransmitted);
+ if (released)
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
}
exit:
- kfree_skb(buf);
+ kfree_skb(skb);
+ return rc;
}
-
-/*
- * tipc_link_tunnel(): Send one message via a link belonging to
- * another bearer. Owner node is locked.
+/* tipc_link_build_bc_proto_msg() - create broadcast protocol message
+ */
+static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast,
+ u16 peers_snd_nxt,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+ struct sk_buff *dfrd_skb = skb_peek(&l->deferdq);
+ u16 ack = l->rcv_nxt - 1;
+ u16 gap_to = peers_snd_nxt - 1;
+
+ skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
+ 0, l->addr, tipc_own_addr(l->net), 0, 0, 0);
+ if (!skb)
+ return false;
+ hdr = buf_msg(skb);
+ msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
+ msg_set_bcast_ack(hdr, ack);
+ msg_set_bcgap_after(hdr, ack);
+ if (dfrd_skb)
+ gap_to = buf_seqno(dfrd_skb) - 1;
+ msg_set_bcgap_to(hdr, gap_to);
+ msg_set_non_seq(hdr, bcast);
+ __skb_queue_tail(xmitq, skb);
+ return true;
+}
+
+/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints.
+ *
+ * Give a newly added peer node the sequence number where it should
+ * start receiving and acking broadcast packets.
*/
-static void tipc_link_tunnel(struct tipc_link *l_ptr,
- struct tipc_msg *tunnel_hdr, struct tipc_msg *msg,
- u32 selector)
+static void tipc_link_build_bc_init_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link *tunnel;
- struct sk_buff *buf;
- u32 length = msg_size(msg);
+ struct sk_buff_head list;
- tunnel = l_ptr->owner->active_links[selector & 1];
- if (!tipc_link_is_up(tunnel)) {
- pr_warn("%stunnel link no longer available\n", link_co_err);
- return;
- }
- msg_set_size(tunnel_hdr, length + INT_H_SIZE);
- buf = tipc_buf_acquire(length + INT_H_SIZE);
- if (!buf) {
- pr_warn("%sunable to send tunnel msg\n", link_co_err);
+ __skb_queue_head_init(&list);
+ if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list))
return;
- }
- skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
- skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
- tipc_link_send_buf(tunnel, buf);
+ msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true);
+ tipc_link_xmit(l, &list, xmitq);
}
-
-
-/*
- * changeover(): Send whole message queue via the remaining link
- * Owner node is locked.
+/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer
*/
-void tipc_link_changeover(struct tipc_link *l_ptr)
+void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
{
- u32 msgcount = l_ptr->out_queue_size;
- struct sk_buff *crs = l_ptr->first_out;
- struct tipc_link *tunnel = l_ptr->owner->active_links[0];
- struct tipc_msg tunnel_hdr;
- int split_bundles;
+ int mtyp = msg_type(hdr);
+ u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
- if (!tunnel)
+ if (tipc_link_is_up(l))
return;
- if (!l_ptr->owner->permit_changeover) {
- pr_warn("%speer did not permit changeover\n", link_co_err);
+ if (msg_user(hdr) == BCAST_PROTOCOL) {
+ l->rcv_nxt = peers_snd_nxt;
+ l->state = LINK_ESTABLISHED;
return;
}
- tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
- ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr);
- msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
- msg_set_msgcnt(&tunnel_hdr, msgcount);
-
- if (!l_ptr->first_out) {
- struct sk_buff *buf;
+ if (l->peer_caps & TIPC_BCAST_SYNCH)
+ return;
- buf = tipc_buf_acquire(INT_H_SIZE);
- if (buf) {
- skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
- msg_set_size(&tunnel_hdr, INT_H_SIZE);
- tipc_link_send_buf(tunnel, buf);
- } else {
- pr_warn("%sunable to send changeover msg\n",
- link_co_err);
- }
+ if (msg_peer_node_is_up(hdr))
return;
- }
- split_bundles = (l_ptr->owner->active_links[0] !=
- l_ptr->owner->active_links[1]);
+ /* Compatibility: accept older, less safe initial synch data */
+ if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG))
+ l->rcv_nxt = peers_snd_nxt;
+}
- while (crs) {
- struct tipc_msg *msg = buf_msg(crs);
+/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
+ */
+int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+ struct sk_buff_head *xmitq)
+{
+ u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
+ int rc = 0;
- if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) {
- struct tipc_msg *m = msg_get_wrapped(msg);
- unchar *pos = (unchar *)m;
+ if (!tipc_link_is_up(l))
+ return rc;
- msgcount = msg_msgcnt(msg);
- while (msgcount--) {
- msg_set_seqno(m, msg_seqno(msg));
- tipc_link_tunnel(l_ptr, &tunnel_hdr, m,
- msg_link_selector(m));
- pos += align(msg_size(m));
- m = (struct tipc_msg *)pos;
- }
- } else {
- tipc_link_tunnel(l_ptr, &tunnel_hdr, msg,
- msg_link_selector(msg));
- }
- crs = crs->next;
- }
-}
+ if (!msg_peer_node_is_up(hdr))
+ return rc;
-void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel)
-{
- struct sk_buff *iter;
- struct tipc_msg tunnel_hdr;
-
- tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
- DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr);
- msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size);
- msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
- iter = l_ptr->first_out;
- while (iter) {
- struct sk_buff *outbuf;
- struct tipc_msg *msg = buf_msg(iter);
- u32 length = msg_size(msg);
-
- if (msg_user(msg) == MSG_BUNDLER)
- msg_set_type(msg, CLOSED_MSG);
- msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- msg_set_size(&tunnel_hdr, length + INT_H_SIZE);
- outbuf = tipc_buf_acquire(length + INT_H_SIZE);
- if (outbuf == NULL) {
- pr_warn("%sunable to send duplicate msg\n",
- link_co_err);
- return;
- }
- skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
- skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
- length);
- tipc_link_send_buf(tunnel, outbuf);
- if (!tipc_link_is_up(l_ptr))
- return;
- iter = iter->next;
- }
-}
+ /* Open when peer acknowledges our bcast init msg (pkt #1) */
+ if (msg_ack(hdr))
+ l->bc_peer_is_up = true;
-/**
- * buf_extract - extracts embedded TIPC message from another message
- * @skb: encapsulating message buffer
- * @from_pos: offset to extract from
- *
- * Returns a new message buffer containing an embedded message. The
- * encapsulating message itself is left unchanged.
- */
-static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
-{
- struct tipc_msg *msg = (struct tipc_msg *)(skb->data + from_pos);
- u32 size = msg_size(msg);
- struct sk_buff *eb;
+ if (!l->bc_peer_is_up)
+ return rc;
- eb = tipc_buf_acquire(size);
- if (eb)
- skb_copy_to_linear_data(eb, msg, size);
- return eb;
-}
+ /* Ignore if peers_snd_nxt goes beyond receive window */
+ if (more(peers_snd_nxt, l->rcv_nxt + l->window))
+ return rc;
-/*
- * link_recv_changeover_msg(): Receive tunneled packet sent
- * via other link. Node is locked. Return extracted buffer.
- */
-static int link_recv_changeover_msg(struct tipc_link **l_ptr,
- struct sk_buff **buf)
-{
- struct sk_buff *tunnel_buf = *buf;
- struct tipc_link *dest_link;
- struct tipc_msg *msg;
- struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
- u32 msg_typ = msg_type(tunnel_msg);
- u32 msg_count = msg_msgcnt(tunnel_msg);
- u32 bearer_id = msg_bearer_id(tunnel_msg);
+ l->snd_nxt = peers_snd_nxt;
+ if (link_bc_rcv_gap(l))
+ rc |= TIPC_LINK_SND_STATE;
- if (bearer_id >= MAX_BEARERS)
- goto exit;
- dest_link = (*l_ptr)->owner->links[bearer_id];
- if (!dest_link)
- goto exit;
- if (dest_link == *l_ptr) {
- pr_err("Unexpected changeover message on link <%s>\n",
- (*l_ptr)->name);
- goto exit;
- }
- *l_ptr = dest_link;
- msg = msg_get_wrapped(tunnel_msg);
-
- if (msg_typ == DUPLICATE_MSG) {
- if (less(msg_seqno(msg), mod(dest_link->next_in_no)))
- goto exit;
- *buf = buf_extract(tunnel_buf, INT_H_SIZE);
- if (*buf == NULL) {
- pr_warn("%sduplicate msg dropped\n", link_co_err);
- goto exit;
- }
- kfree_skb(tunnel_buf);
- return 1;
- }
+ /* Return now if sender supports nack via STATE messages */
+ if (l->peer_caps & TIPC_BCAST_STATE_NACK)
+ return rc;
- /* First original message ?: */
- if (tipc_link_is_up(dest_link)) {
- pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg,
- dest_link->name);
- tipc_link_reset(dest_link);
- dest_link->exp_msg_count = msg_count;
- if (!msg_count)
- goto exit;
- } else if (dest_link->exp_msg_count == START_CHANGEOVER) {
- dest_link->exp_msg_count = msg_count;
- if (!msg_count)
- goto exit;
+ /* Otherwise, be backwards compatible */
+
+ if (!more(peers_snd_nxt, l->rcv_nxt)) {
+ l->nack_state = BC_NACK_SND_CONDITIONAL;
+ return 0;
}
- /* Receive original message */
- if (dest_link->exp_msg_count == 0) {
- pr_warn("%sgot too many tunnelled messages\n", link_co_err);
- goto exit;
+ /* Don't NACK if one was recently sent or peeked */
+ if (l->nack_state == BC_NACK_SND_SUPPRESS) {
+ l->nack_state = BC_NACK_SND_UNCONDITIONAL;
+ return 0;
}
- dest_link->exp_msg_count--;
- if (less(msg_seqno(msg), dest_link->reset_checkpoint)) {
- goto exit;
- } else {
- *buf = buf_extract(tunnel_buf, INT_H_SIZE);
- if (*buf != NULL) {
- kfree_skb(tunnel_buf);
- return 1;
- } else {
- pr_warn("%soriginal msg dropped\n", link_co_err);
- }
+
+ /* Conditionally delay NACK sending until next synch rcv */
+ if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
+ l->nack_state = BC_NACK_SND_UNCONDITIONAL;
+ if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
+ return 0;
}
-exit:
- *buf = NULL;
- kfree_skb(tunnel_buf);
+
+ /* Send NACK now but suppress next one */
+ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
+ l->nack_state = BC_NACK_SND_SUPPRESS;
return 0;
}
-/*
- * Bundler functionality:
- */
-void tipc_link_recv_bundle(struct sk_buff *buf)
+int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq)
{
- u32 msgcount = msg_msgcnt(buf_msg(buf));
- u32 pos = INT_H_SIZE;
- struct sk_buff *obuf;
-
- while (msgcount--) {
- obuf = buf_extract(buf, pos);
- if (obuf == NULL) {
- pr_warn("Link unable to unbundle message(s)\n");
- break;
- }
- pos += align(msg_size(buf_msg(obuf)));
- tipc_net_route_msg(obuf);
- }
- kfree_skb(buf);
-}
+ struct tipc_link *l = r->bc_sndlink;
+ bool unused = false;
+ int rc = 0;
-/*
- * Fragmentation/defragmentation:
- */
-
-/*
- * link_send_long_buf: Entry for buffers needing fragmentation.
- * The buffer is complete, inclusive total message length.
- * Returns user data length.
- */
-static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
-{
- struct sk_buff *buf_chain = NULL;
- struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain;
- struct tipc_msg *inmsg = buf_msg(buf);
- struct tipc_msg fragm_hdr;
- u32 insize = msg_size(inmsg);
- u32 dsz = msg_data_sz(inmsg);
- unchar *crs = buf->data;
- u32 rest = insize;
- u32 pack_sz = l_ptr->max_pkt;
- u32 fragm_sz = pack_sz - INT_H_SIZE;
- u32 fragm_no = 0;
- u32 destaddr;
-
- if (msg_short(inmsg))
- destaddr = l_ptr->addr;
- else
- destaddr = msg_destnode(inmsg);
+ if (!tipc_link_is_up(r) || !r->bc_peer_is_up)
+ return 0;
- /* Prepare reusable fragment header: */
- tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
- INT_H_SIZE, destaddr);
+ if (gap) {
+ l->stats.recv_nacks++;
+ r->stats.recv_nacks++;
+ }
- /* Chop up message: */
- while (rest > 0) {
- struct sk_buff *fragm;
+ if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
+ return 0;
- if (rest <= fragm_sz) {
- fragm_sz = rest;
- msg_set_type(&fragm_hdr, LAST_FRAGMENT);
- }
- fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
- if (fragm == NULL) {
- kfree_skb(buf);
- while (buf_chain) {
- buf = buf_chain;
- buf_chain = buf_chain->next;
- kfree_skb(buf);
- }
- return -ENOMEM;
- }
- msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
- fragm_no++;
- msg_set_fragm_no(&fragm_hdr, fragm_no);
- skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
- skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
- fragm_sz);
- buf_chain_tail->next = fragm;
- buf_chain_tail = fragm;
-
- rest -= fragm_sz;
- crs += fragm_sz;
- msg_set_type(&fragm_hdr, FRAGMENT);
- }
- kfree_skb(buf);
+ trace_tipc_link_bc_ack(r, acked, gap, &l->transmq);
+ tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc);
- /* Append chain of fragments to send queue & send them */
- l_ptr->long_msg_seq_no++;
- link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no);
- l_ptr->stats.sent_fragments += fragm_no;
- l_ptr->stats.sent_fragmented++;
- tipc_link_push_queue(l_ptr);
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
- return dsz;
+ return rc;
}
-/*
- * A pending message being re-assembled must store certain values
- * to handle subsequent fragments correctly. The following functions
- * help storing these values in unused, available fields in the
- * pending message. This makes dynamic memory allocation unnecessary.
+/* tipc_link_bc_nack_rcv(): receive broadcast nack message
+ * This function is here for backwards compatibility, since
+ * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5.
*/
-static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno)
+int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- msg_set_seqno(buf_msg(buf), seqno);
-}
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 dnode = msg_destnode(hdr);
+ int mtyp = msg_type(hdr);
+ u16 acked = msg_bcast_ack(hdr);
+ u16 from = acked + 1;
+ u16 to = msg_bcgap_to(hdr);
+ u16 peers_snd_nxt = to + 1;
+ int rc = 0;
-static u32 get_fragm_size(struct sk_buff *buf)
-{
- return msg_ack(buf_msg(buf));
-}
+ kfree_skb(skb);
-static void set_fragm_size(struct sk_buff *buf, u32 sz)
-{
- msg_set_ack(buf_msg(buf), sz);
-}
-
-static u32 get_expected_frags(struct sk_buff *buf)
-{
- return msg_bcast_ack(buf_msg(buf));
-}
+ if (!tipc_link_is_up(l) || !l->bc_peer_is_up)
+ return 0;
-static void set_expected_frags(struct sk_buff *buf, u32 exp)
-{
- msg_set_bcast_ack(buf_msg(buf), exp);
-}
+ if (mtyp != STATE_MSG)
+ return 0;
-/*
- * tipc_link_recv_fragment(): Called with node lock on. Returns
- * the reassembled buffer if message is complete.
- */
-int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
- struct tipc_msg **m)
-{
- struct sk_buff *prev = NULL;
- struct sk_buff *fbuf = *fb;
- struct tipc_msg *fragm = buf_msg(fbuf);
- struct sk_buff *pbuf = *pending;
- u32 long_msg_seq_no = msg_long_msgno(fragm);
-
- *fb = NULL;
-
- /* Is there an incomplete message waiting for this fragment? */
- while (pbuf && ((buf_seqno(pbuf) != long_msg_seq_no) ||
- (msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) {
- prev = pbuf;
- pbuf = pbuf->next;
+ if (dnode == tipc_own_addr(l->net)) {
+ rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq,
+ xmitq);
+ l->stats.recv_nacks++;
+ return rc;
}
- if (!pbuf && (msg_type(fragm) == FIRST_FRAGMENT)) {
- struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
- u32 msg_sz = msg_size(imsg);
- u32 fragm_sz = msg_data_sz(fragm);
- u32 exp_fragm_cnt;
- u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE;
-
- if (msg_type(imsg) == TIPC_MCAST_MSG)
- max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
- if (fragm_sz == 0 || msg_size(imsg) > max) {
- kfree_skb(fbuf);
- return 0;
- }
- exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
- pbuf = tipc_buf_acquire(msg_size(imsg));
- if (pbuf != NULL) {
- pbuf->next = *pending;
- *pending = pbuf;
- skb_copy_to_linear_data(pbuf, imsg,
- msg_data_sz(fragm));
- /* Prepare buffer for subsequent fragments. */
- set_long_msg_seqno(pbuf, long_msg_seq_no);
- set_fragm_size(pbuf, fragm_sz);
- set_expected_frags(pbuf, exp_fragm_cnt - 1);
- } else {
- pr_debug("Link unable to reassemble fragmented message\n");
- kfree_skb(fbuf);
- return -1;
- }
- kfree_skb(fbuf);
- return 0;
- } else if (pbuf && (msg_type(fragm) != FIRST_FRAGMENT)) {
- u32 dsz = msg_data_sz(fragm);
- u32 fsz = get_fragm_size(pbuf);
- u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
- u32 exp_frags = get_expected_frags(pbuf) - 1;
- skb_copy_to_linear_data_offset(pbuf, crs,
- msg_data(fragm), dsz);
- kfree_skb(fbuf);
-
- /* Is message complete? */
- if (exp_frags == 0) {
- if (prev)
- prev->next = pbuf->next;
- else
- *pending = pbuf->next;
- msg_reset_reroute_cnt(buf_msg(pbuf));
- *fb = pbuf;
- *m = buf_msg(pbuf);
- return 1;
- }
- set_expected_frags(pbuf, exp_frags);
- return 0;
- }
- kfree_skb(fbuf);
+ /* Msg for other node => suppress own NACK at next sync if applicable */
+ if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from))
+ l->nack_state = BC_NACK_SND_SUPPRESS;
+
return 0;
}
-static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win)
{
- if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
- return;
+ int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE);
- l_ptr->tolerance = tolerance;
- l_ptr->continuity_interval =
- ((tolerance / 4) > 500) ? 500 : tolerance / 4;
- l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4);
+ l->min_win = min_win;
+ l->ssthresh = max_win;
+ l->max_win = max_win;
+ l->window = min_win;
+ l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2;
+ l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4;
+ l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6;
+ l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8;
+ l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk;
}
-void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
+/**
+ * tipc_link_reset_stats - reset link statistics
+ * @l: pointer to link
+ */
+void tipc_link_reset_stats(struct tipc_link *l)
{
- /* Data messages from this node, inclusive FIRST_FRAGM */
- l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window;
- l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4;
- l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5;
- l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6;
- /* Transiting data messages,inclusive FIRST_FRAGM */
- l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300;
- l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600;
- l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900;
- l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200;
- l_ptr->queue_limit[CONN_MANAGER] = 1200;
- l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
- l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000;
- /* FRAGMENT and LAST_FRAGMENT packets */
- l_ptr->queue_limit[MSG_FRAGMENTER] = 4000;
+ memset(&l->stats, 0, sizeof(l->stats));
}
-/**
- * link_find_link - locate link by name
- * @name: ptr to link name string
- * @node: ptr to area to be filled with ptr to associated node
- *
- * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted;
- * this also prevents link deletion.
- *
- * Returns pointer to link (or 0 if invalid link name).
- */
-static struct tipc_link *link_find_link(const char *name,
- struct tipc_node **node)
+static void link_print(struct tipc_link *l, const char *str)
{
- struct tipc_link_name link_name_parts;
- struct tipc_bearer *b_ptr;
- struct tipc_link *l_ptr;
+ struct sk_buff *hskb = skb_peek(&l->transmq);
+ u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1;
+ u16 tail = l->snd_nxt - 1;
- if (!link_name_validate(name, &link_name_parts))
- return NULL;
+ pr_info("%s Link <%s> state %x\n", str, l->name, l->state);
+ pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n",
+ skb_queue_len(&l->transmq), head, tail,
+ skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt);
+}
- b_ptr = tipc_bearer_find_interface(link_name_parts.if_local);
- if (!b_ptr)
- return NULL;
+/* Parse and validate nested (link) properties valid for media, bearer and link
+ */
+int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
+{
+ int err;
- *node = tipc_node_find(link_name_parts.addr_peer);
- if (!*node)
- return NULL;
+ err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop,
+ tipc_nl_prop_policy, NULL);
+ if (err)
+ return err;
- l_ptr = (*node)->links[b_ptr->identity];
- if (!l_ptr || strcmp(l_ptr->name, name))
- return NULL;
+ if (props[TIPC_NLA_PROP_PRIO]) {
+ u32 prio;
- return l_ptr;
-}
-
-/**
- * link_value_is_valid -- validate proposed link tolerance/priority/window
- *
- * @cmd: value type (TIPC_CMD_SET_LINK_*)
- * @new_value: the new value
- *
- * Returns 1 if value is within range, 0 if not.
- */
-static int link_value_is_valid(u16 cmd, u32 new_value)
-{
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- return (new_value >= TIPC_MIN_LINK_TOL) &&
- (new_value <= TIPC_MAX_LINK_TOL);
- case TIPC_CMD_SET_LINK_PRI:
- return (new_value <= TIPC_MAX_LINK_PRI);
- case TIPC_CMD_SET_LINK_WINDOW:
- return (new_value >= TIPC_MIN_LINK_WIN) &&
- (new_value <= TIPC_MAX_LINK_WIN);
+ prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
+ if (prio > TIPC_MAX_LINK_PRI)
+ return -EINVAL;
}
- return 0;
-}
-/**
- * link_cmd_set_value - change priority/tolerance/window for link/bearer/media
- * @name: ptr to link, bearer, or media name
- * @new_value: new value of link, bearer, or media setting
- * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*)
- *
- * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted.
- *
- * Returns 0 if value updated and negative value on error.
- */
-static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
-{
- struct tipc_node *node;
- struct tipc_link *l_ptr;
- struct tipc_bearer *b_ptr;
- struct tipc_media *m_ptr;
-
- l_ptr = link_find_link(name, &node);
- if (l_ptr) {
- /*
- * acquire node lock for tipc_link_send_proto_msg().
- * see "TIPC locking policy" in net.c.
- */
- tipc_node_lock(node);
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- link_set_supervision_props(l_ptr, new_value);
- tipc_link_send_proto_msg(l_ptr,
- STATE_MSG, 0, 0, new_value, 0, 0);
- break;
- case TIPC_CMD_SET_LINK_PRI:
- l_ptr->priority = new_value;
- tipc_link_send_proto_msg(l_ptr,
- STATE_MSG, 0, 0, 0, new_value, 0);
- break;
- case TIPC_CMD_SET_LINK_WINDOW:
- tipc_link_set_queue_limits(l_ptr, new_value);
- break;
- }
- tipc_node_unlock(node);
- return 0;
- }
+ if (props[TIPC_NLA_PROP_TOL]) {
+ u32 tol;
- b_ptr = tipc_bearer_find(name);
- if (b_ptr) {
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- b_ptr->tolerance = new_value;
- return 0;
- case TIPC_CMD_SET_LINK_PRI:
- b_ptr->priority = new_value;
- return 0;
- case TIPC_CMD_SET_LINK_WINDOW:
- b_ptr->window = new_value;
- return 0;
- }
- return -EINVAL;
+ tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL))
+ return -EINVAL;
}
- m_ptr = tipc_media_find(name);
- if (!m_ptr)
- return -ENODEV;
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- m_ptr->tolerance = new_value;
- return 0;
- case TIPC_CMD_SET_LINK_PRI:
- m_ptr->priority = new_value;
- return 0;
- case TIPC_CMD_SET_LINK_WINDOW:
- m_ptr->window = new_value;
- return 0;
+ if (props[TIPC_NLA_PROP_WIN]) {
+ u32 max_win;
+
+ max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN)
+ return -EINVAL;
}
- return -EINVAL;
-}
-struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space,
- u16 cmd)
-{
- struct tipc_link_config *args;
- u32 new_value;
- int res;
+ return 0;
+}
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s)
+{
+ int i;
+ struct nlattr *stats;
+
+ struct nla_map {
+ u32 key;
+ u32 val;
+ };
+
+ struct nla_map map[] = {
+ {TIPC_NLA_STATS_RX_INFO, 0},
+ {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments},
+ {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented},
+ {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles},
+ {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled},
+ {TIPC_NLA_STATS_TX_INFO, 0},
+ {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments},
+ {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented},
+ {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles},
+ {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled},
+ {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ?
+ s->msg_length_counts : 1},
+ {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts},
+ {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total},
+ {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]},
+ {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]},
+ {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]},
+ {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]},
+ {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]},
+ {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]},
+ {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]},
+ {TIPC_NLA_STATS_RX_STATES, s->recv_states},
+ {TIPC_NLA_STATS_RX_PROBES, s->recv_probes},
+ {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks},
+ {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv},
+ {TIPC_NLA_STATS_TX_STATES, s->sent_states},
+ {TIPC_NLA_STATS_TX_PROBES, s->sent_probes},
+ {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks},
+ {TIPC_NLA_STATS_TX_ACKS, s->sent_acks},
+ {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted},
+ {TIPC_NLA_STATS_DUPLICATES, s->duplicates},
+ {TIPC_NLA_STATS_LINK_CONGS, s->link_congs},
+ {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz},
+ {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ?
+ (s->accu_queue_sz / s->queue_sz_counts) : 0}
+ };
+
+ stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS);
+ if (!stats)
+ return -EMSGSIZE;
+
+ for (i = 0; i < ARRAY_SIZE(map); i++)
+ if (nla_put_u32(skb, map[i].key, map[i].val))
+ goto msg_full;
+
+ nla_nest_end(skb, stats);
- args = (struct tipc_link_config *)TLV_DATA(req_tlv_area);
- new_value = ntohl(args->value);
+ return 0;
+msg_full:
+ nla_nest_cancel(skb, stats);
+
+ return -EMSGSIZE;
+}
+
+/* Caller should hold appropriate locks to protect the link */
+int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *link, int nlflags)
+{
+ u32 self = tipc_own_addr(net);
+ struct nlattr *attrs;
+ struct nlattr *prop;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ nlflags, TIPC_NL_LINK_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK);
+ if (!attrs)
+ goto msg_full;
+
+ if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self)))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts))
+ goto attr_msg_full;
+
+ if (tipc_link_is_up(link))
+ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
+ goto attr_msg_full;
+ if (link->active)
+ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE))
+ goto attr_msg_full;
+
+ prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
+ if (!prop)
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority))
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance))
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN,
+ link->window))
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority))
+ goto prop_msg_full;
+ nla_nest_end(msg->skb, prop);
+
+ err = __tipc_nl_add_stats(msg->skb, &link->stats);
+ if (err)
+ goto attr_msg_full;
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
- if (!link_value_is_valid(cmd, new_value))
- return tipc_cfg_reply_error_string(
- "cannot change, value invalid");
+ return 0;
- if (!strcmp(args->name, tipc_bclink_name)) {
- if ((cmd == TIPC_CMD_SET_LINK_WINDOW) &&
- (tipc_bclink_set_queue_limits(new_value) == 0))
- return tipc_cfg_reply_none();
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (cannot change setting on broadcast link)");
- }
+prop_msg_full:
+ nla_nest_cancel(msg->skb, prop);
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb,
+ struct tipc_stats *stats)
+{ /* Emit the counters of @stats as u32 attributes nested under TIPC_NLA_LINK_STATS in @skb. */
+ int i;
+ struct nlattr *nest;
+
+ struct nla_map { /* one (attribute type, u32 value) pair */
+ __u32 key;
+ __u32 val;
+ };
+
+ struct nla_map map[] = { /* values are read from @stats here, when the table is initialised */
+ {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts},
+ {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments},
+ {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented},
+ {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles},
+ {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled},
+ {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts},
+ {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments},
+ {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented},
+ {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles},
+ {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled},
+ {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks},
+ {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv},
+ {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks},
+ {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks},
+ {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted},
+ {TIPC_NLA_STATS_DUPLICATES, stats->duplicates},
+ {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs},
+ {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz},
+ {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ?
+ (stats->accu_queue_sz / stats->queue_sz_counts) : 0} /* yields 0 instead of dividing when queue_sz_counts is 0 */
+ };
+
+ nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS);
+ if (!nest)
+ return -EMSGSIZE; /* the nest could not be started; nothing to cancel yet */
+
+ for (i = 0; i < ARRAY_SIZE(map); i++) /* one u32 attribute per table entry, in table order */
+ if (nla_put_u32(skb, map[i].key, map[i].val))
+ goto msg_full;
+
+ nla_nest_end(skb, nest);
- read_lock_bh(&tipc_net_lock);
- res = link_cmd_set_value(args->name, new_value, cmd);
- read_unlock_bh(&tipc_net_lock);
- if (res)
- return tipc_cfg_reply_error_string("cannot change link setting");
+ return 0;
+msg_full:
+ nla_nest_cancel(skb, nest); /* an attribute put failed: cancel the whole nest, then report -EMSGSIZE */
- return tipc_cfg_reply_none();
+ return -EMSGSIZE;
}
-/**
- * link_reset_statistics - reset link statistics
- * @l_ptr: pointer to link
- */
-static void link_reset_statistics(struct tipc_link *l_ptr)
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl)
{ /* Append one TIPC_NL_LINK_GET message describing broadcast link @bcl to msg->skb. */
- memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
- l_ptr->stats.sent_info = l_ptr->next_out_no;
- l_ptr->stats.recv_info = l_ptr->next_in_no;
-}
+ int err;
+ void *hdr;
+ struct nlattr *attrs;
+ struct nlattr *prop;
+ u32 bc_mode = tipc_bcast_get_mode(net); /* NOTE(review): read before tipc_bcast_lock() is taken - confirm that is intended */
+ u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
-struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space)
-{
- char *link_name;
- struct tipc_link *l_ptr;
- struct tipc_node *node;
+ if (!bcl)
+ return 0; /* no broadcast link: nothing is added and this is not reported as an error */
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+ tipc_bcast_lock(net); /* held while the message is built; every exit path below releases it */
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ NLM_F_MULTI, TIPC_NL_LINK_GET);
+ if (!hdr) {
+ tipc_bcast_unlock(net);
+ return -EMSGSIZE;
+ }
+
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK);
+ if (!attrs)
+ goto msg_full;
+
+ /* The broadcast link is always up */
+ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
+ goto attr_msg_full;
+
+ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST))
+ goto attr_msg_full;
+ if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0)) /* RX is reported as the constant 0 here */
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) /* TX is reported as the constant 0 here */
+ goto attr_msg_full;
+
+ prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
+ if (!prop)
+ goto attr_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win))
+ goto prop_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode))
+ goto prop_msg_full;
+ if (bc_mode & BCLINK_MODE_SEL) /* the ratio attribute is only emitted when the BCLINK_MODE_SEL bit is set */
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO,
+ bc_ratio))
+ goto prop_msg_full;
+ nla_nest_end(msg->skb, prop);
+
+ err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats);
+ if (err) /* the helper's error value is not propagated; this path always returns -EMSGSIZE */
+ goto attr_msg_full;
+
+ tipc_bcast_unlock(net); /* success path: unlock, then close the nest and finalise the message */
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
- link_name = (char *)TLV_DATA(req_tlv_area);
- if (!strcmp(link_name, tipc_bclink_name)) {
- if (tipc_bclink_reset_stats())
- return tipc_cfg_reply_error_string("link not found");
- return tipc_cfg_reply_none();
- }
+ return 0;
- read_lock_bh(&tipc_net_lock);
- l_ptr = link_find_link(link_name, &node);
- if (!l_ptr) {
- read_unlock_bh(&tipc_net_lock);
- return tipc_cfg_reply_error_string("link not found");
- }
+prop_msg_full: /* error labels fall through: cancel prop nest, then attrs nest, then the message header */
+ nla_nest_cancel(msg->skb, prop);
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ tipc_bcast_unlock(net);
+ genlmsg_cancel(msg->skb, hdr);
- tipc_node_lock(node);
- link_reset_statistics(l_ptr);
- tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
- return tipc_cfg_reply_none();
+ return -EMSGSIZE;
}
-/**
- * percent - convert count to a percentage of total (rounding up or down)
- */
-static u32 percent(u32 count, u32 total)
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+ struct sk_buff_head *xmitq)
{ /* Set the tolerance of @l, and of its bc_rcvlink when one is attached, to @tol. */
- return (count * 100 + (total / 2)) / total;
+ l->tolerance = tol;
+ if (l->bc_rcvlink) /* bc_rcvlink may be NULL; only mirror the value when it is set */
+ l->bc_rcvlink->tolerance = tol;
+ if (tipc_link_is_up(l)) /* only when the link is up: build a STATE_MSG carrying @tol (presumably queued on @xmitq - confirm in tipc_link_build_proto_msg) */
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
-/**
- * tipc_link_stats - print link statistics
- * @name: link name
- * @buf: print buffer area
- * @buf_size: size of print buffer area
- *
- * Returns length of print buffer data string (or 0 if error)
- */
-static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+ struct sk_buff_head *xmitq)
{ /* Set the priority of @l to @prio and build a STATE_MSG carrying the new value. */
- struct tipc_link *l;
- struct tipc_stats *s;
- struct tipc_node *node;
- char *status;
- u32 profile_total = 0;
- int ret;
-
- if (!strcmp(name, tipc_bclink_name))
- return tipc_bclink_stats(buf, buf_size);
-
- read_lock_bh(&tipc_net_lock);
- l = link_find_link(name, &node);
- if (!l) {
- read_unlock_bh(&tipc_net_lock);
- return 0;
- }
- tipc_node_lock(node);
- s = &l->stats;
-
- if (tipc_link_is_active(l))
- status = "ACTIVE";
- else if (tipc_link_is_up(l))
- status = "STANDBY";
- else
- status = "DEFUNCT";
-
- ret = tipc_snprintf(buf, buf_size, "Link <%s>\n"
- " %s MTU:%u Priority:%u Tolerance:%u ms"
- " Window:%u packets\n",
- l->name, status, l->max_pkt, l->priority,
- l->tolerance, l->queue_limit[0]);
-
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " RX packets:%u fragments:%u/%u bundles:%u/%u\n",
- l->next_in_no - s->recv_info, s->recv_fragments,
- s->recv_fragmented, s->recv_bundles,
- s->recv_bundled);
-
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " TX packets:%u fragments:%u/%u bundles:%u/%u\n",
- l->next_out_no - s->sent_info, s->sent_fragments,
- s->sent_fragmented, s->sent_bundles,
- s->sent_bundled);
-
- profile_total = s->msg_length_counts;
- if (!profile_total)
- profile_total = 1;
-
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " TX profile sample:%u packets average:%u octets\n"
- " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% "
- "-16384:%u%% -32768:%u%% -66000:%u%%\n",
- s->msg_length_counts,
- s->msg_lengths_total / profile_total,
- percent(s->msg_length_profile[0], profile_total),
- percent(s->msg_length_profile[1], profile_total),
- percent(s->msg_length_profile[2], profile_total),
- percent(s->msg_length_profile[3], profile_total),
- percent(s->msg_length_profile[4], profile_total),
- percent(s->msg_length_profile[5], profile_total),
- percent(s->msg_length_profile[6], profile_total));
-
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " RX states:%u probes:%u naks:%u defs:%u"
- " dups:%u\n", s->recv_states, s->recv_probes,
- s->recv_nacks, s->deferred_recv, s->duplicates);
-
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " TX states:%u probes:%u naks:%u acks:%u"
- " dups:%u\n", s->sent_states, s->sent_probes,
- s->sent_nacks, s->sent_acks, s->retransmitted);
-
- ret += tipc_snprintf(buf + ret, buf_size - ret,
- " Congestion link:%u Send queue"
- " max:%u avg:%u\n", s->link_congs,
- s->max_queue_sz, s->queue_sz_counts ?
- (s->accu_queue_sz / s->queue_sz_counts) : 0);
-
- tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
- return ret;
-}
-
-struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space)
-{
- struct sk_buff *buf;
- struct tlv_desc *rep_tlv;
- int str_len;
- int pb_len;
- char *pb;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
- if (!buf)
- return NULL;
-
- rep_tlv = (struct tlv_desc *)buf->data;
- pb = TLV_DATA(rep_tlv);
- pb_len = ULTRA_STRING_MAX_LEN;
- str_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area),
- pb, pb_len);
- if (!str_len) {
- kfree_skb(buf);
- return tipc_cfg_reply_error_string("link not found");
- }
- str_len += 1; /* for "\0" */
- skb_put(buf, TLV_SPACE(str_len));
- TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+ l->priority = prio;
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); /* unlike tipc_link_set_tolerance(), not conditional on tipc_link_is_up() */
+}
- return buf;
+void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
+{ /* Store @limit in l->abort_limit; nothing else is modified and no message is built. */
+ l->abort_limit = limit;
}
/**
- * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination
- * @dest: network address of destination node
- * @selector: used to select from set of active links
- *
- * If no active link can be found, uses default maximum packet size.
+ * tipc_link_dump - dump TIPC link data
+ * @l: tipc link to be dumped
+ * @dqueues: bitmask selecting which link queues, if any, are dumped:
+ * - TIPC_DUMP_NONE: don't dump link queues
+ * - TIPC_DUMP_TRANSMQ: dump link transmq queue
+ * - TIPC_DUMP_BACKLOGQ: dump link backlog queue
+ * - TIPC_DUMP_DEFERDQ: dump link deferred queue
+ * - TIPC_DUMP_INPUTQ: dump link input queue
+ * - TIPC_DUMP_WAKEUP: dump link wakeup queue
+ * - TIPC_DUMP_ALL: dump all the link queues above
+ * @buf: buffer that receives the formatted dump data
*/
-u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
-{
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- u32 res = MAX_PKT_DEFAULT;
-
- if (dest == tipc_own_addr)
- return MAX_MSG_SIZE;
-
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(dest);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[selector & 1];
- if (l_ptr)
- res = l_ptr->max_pkt;
- tipc_node_unlock(n_ptr);
- }
- read_unlock_bh(&tipc_net_lock);
- return res;
-}
-
-static void link_print(struct tipc_link *l_ptr, const char *str)
+int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf)
{ /* Format the state of @l as text into @buf and return the number of characters written. */
- pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name);
+ int i = 0; /* running count of characters written; also the return value */
+ size_t sz = (dqueues) ? LINK_LMAX : LINK_LMIN; /* size bound given to scnprintf(): LINK_LMAX if any queue dump is requested, else LINK_LMIN */
+ struct sk_buff_head *list;
+ struct sk_buff *hskb, *tskb;
+ u32 len;
- if (link_working_unknown(l_ptr))
- pr_cont(":WU\n");
- else if (link_reset_reset(l_ptr))
- pr_cont(":RR\n");
- else if (link_reset_unknown(l_ptr))
- pr_cont(":RU\n");
- else if (link_working_working(l_ptr))
- pr_cont(":WW\n");
- else
- pr_cont("\n");
+ if (!l) { /* a NULL link is reported as text rather than dereferenced */
+ i += scnprintf(buf, sz, "link data: (null)\n");
+ return i;
+ }
+
+ i += scnprintf(buf, sz, "link data: %x", l->addr); /* header line: space-separated scalar fields of the link */
+ i += scnprintf(buf + i, sz - i, " %x", l->state);
+ i += scnprintf(buf + i, sz - i, " %u", l->in_session);
+ i += scnprintf(buf + i, sz - i, " %u", l->session);
+ i += scnprintf(buf + i, sz - i, " %u", l->peer_session);
+ i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt);
+ i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt);
+ i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt_state);
+ i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt_state);
+ i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
+ i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
+ i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
+ i += scnprintf(buf + i, sz - i, " %u", 0); /* a literal 0 is printed for this field */
+ i += scnprintf(buf + i, sz - i, " %u", 0); /* a literal 0 is printed for this field */
+ i += scnprintf(buf + i, sz - i, " %u", l->acked);
+
+ list = &l->transmq; /* per-queue summary: length, seqno of head, seqno of tail (0 when the queue is empty) */
+ len = skb_queue_len(list);
+ hskb = skb_peek(list);
+ tskb = skb_peek_tail(list);
+ i += scnprintf(buf + i, sz - i, " | %u %u %u", len,
+ (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+ (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+ list = &l->deferdq;
+ len = skb_queue_len(list);
+ hskb = skb_peek(list);
+ tskb = skb_peek_tail(list);
+ i += scnprintf(buf + i, sz - i, " | %u %u %u", len,
+ (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+ (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+ list = &l->backlogq;
+ len = skb_queue_len(list);
+ hskb = skb_peek(list);
+ tskb = skb_peek_tail(list);
+ i += scnprintf(buf + i, sz - i, " | %u %u %u", len,
+ (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+ (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+ list = l->inputq; /* inputq is held as a pointer, unlike the embedded queues above */
+ len = skb_queue_len(list);
+ hskb = skb_peek(list);
+ tskb = skb_peek_tail(list);
+ i += scnprintf(buf + i, sz - i, " | %u %u %u\n", len,
+ (hskb) ? msg_seqno(buf_msg(hskb)) : 0,
+ (tskb) ? msg_seqno(buf_msg(tskb)) : 0);
+
+ if (dqueues & TIPC_DUMP_TRANSMQ) { /* optional per-queue dumps, one section per bit set in @dqueues */
+ i += scnprintf(buf + i, sz - i, "transmq: ");
+ i += tipc_list_dump(&l->transmq, false, buf + i); /* NOTE(review): no remaining-size argument is passed - confirm tipc_list_dump() bounds its own output */
+ }
+ if (dqueues & TIPC_DUMP_BACKLOGQ) {
+ i += scnprintf(buf + i, sz - i,
+ "backlogq: <%u %u %u %u %u>, ",
+ l->backlog[TIPC_LOW_IMPORTANCE].len,
+ l->backlog[TIPC_MEDIUM_IMPORTANCE].len,
+ l->backlog[TIPC_HIGH_IMPORTANCE].len,
+ l->backlog[TIPC_CRITICAL_IMPORTANCE].len,
+ l->backlog[TIPC_SYSTEM_IMPORTANCE].len);
+ i += tipc_list_dump(&l->backlogq, false, buf + i);
+ }
+ if (dqueues & TIPC_DUMP_DEFERDQ) {
+ i += scnprintf(buf + i, sz - i, "deferdq: ");
+ i += tipc_list_dump(&l->deferdq, false, buf + i);
+ }
+ if (dqueues & TIPC_DUMP_INPUTQ) {
+ i += scnprintf(buf + i, sz - i, "inputq: ");
+ i += tipc_list_dump(l->inputq, false, buf + i);
+ }
+ if (dqueues & TIPC_DUMP_WAKEUP) {
+ i += scnprintf(buf + i, sz - i, "wakeup: ");
+ i += tipc_list_dump(&l->wakeupq, false, buf + i);
+ }
+
+ return i;
}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index c048ed1cbd76..d80f5649b395 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -1,7 +1,7 @@
/*
* net/tipc/link.h: Include file for TIPC link code
*
- * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 1995-2006, 2013-2014, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -37,283 +37,122 @@
#ifndef _TIPC_LINK_H
#define _TIPC_LINK_H
+#include <net/genetlink.h>
#include "msg.h"
#include "node.h"
-/*
- * Out-of-range value for link sequence numbers
- */
-#define INVALID_LINK_SEQ 0x10000
+/* TIPC-specific error codes
+*/
+#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */
-/*
- * Link states
+/* Link FSM events:
*/
-#define WORKING_WORKING 560810u
-#define WORKING_UNKNOWN 560811u
-#define RESET_UNKNOWN 560812u
-#define RESET_RESET 560813u
-
-/*
- * Starting value for maximum packet size negotiation on unicast links
- * (unless bearer MTU is less)
- */
-#define MAX_PKT_DEFAULT 1500
-
-struct tipc_stats {
- u32 sent_info; /* used in counting # sent packets */
- u32 recv_info; /* used in counting # recv'd packets */
- u32 sent_states;
- u32 recv_states;
- u32 sent_probes;
- u32 recv_probes;
- u32 sent_nacks;
- u32 recv_nacks;
- u32 sent_acks;
- u32 sent_bundled;
- u32 sent_bundles;
- u32 recv_bundled;
- u32 recv_bundles;
- u32 retransmitted;
- u32 sent_fragmented;
- u32 sent_fragments;
- u32 recv_fragmented;
- u32 recv_fragments;
- u32 link_congs; /* # port sends blocked by congestion */
- u32 deferred_recv;
- u32 duplicates;
- u32 max_queue_sz; /* send queue size high water mark */
- u32 accu_queue_sz; /* used for send queue size profiling */
- u32 queue_sz_counts; /* used for send queue size profiling */
- u32 msg_length_counts; /* used for message length profiling */
- u32 msg_lengths_total; /* used for message length profiling */
- u32 msg_length_profile[7]; /* used for msg. length profiling */
+enum { /* anonymous enum: each event is a distinct, non-sequential hex constant */
+ LINK_ESTABLISH_EVT = 0xec1ab1e,
+ LINK_PEER_RESET_EVT = 0x9eed0e,
+ LINK_FAILURE_EVT = 0xfa110e,
+ LINK_RESET_EVT = 0x10ca1d0e,
+ LINK_FAILOVER_BEGIN_EVT = 0xfa110bee, /* NOTE(review): exceeds INT_MAX, while tipc_link_fsm_evt() takes an int - confirm the conversion is intended */
+ LINK_FAILOVER_END_EVT = 0xfa110ede, /* NOTE(review): exceeds INT_MAX as well */
+ LINK_SYNCH_BEGIN_EVT = 0xc1ccbee,
+ LINK_SYNCH_END_EVT = 0xc1ccede
};
-/**
- * struct tipc_link - TIPC link data structure
- * @addr: network address of link's peer node
- * @name: link name character string
- * @media_addr: media address to use when sending messages over link
- * @timer: link timer
- * @owner: pointer to peer node
- * @link_list: adjacent links in bearer's list of links
- * @started: indicates if link has been started
- * @checkpoint: reference point for triggering link continuity checking
- * @peer_session: link session # being used by peer end of link
- * @peer_bearer_id: bearer id used by link's peer endpoint
- * @b_ptr: pointer to bearer used by link
- * @tolerance: minimum link continuity loss needed to reset link [in ms]
- * @continuity_interval: link continuity testing interval [in ms]
- * @abort_limit: # of unacknowledged continuity probes needed to reset link
- * @state: current state of link FSM
- * @blocked: indicates if link has been administratively blocked
- * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state
- * @proto_msg: template for control messages generated by link
- * @pmsg: convenience pointer to "proto_msg" field
- * @priority: current link priority
- * @queue_limit: outbound message queue congestion thresholds (indexed by user)
- * @exp_msg_count: # of tunnelled messages expected during link changeover
- * @reset_checkpoint: seq # of last acknowledged message at time of link reset
- * @max_pkt: current maximum packet size for this link
- * @max_pkt_target: desired maximum packet size for this link
- * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target)
- * @out_queue_size: # of messages in outbound message queue
- * @first_out: ptr to first outbound message in queue
- * @last_out: ptr to last outbound message in queue
- * @next_out_no: next sequence number to use for outbound messages
- * @last_retransmitted: sequence number of most recently retransmitted message
- * @stale_count: # of identical retransmit requests made by peer
- * @next_in_no: next sequence number to expect for inbound messages
- * @deferred_inqueue_sz: # of messages in inbound message queue
- * @oldest_deferred_in: ptr to first inbound message in queue
- * @newest_deferred_in: ptr to last inbound message in queue
- * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
- * @proto_msg_queue: ptr to (single) outbound control message
- * @retransm_queue_size: number of messages to retransmit
- * @retransm_queue_head: sequence number of first message to retransmit
- * @next_out: ptr to first unsent outbound message in queue
- * @waiting_ports: linked list of ports waiting for link congestion to abate
- * @long_msg_seq_no: next identifier to use for outbound fragmented messages
- * @defragm_buf: list of partially reassembled inbound message fragments
- * @stats: collects statistics regarding link activity
+/* Events returned from link at packet reception or at timeout
*/
-struct tipc_link {
- u32 addr;
- char name[TIPC_MAX_LINK_NAME];
- struct tipc_media_addr media_addr;
- struct timer_list timer;
- struct tipc_node *owner;
- struct list_head link_list;
-
- /* Management and link supervision data */
- int started;
- u32 checkpoint;
- u32 peer_session;
- u32 peer_bearer_id;
- struct tipc_bearer *b_ptr;
- u32 tolerance;
- u32 continuity_interval;
- u32 abort_limit;
- int state;
- int blocked;
- u32 fsm_msg_cnt;
- struct {
- unchar hdr[INT_H_SIZE];
- unchar body[TIPC_MAX_IF_NAME];
- } proto_msg;
- struct tipc_msg *pmsg;
- u32 priority;
- u32 queue_limit[15]; /* queue_limit[0]==window limit */
-
- /* Changeover */
- u32 exp_msg_count;
- u32 reset_checkpoint;
-
- /* Max packet negotiation */
- u32 max_pkt;
- u32 max_pkt_target;
- u32 max_pkt_probes;
-
- /* Sending */
- u32 out_queue_size;
- struct sk_buff *first_out;
- struct sk_buff *last_out;
- u32 next_out_no;
- u32 last_retransmitted;
- u32 stale_count;
-
- /* Reception */
- u32 next_in_no;
- u32 deferred_inqueue_sz;
- struct sk_buff *oldest_deferred_in;
- struct sk_buff *newest_deferred_in;
- u32 unacked_window;
-
- /* Congestion handling */
- struct sk_buff *proto_msg_queue;
- u32 retransm_queue_size;
- u32 retransm_queue_head;
- struct sk_buff *next_out;
- struct list_head waiting_ports;
-
- /* Fragmentation/defragmentation */
- u32 long_msg_seq_no;
- struct sk_buff *defragm_buf;
-
- /* Statistics */
- struct tipc_stats stats;
+enum { /* single-bit values (1, 2, 4); presumably so several can be OR-ed into one return code - confirm against callers */
+ TIPC_LINK_UP_EVT = 1,
+ TIPC_LINK_DOWN_EVT = (1 << 1),
+ TIPC_LINK_SND_STATE = (1 << 2)
};
-struct tipc_port;
-
-struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr);
-void tipc_link_delete(struct tipc_link *l_ptr);
-void tipc_link_changeover(struct tipc_link *l_ptr);
-void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest);
-void tipc_link_reset_fragments(struct tipc_link *l_ptr);
-int tipc_link_is_up(struct tipc_link *l_ptr);
-int tipc_link_is_active(struct tipc_link *l_ptr);
-u32 tipc_link_push_packet(struct tipc_link *l_ptr);
-void tipc_link_stop(struct tipc_link *l_ptr);
-struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd);
-struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space);
-struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space);
-void tipc_link_reset(struct tipc_link *l_ptr);
-int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
-void tipc_link_send_names(struct list_head *message_list, u32 dest);
-int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
-u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
-int tipc_link_send_sections_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- const u32 num_sect,
- unsigned int total_len,
- u32 destnode);
-void tipc_link_recv_bundle(struct sk_buff *buf);
-int tipc_link_recv_fragment(struct sk_buff **pending,
- struct sk_buff **fb,
- struct tipc_msg **msg);
-void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob,
- u32 gap, u32 tolerance, u32 priority,
- u32 acked_mtu);
-void tipc_link_push_queue(struct tipc_link *l_ptr);
-u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
- struct sk_buff *buf);
-void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all);
-void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
-void tipc_link_retransmit(struct tipc_link *l_ptr,
- struct sk_buff *start, u32 retransmits);
-
-/*
- * Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
- */
-static inline u32 buf_seqno(struct sk_buff *buf)
-{
- return msg_seqno(buf_msg(buf));
-}
-
-static inline u32 mod(u32 x)
-{
- return x & 0xffffu;
-}
-
-static inline int between(u32 lower, u32 upper, u32 n)
-{
- if ((lower < n) && (n < upper))
- return 1;
- if ((upper < lower) && ((n > lower) || (n < upper)))
- return 1;
- return 0;
-}
-
-static inline int less_eq(u32 left, u32 right)
-{
- return mod(right - left) < 32768u;
-}
-
-static inline int less(u32 left, u32 right)
-{
- return less_eq(left, right) && (mod(right) != mod(left));
-}
-
-static inline u32 lesser(u32 left, u32 right)
-{
- return less_eq(left, right) ? left : right;
-}
-
-
-/*
- * Link status checking routines
+/* Starting value for maximum packet size negotiation on unicast links
+ * (unless bearer MTU is less)
*/
-static inline int link_working_working(struct tipc_link *l_ptr)
-{
- return l_ptr->state == WORKING_WORKING;
-}
-
-static inline int link_working_unknown(struct tipc_link *l_ptr)
-{
- return l_ptr->state == WORKING_UNKNOWN;
-}
-
-static inline int link_reset_unknown(struct tipc_link *l_ptr)
-{
- return l_ptr->state == RESET_UNKNOWN;
-}
-
-static inline int link_reset_reset(struct tipc_link *l_ptr)
-{
- return l_ptr->state == RESET_RESET;
-}
-
-static inline int link_blocked(struct tipc_link *l_ptr)
-{
- return l_ptr->exp_msg_count || l_ptr->blocked;
-}
-
-static inline int link_congested(struct tipc_link *l_ptr)
-{
- return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
-}
+#define MAX_PKT_DEFAULT 1500
+bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
+ int tolerance, char net_plane, u32 mtu, int priority,
+ u32 min_win, u32 max_win, u32 session, u32 ownnode,
+ u32 peer, u8 *peer_id, u16 peer_caps,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link *bc_rcvlink,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
+ struct tipc_link **link);
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
+ int mtu, u32 min_win, u32 max_win, u16 peer_caps,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link **link);
+void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ int mtyp, struct sk_buff_head *xmitq);
+void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl,
+ struct sk_buff_head *xmitq);
+void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ struct sk_buff_head *xmitq);
+void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
+int tipc_link_fsm_evt(struct tipc_link *l, int evt);
+bool tipc_link_is_up(struct tipc_link *l);
+bool tipc_link_peer_is_down(struct tipc_link *l);
+bool tipc_link_is_reset(struct tipc_link *l);
+bool tipc_link_is_establishing(struct tipc_link *l);
+bool tipc_link_is_synching(struct tipc_link *l);
+bool tipc_link_is_failingover(struct tipc_link *l);
+bool tipc_link_is_blocked(struct tipc_link *l);
+void tipc_link_set_active(struct tipc_link *l, bool active);
+void tipc_link_reset(struct tipc_link *l);
+void tipc_link_reset_stats(struct tipc_link *l);
+int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
+ struct sk_buff_head *xmitq);
+struct sk_buff_head *tipc_link_inputq(struct tipc_link *l);
+u16 tipc_link_rcv_nxt(struct tipc_link *l);
+u16 tipc_link_acked(struct tipc_link *l);
+u32 tipc_link_id(struct tipc_link *l);
+char *tipc_link_name(struct tipc_link *l);
+u32 tipc_link_state(struct tipc_link *l);
+char tipc_link_plane(struct tipc_link *l);
+int tipc_link_prio(struct tipc_link *l);
+int tipc_link_min_win(struct tipc_link *l);
+int tipc_link_max_win(struct tipc_link *l);
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
+unsigned long tipc_link_tolerance(struct tipc_link *l);
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+ struct sk_buff_head *xmitq);
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+ struct sk_buff_head *xmitq);
+void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win);
+int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *link, int nlflags);
+int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
+int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
+int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
+int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
+void tipc_link_add_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *uc_l,
+ struct sk_buff_head *xmitq);
+void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *rcv_l,
+ struct sk_buff_head *xmitq);
+int tipc_link_bc_peers(struct tipc_link *l);
+void tipc_link_set_mtu(struct tipc_link *l, int mtu);
+int tipc_link_mtu(struct tipc_link *l);
+int tipc_link_mss(struct tipc_link *l);
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc);
+int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq);
+void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
+int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+ struct sk_buff_head *xmitq);
+int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
+bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
#endif
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
new file mode 100644
index 000000000000..572b79bf76ce
--- /dev/null
+++ b/net/tipc/monitor.c
@@ -0,0 +1,875 @@
+/*
+ * net/tipc/monitor.c
+ *
+ * Copyright (c) 2016, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <net/genetlink.h>
+#include "core.h"
+#include "addr.h"
+#include "monitor.h"
+#include "bearer.h"
+
+/* Upper bound on members in one domain record; equals the bit width of up_map */
+#define MAX_MON_DOMAIN 64
+/* Base period of the monitor timer, in milliseconds (see tipc_mon_create()) */
+#define MON_TIMEOUT 120000
+/* down_cnt value at which tipc_mon_get_state() flags the link for reset */
+#define MAX_PEER_DOWN_EVENTS 4
+
+/* struct tipc_mon_domain: domain record to be transferred between peers
+ * @len: actual size of domain record, in bytes (see dom_rec_len())
+ * @gen: current generation of sender's domain
+ * @ack_gen: most recent generation of self's domain acked by peer
+ * @member_cnt: number of domain member nodes described in this record
+ * @up_map: bit map indicating which of the members the sender considers up;
+ *          bit i describes members[i]
+ * @members: identity of the domain members
+ *
+ * The same layout is used for records on the wire (see tipc_mon_prep() and
+ * tipc_mon_rcv()); there the multi-byte fields are converted with the
+ * mon_cpu_to_*() / mon_*_to_cpu() helpers.
+ */
+struct tipc_mon_domain {
+ u16 len;
+ u16 gen;
+ u16 ack_gen;
+ u16 member_cnt;
+ u64 up_map;
+ u32 members[MAX_MON_DOMAIN];
+};
+
+/* struct tipc_peer: state of a peer node and its domain
+ * @addr: tipc node identity of peer
+ * @domain: most recent domain record from peer
+ * @hash: position in hashed lookup list
+ * @list: position in linked list, in circular ascending order by 'addr'
+ * @applied: number of reported domain members applied on this monitor list
+ * @down_cnt: number of other peers which have reported this one lost
+ * @is_up: peer is up as seen from this node
+ * @is_head: peer is assigned domain head as seen from this node
+ * @is_local: peer is in local domain and should be continuously monitored
+ */
+struct tipc_peer {
+ u32 addr;
+ struct tipc_mon_domain *domain;
+ struct hlist_node hash;
+ struct list_head list;
+ u8 applied;
+ u8 down_cnt;
+ bool is_up;
+ bool is_head;
+ bool is_local;
+};
+
+/* struct tipc_monitor: per-bearer neighbor monitoring state
+ * @peers: hash table of known peers, indexed by tipc_hashfn(addr)
+ * @peer_cnt: number of entries in the monitor list, self included
+ * @self: this node's own entry in the monitor list
+ * @lock: protects the peer list, the hash table and the domain records
+ * @cache: outgoing copy of self's domain record, already in wire byte order
+ * @list_gen: generation of the monitor list; bumped by mon_assign_roles()
+ * @dom_gen: generation of self's domain; bumped when the record changes
+ * @net: network namespace this monitor belongs to
+ * @timer: periodic timer running mon_timeout()
+ * @timer_intv: period of @timer, in jiffies
+ */
+struct tipc_monitor {
+ struct hlist_head peers[NODE_HTABLE_SIZE];
+ int peer_cnt;
+ struct tipc_peer *self;
+ rwlock_t lock;
+ struct tipc_mon_domain cache;
+ u16 list_gen;
+ u16 dom_gen;
+ struct net *net;
+ struct timer_list timer;
+ unsigned long timer_intv;
+};
+
+/* tipc_monitor() : fetch the monitor attached to a bearer, if any
+ */
+static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
+{
+	struct tipc_net *tn = tipc_net(net);
+
+	return tn->monitors[bearer_id];
+}
+
+/* Size in bytes of a domain record carrying MAX_MON_DOMAIN members */
+const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
+
+/* Byte order helpers for domain records on the wire.
+ *
+ * NOTE: despite the "le" in their names, these helpers are implemented with
+ * htons()/htonl()/cpu_to_be64() and their inverses, i.e. they convert to and
+ * from network (big-endian) byte order.
+ */
+
+/* mon_cpu_to_le16() : host to wire order, 16 bit */
+static inline u16 mon_cpu_to_le16(u16 val)
+{
+ return (__force __u16)htons(val);
+}
+
+/* mon_cpu_to_le32() : host to wire order, 32 bit */
+static inline u32 mon_cpu_to_le32(u32 val)
+{
+ return (__force __u32)htonl(val);
+}
+
+/* mon_cpu_to_le64() : host to wire order, 64 bit */
+static inline u64 mon_cpu_to_le64(u64 val)
+{
+ return (__force __u64)cpu_to_be64(val);
+}
+
+/* mon_le16_to_cpu() : wire to host order, 16 bit */
+static inline u16 mon_le16_to_cpu(u16 val)
+{
+ return ntohs((__force __be16)val);
+}
+
+/* mon_le32_to_cpu() : wire to host order, 32 bit */
+static inline u32 mon_le32_to_cpu(u32 val)
+{
+ return ntohl((__force __be32)val);
+}
+
+/* mon_le64_to_cpu() : wire to host order, 64 bit */
+static inline u64 mon_le64_to_cpu(u64 val)
+{
+ return be64_to_cpu((__force __be64)val);
+}
+
+/* dom_rec_len() : length in bytes of a domain record holding @mcnt members
+ * The @dom argument is not consulted; only the struct layout matters.
+ */
+static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
+{
+	size_t hdr_len = offsetof(struct tipc_mon_domain, members);
+
+	return hdr_len + (mcnt * sizeof(u32));
+}
+
+/* dom_size() : own domain size for a cluster of @peers nodes
+ * Yields the smallest integer whose square is at least @peers, capped at
+ * MAX_MON_DOMAIN.
+ */
+static int dom_size(int peers)
+{
+	int root;
+
+	for (root = 0; (root * root) < peers; root++)
+		;
+	return min(root, MAX_MON_DOMAIN);
+}
+
+/* map_set() : clear bit @i of @up_map, then OR in @v shifted to position @i
+ */
+static void map_set(u64 *up_map, int i, unsigned int v)
+{
+	u64 mask = 1ULL << i;
+	u64 cleared = *up_map & ~mask;
+
+	*up_map = cleared | ((u64)v << i);
+}
+
+/* map_get() : read bit @i of @up_map as 0 or 1
+ */
+static int map_get(u64 up_map, int i)
+{
+	return (up_map >> i) & 1ULL;
+}
+
+/* peer_prev() : entry preceding @peer in the circular monitor list
+ */
+static struct tipc_peer *peer_prev(struct tipc_peer *peer)
+{
+	struct list_head *pos = peer->list.prev;
+
+	return list_entry(pos, struct tipc_peer, list);
+}
+
+/* peer_nxt() : entry following @peer in the circular monitor list
+ */
+static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
+{
+	struct list_head *pos = peer->list.next;
+
+	return list_entry(pos, struct tipc_peer, list);
+}
+
+/* peer_head() : walk backwards from @peer to the nearest entry flagged
+ * is_head; @peer itself is returned if it carries the flag
+ */
+static struct tipc_peer *peer_head(struct tipc_peer *peer)
+{
+	struct tipc_peer *cur;
+
+	for (cur = peer; !cur->is_head; cur = peer_prev(cur))
+		;
+	return cur;
+}
+
+/* get_peer() : hash lookup of the peer with node address @addr
+ * Returns NULL if no such peer is present in @mon's hash table.
+ */
+static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
+{
+	struct hlist_head *bucket = &mon->peers[tipc_hashfn(addr)];
+	struct tipc_peer *cand;
+
+	hlist_for_each_entry(cand, bucket, hash) {
+		if (cand->addr == addr)
+			return cand;
+	}
+	return NULL;
+}
+
+/* get_self() : own peer entry of the monitor attached to @bearer_id
+ * The monitor pointer is dereferenced without a check.
+ */
+static struct tipc_peer *get_self(struct net *net, int bearer_id)
+{
+	return tipc_monitor(net, bearer_id)->self;
+}
+
+/* tipc_mon_is_active() : true when the peer count exceeds the configured
+ * monitoring threshold
+ */
+static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
+{
+	return mon->peer_cnt > tipc_net(net)->mon_threshold;
+}
+
+/* mon_identify_lost_members() : - identify and mark potentially lost members
+ * @peer: peer whose domain record or applied count has just changed
+ * @dom_bef: @peer's domain record as it was before the change
+ * @applied_bef: @peer's applied count as it was before the change
+ *
+ * Walks the @applied_bef monitor list entries following @peer and adjusts
+ * the down_cnt of those which both this node and the old record saw as up.
+ */
+static void mon_identify_lost_members(struct tipc_peer *peer,
+ struct tipc_mon_domain *dom_bef,
+ int applied_bef)
+{
+ struct tipc_peer *member = peer;
+ struct tipc_mon_domain *dom_aft = peer->domain;
+ int applied_aft = peer->applied;
+ int i;
+
+ for (i = 0; i < applied_bef; i++) {
+ member = peer_nxt(member);
+
+ /* Do nothing if self or peer already see member as down */
+ if (!member->is_up || !map_get(dom_bef->up_map, i))
+ continue;
+
+ /* Loss of local node must be detected by active probing */
+ if (member->is_local)
+ continue;
+
+ /* Start probing if member was removed from applied domain */
+ /* NOTE(review): index i == applied_aft lies outside [0, applied_aft)
+ * yet is not caught here - confirm whether '<=' was intended
+ */
+ if (!applied_aft || (applied_aft < i)) {
+ member->down_cnt = 1;
+ continue;
+ }
+
+ /* Member loss is confirmed if it is still in applied domain */
+ if (!map_get(dom_aft->up_map, i))
+ member->down_cnt++;
+ }
+}
+
+/* mon_apply_domain() : match a peer's domain record against monitor list
+ * Recomputes peer->applied as the number of leading record members whose
+ * addresses coincide, in order, with the list entries following @peer.
+ * Leaves peer->applied untouched if the peer is down or has no record.
+ */
+static void mon_apply_domain(struct tipc_monitor *mon,
+			     struct tipc_peer *peer)
+{
+	struct tipc_mon_domain *dom = peer->domain;
+	struct tipc_peer *cur;
+	int idx;
+
+	if (!dom)
+		return;
+	if (!peer->is_up)
+		return;
+
+	/* Count matches until the first divergence from the monitor list */
+	peer->applied = 0;
+	for (idx = 0, cur = peer_nxt(peer); idx < dom->member_cnt;
+	     idx++, cur = peer_nxt(cur)) {
+		if (dom->members[idx] != cur->addr)
+			break;
+		peer->applied++;
+	}
+}
+
+/* mon_update_local_domain() : update after peer addition/removal/up/down
+ * @mon: monitor whose own (self) domain record is to be refreshed
+ *
+ * Rebuilds self's domain record from the monitor list entries following
+ * self. If member count, member identities or up map changed, a new domain
+ * generation is assigned and the header of the outgoing copy in mon->cache
+ * is refreshed in wire byte order.
+ */
+static void mon_update_local_domain(struct tipc_monitor *mon)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_mon_domain *cache = &mon->cache;
+ struct tipc_mon_domain *dom = self->domain;
+ struct tipc_peer *peer = self;
+ u64 prev_up_map = dom->up_map;
+ u16 member_cnt, i;
+ bool diff;
+
+ /* Update local domain size based on current size of cluster */
+ member_cnt = dom_size(mon->peer_cnt) - 1;
+ self->applied = member_cnt;
+
+ /* Update native and cached outgoing local domain records */
+ dom->len = dom_rec_len(dom, member_cnt);
+ diff = dom->member_cnt != member_cnt;
+ dom->member_cnt = member_cnt;
+ for (i = 0; i < member_cnt; i++) {
+ peer = peer_nxt(peer);
+ diff |= dom->members[i] != peer->addr;
+ dom->members[i] = peer->addr;
+ map_set(&dom->up_map, i, peer->is_up);
+ cache->members[i] = mon_cpu_to_le32(peer->addr);
+ }
+ diff |= dom->up_map != prev_up_map;
+ /* Nothing changed: keep the current generation */
+ if (!diff)
+ return;
+ dom->gen = ++mon->dom_gen;
+ cache->len = mon_cpu_to_le16(dom->len);
+ cache->gen = mon_cpu_to_le16(dom->gen);
+ cache->member_cnt = mon_cpu_to_le16(member_cnt);
+ cache->up_map = mon_cpu_to_le64(dom->up_map);
+ mon_apply_domain(mon, self);
+}
+
+/* mon_update_neighbors() : update preceding neighbors of added/removed peer
+ * Re-applies the domain record of @peer and of the entries before it in the
+ * monitor list, dom_size(peer_cnt) entries in total.
+ */
+static void mon_update_neighbors(struct tipc_monitor *mon,
+				 struct tipc_peer *peer)
+{
+	int remaining = dom_size(mon->peer_cnt);
+
+	while (remaining-- > 0) {
+		mon_apply_domain(mon, peer);
+		peer = peer_prev(peer);
+	}
+}
+
+/* mon_assign_roles() : reassign peer roles after a network change
+ * @mon: monitor whose list is to be updated
+ * @head: domain head from which the walk along the monitor list starts
+ *
+ * The monitor list is consistent at this stage; i.e., each peer is monitoring
+ * a set of domain members as matched between domain record and the monitor list
+ *
+ * The walk ends when it reaches self or an already assigned head. The list
+ * generation is incremented on every call.
+ */
+static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
+{
+ struct tipc_peer *peer = peer_nxt(head);
+ struct tipc_peer *self = mon->self;
+ int i = 0;
+
+ for (; peer != self; peer = peer_nxt(peer)) {
+ peer->is_local = false;
+
+ /* Update domain member */
+ if (i++ < head->applied) {
+ peer->is_head = false;
+ /* Members of self's own domain are monitored locally */
+ if (head == self)
+ peer->is_local = true;
+ continue;
+ }
+ /* Assign next domain head */
+ if (!peer->is_up)
+ continue;
+ if (peer->is_head)
+ break;
+ head = peer;
+ head->is_head = true;
+ i = 0;
+ }
+ mon->list_gen++;
+}
+
+/* tipc_mon_remove_peer() : drop a peer from the monitor of a bearer
+ * @net: network namespace
+ * @addr: node address of the peer to remove
+ * @bearer_id: bearer whose monitor is to be updated
+ *
+ * Does nothing if the bearer has no monitor or the peer is unknown.
+ */
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self;
+ struct tipc_peer *peer, *prev, *head;
+
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer)
+ goto exit;
+ /* Remember the predecessor before the entry is unlinked and freed */
+ prev = peer_prev(peer);
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ mon->peer_cnt--;
+ head = peer_head(prev);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_update_neighbors(mon, prev);
+
+ /* Revert to full-mesh monitoring if we reach threshold */
+ if (!tipc_mon_is_active(net, mon)) {
+ /* Discard the stored domain record of every peer except self */
+ list_for_each_entry(peer, &self->list, list) {
+ kfree(peer->domain);
+ peer->domain = NULL;
+ peer->applied = 0;
+ }
+ }
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_add_peer() : create a peer entry and insert it into the monitor
+ * @mon: monitor to add the peer to
+ * @addr: node address of the new peer
+ * @peer: out: the new entry, or NULL if allocation failed
+ *
+ * Called from tipc_mon_peer_up() with mon->lock write-held.
+ * Returns true on success, false if the entry could not be allocated.
+ */
+static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
+ struct tipc_peer **peer)
+{
+ struct tipc_peer *self = mon->self;
+ struct tipc_peer *cur, *prev, *p;
+
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ *peer = p;
+ if (!p)
+ return false;
+ p->addr = addr;
+
+ /* Add new peer to lookup list */
+ INIT_LIST_HEAD(&p->list);
+ hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
+
+ /* Sort new peer into iterator list, in ascending circular order */
+ prev = self;
+ list_for_each_entry(cur, &self->list, list) {
+ /* addr falls strictly between two consecutive entries */
+ if ((addr > prev->addr) && (addr < cur->addr))
+ break;
+ /* prev -> cur is the wrap-around point of the circular order */
+ if (((addr < cur->addr) || (addr > prev->addr)) &&
+ (prev->addr > cur->addr))
+ break;
+ prev = cur;
+ }
+ /* Insert immediately before 'cur' */
+ list_add_tail(&p->list, &cur->list);
+ mon->peer_cnt++;
+ mon_update_neighbors(mon, p);
+ return true;
+}
+
+/* tipc_mon_peer_up() : mark a peer as up, adding it to the monitor if unknown
+ * @net: network namespace
+ * @addr: node address of the peer
+ * @bearer_id: bearer whose monitor is to be updated
+ *
+ * Does nothing if the bearer has no monitor, or if the peer is unknown and a
+ * new entry cannot be allocated.
+ */
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_peer *self;
+	struct tipc_peer *peer, *head;
+
+	/* get_self() dereferences the monitor; bail out first if it is absent,
+	 * as tipc_mon_peer_down() and tipc_mon_remove_peer() do
+	 */
+	if (!mon)
+		return;
+
+	self = get_self(net, bearer_id);
+	write_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
+		goto exit;
+	peer->is_up = true;
+	head = peer_head(peer);
+	/* A change inside self's own domain must be reflected in its record */
+	if (head == self)
+		mon_update_local_domain(mon);
+	mon_assign_roles(mon, head);
+exit:
+	write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_peer_down() : mark a peer as down and discard its domain record
+ * @net: network namespace
+ * @addr: node address of the peer
+ * @bearer_id: bearer whose monitor is to be updated
+ *
+ * Does nothing if the bearer has no monitor; warns if the peer is unknown.
+ */
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self;
+ struct tipc_peer *peer, *head;
+ struct tipc_mon_domain *dom;
+ int applied;
+
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
+ write_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (!peer) {
+ pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
+ goto exit;
+ }
+ /* Detach the record but keep it around for the lost-member scan */
+ applied = peer->applied;
+ peer->applied = 0;
+ dom = peer->domain;
+ peer->domain = NULL;
+ if (peer->is_head)
+ mon_identify_lost_members(peer, dom, applied);
+ kfree(dom);
+ peer->is_up = false;
+ peer->is_head = false;
+ peer->is_local = false;
+ peer->down_cnt = 0;
+ head = peer_head(peer);
+ if (head == self)
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, head);
+exit:
+ write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_rcv - process monitor domain event message
+ * @net: network namespace
+ * @data: received domain record, in wire byte order
+ * @dlen: number of bytes available at @data
+ * @addr: node address of the sending peer
+ * @state: the link's cached monitor state for this peer; updated in place
+ * @bearer_id: bearer the record arrived on
+ *
+ * Records failing the sanity checks, and duplicates received while no probe
+ * response is awaited, are dropped silently.
+ */
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+		  struct tipc_mon_state *state, int bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	struct tipc_mon_domain *arrv_dom = data;
+	struct tipc_mon_domain dom_bef;
+	struct tipc_mon_domain *dom;
+	struct tipc_peer *peer;
+	u16 new_member_cnt, new_gen, acked_gen, arrv_dlen;
+	bool probing = state->probing;
+	int i, applied_bef, new_dlen;
+
+	state->probing = false;
+
+	/* The fixed header must be present before any of it is read */
+	if (dlen < dom_rec_len(arrv_dom, 0))
+		return;
+
+	new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt);
+	new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
+	new_gen = mon_le16_to_cpu(arrv_dom->gen);
+	acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen);
+	arrv_dlen = mon_le16_to_cpu(arrv_dom->len);
+
+	/* Sanity check received domain record */
+	if (new_member_cnt > MAX_MON_DOMAIN)
+		return;
+	if (dlen != new_dlen || arrv_dlen != new_dlen)
+		return;
+
+	/* Synch generation numbers with peer if link just came up */
+	if (!state->synched) {
+		state->peer_gen = new_gen - 1;
+		state->acked_gen = acked_gen;
+		state->synched = true;
+	}
+
+	if (more(acked_gen, state->acked_gen))
+		state->acked_gen = acked_gen;
+
+	/* Drop duplicate unless we are waiting for a probe response */
+	if (!more(new_gen, state->peer_gen) && !probing)
+		return;
+
+	write_lock_bh(&mon->lock);
+	peer = get_peer(mon, addr);
+	if (!peer || !peer->is_up)
+		goto exit;
+
+	/* Peer is confirmed, stop any ongoing probing */
+	peer->down_cnt = 0;
+
+	/* Task is done for duplicate record */
+	if (!more(new_gen, state->peer_gen))
+		goto exit;
+
+	state->peer_gen = new_gen;
+
+	/* Cache current domain record for later use. Without a stored record
+	 * the snapshot reports no members and nobody up, so that
+	 * mon_identify_lost_members() never reads uninitialized stack data.
+	 */
+	dom_bef.member_cnt = 0;
+	dom_bef.up_map = 0;
+	dom = peer->domain;
+	if (dom)
+		memcpy(&dom_bef, dom, dom->len);
+
+	/* Transform and store received domain record */
+	if (!dom || (dom->len < new_dlen)) {
+		kfree(dom);
+		dom = kmalloc(new_dlen, GFP_ATOMIC);
+		peer->domain = dom;
+		if (!dom)
+			goto exit;
+	}
+	dom->len = new_dlen;
+	dom->gen = new_gen;
+	dom->member_cnt = new_member_cnt;
+	dom->up_map = mon_le64_to_cpu(arrv_dom->up_map);
+	for (i = 0; i < new_member_cnt; i++)
+		dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]);
+
+	/* Update peers affected by this domain record */
+	applied_bef = peer->applied;
+	mon_apply_domain(mon, peer);
+	mon_identify_lost_members(peer, &dom_bef, applied_bef);
+	mon_assign_roles(mon, peer_head(peer));
+exit:
+	write_unlock_bh(&mon->lock);
+}
+
+/* tipc_mon_prep() : build the domain record to be sent to a peer
+ * @net: network namespace
+ * @data: buffer receiving the record, in wire byte order
+ * @dlen: out: number of bytes written; left untouched when not active
+ * @state: the link's cached monitor state for the peer
+ * @bearer_id: bearer whose monitor supplies the record
+ */
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_mon_domain *dom = data;
+ u16 gen = mon->dom_gen;
+ u16 len;
+
+ /* Send invalid record if not active */
+ if (!tipc_mon_is_active(net, mon)) {
+ dom->len = 0;
+ return;
+ }
+
+ /* Send only a dummy record with ack if peer has acked our last sent */
+ if (likely(state->acked_gen == gen)) {
+ len = dom_rec_len(dom, 0);
+ *dlen = len;
+ dom->len = mon_cpu_to_le16(len);
+ dom->gen = mon_cpu_to_le16(gen);
+ dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
+ dom->member_cnt = 0;
+ return;
+ }
+ /* Send the full record */
+ read_lock_bh(&mon->lock);
+ len = mon_le16_to_cpu(mon->cache.len);
+ *dlen = len;
+ memcpy(data, &mon->cache, len);
+ read_unlock_bh(&mon->lock);
+ /* The ack is per link, so it is filled in after copying the shared cache */
+ dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
+}
+
+/* tipc_mon_get_state() : refresh a link's cached view of the monitor
+ * @net: network namespace
+ * @addr: node address of the link's peer
+ * @state: cached monitor state to update
+ * @bearer_id: bearer whose monitor is consulted
+ *
+ * When the monitor is not active every peer is reported as monitored and
+ * not probed.
+ */
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *peer;
+
+ if (!tipc_mon_is_active(net, mon)) {
+ state->probing = false;
+ state->monitoring = true;
+ return;
+ }
+
+ /* Use cached state if table has not changed */
+ if (!state->probing &&
+ (state->list_gen == mon->list_gen) &&
+ (state->acked_gen == mon->dom_gen))
+ return;
+
+ read_lock_bh(&mon->lock);
+ peer = get_peer(mon, addr);
+ if (peer) {
+ /* Probe while our latest domain is unacked or the peer is suspected lost */
+ state->probing = state->acked_gen != mon->dom_gen;
+ state->probing |= peer->down_cnt;
+ state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
+ state->monitoring = peer->is_local;
+ state->monitoring |= peer->is_head;
+ state->list_gen = mon->list_gen;
+ }
+ read_unlock_bh(&mon->lock);
+}
+
+/* mon_timeout() : periodic check of the local domain size
+ * Rebuilds self's domain and reassigns roles when the size derived from the
+ * current peer count differs from what self has applied, then re-arms the
+ * timer.
+ */
+static void mon_timeout(struct timer_list *t)
+{
+ struct tipc_monitor *mon = timer_container_of(mon, t, timer);
+ struct tipc_peer *self;
+ int best_member_cnt = dom_size(mon->peer_cnt) - 1;
+
+ write_lock_bh(&mon->lock);
+ self = mon->self;
+ /* self is NULL once tipc_mon_delete() has started tearing down */
+ if (self && (best_member_cnt != self->applied)) {
+ mon_update_local_domain(mon);
+ mon_assign_roles(mon, self);
+ }
+ write_unlock_bh(&mon->lock);
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+}
+
+/* tipc_mon_create() : set up the monitor for a bearer
+ * @net: network namespace
+ * @bearer_id: bearer to attach the monitor to
+ *
+ * Returns 0 on success or if the bearer already has a monitor, -ENOMEM if
+ * any of the allocations fails.
+ */
+int tipc_mon_create(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon;
+ struct tipc_peer *self;
+ struct tipc_mon_domain *dom;
+
+ if (tn->monitors[bearer_id])
+ return 0;
+
+ mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
+ self = kzalloc(sizeof(*self), GFP_ATOMIC);
+ dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
+ if (!mon || !self || !dom) {
+ kfree(mon);
+ kfree(self);
+ kfree(dom);
+ return -ENOMEM;
+ }
+ tn->monitors[bearer_id] = mon;
+ rwlock_init(&mon->lock);
+ mon->net = net;
+ /* The own node is the first entry of the monitor list and its first head */
+ mon->peer_cnt = 1;
+ mon->self = self;
+ self->domain = dom;
+ self->addr = tipc_own_addr(net);
+ self->is_up = true;
+ self->is_head = true;
+ INIT_LIST_HEAD(&self->list);
+ timer_setup(&mon->timer, mon_timeout, 0);
+ /* Base period plus the low 16 bits of tn->random, in milliseconds */
+ mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
+ mod_timer(&mon->timer, jiffies + mon->timer_intv);
+ return 0;
+}
+
+/* tipc_mon_delete() : tear down the monitor of a bearer
+ * @net: network namespace
+ * @bearer_id: bearer whose monitor is to be removed
+ *
+ * Does nothing if the bearer has no monitor.
+ */
+void tipc_mon_delete(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *self;
+ struct tipc_peer *peer, *tmp;
+
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
+ write_lock_bh(&mon->lock);
+ tn->monitors[bearer_id] = NULL;
+ /* Free every peer except self, which anchors the list */
+ list_for_each_entry_safe(peer, tmp, &self->list, list) {
+ list_del(&peer->list);
+ hlist_del(&peer->hash);
+ kfree(peer->domain);
+ kfree(peer);
+ }
+ /* mon_timeout() and tipc_mon_reinit_self() skip a monitor without self */
+ mon->self = NULL;
+ write_unlock_bh(&mon->lock);
+ timer_shutdown_sync(&mon->timer);
+ kfree(self->domain);
+ kfree(self);
+ kfree(mon);
+}
+
+/* tipc_mon_reinit_self() : reload the own node address into every monitor
+ * Bearers without a monitor, and monitors without a self entry, are skipped.
+ */
+void tipc_mon_reinit_self(struct net *net)
+{
+	int id;
+
+	for (id = 0; id < MAX_BEARERS; id++) {
+		struct tipc_monitor *mon = tipc_monitor(net, id);
+
+		if (mon) {
+			write_lock_bh(&mon->lock);
+			if (mon->self)
+				mon->self->addr = tipc_own_addr(net);
+			write_unlock_bh(&mon->lock);
+		}
+	}
+}
+
+/* tipc_nl_monitor_set_threshold() : set the monitoring activation threshold
+ * Returns -EINVAL if @cluster_size exceeds TIPC_CLUSTER_SIZE, else 0.
+ */
+int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
+{
+	if (cluster_size > TIPC_CLUSTER_SIZE)
+		return -EINVAL;
+
+	tipc_net(net)->mon_threshold = cluster_size;
+	return 0;
+}
+
+/* tipc_nl_monitor_get_threshold() : read the monitoring activation threshold
+ */
+int tipc_nl_monitor_get_threshold(struct net *net)
+{
+	return tipc_net(net)->mon_threshold;
+}
+
+/* __tipc_nl_add_monitor_peer() : append one peer to a netlink dump message
+ * Returns 0 on success. If the message runs out of room, everything added
+ * for this peer is cancelled and -EMSGSIZE is returned.
+ */
+static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer,
+				      struct tipc_nl_msg *msg)
+{
+	struct tipc_mon_domain *dom = peer->domain;
+	struct sk_buff *skb = msg->skb;
+	struct nlattr *nest;
+	void *hdr;
+
+	hdr = genlmsg_put(skb, msg->portid, msg->seq, &tipc_genl_family,
+			  NLM_F_MULTI, TIPC_NL_MON_PEER_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	nest = nla_nest_start_noflag(skb, TIPC_NLA_MON_PEER);
+	if (!nest)
+		goto msg_full;
+
+	/* Identity and applied count */
+	if (nla_put_u32(skb, TIPC_NLA_MON_PEER_ADDR, peer->addr) ||
+	    nla_put_u32(skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied))
+		goto attr_msg_full;
+
+	/* Role flags are only present when set */
+	if (peer->is_up && nla_put_flag(skb, TIPC_NLA_MON_PEER_UP))
+		goto attr_msg_full;
+	if (peer->is_local && nla_put_flag(skb, TIPC_NLA_MON_PEER_LOCAL))
+		goto attr_msg_full;
+	if (peer->is_head && nla_put_flag(skb, TIPC_NLA_MON_PEER_HEAD))
+		goto attr_msg_full;
+
+	/* Domain record, if the peer has one */
+	if (dom &&
+	    (nla_put_u32(skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen) ||
+	     nla_put_u64_64bit(skb, TIPC_NLA_MON_PEER_UPMAP,
+			       dom->up_map, TIPC_NLA_MON_PEER_PAD) ||
+	     nla_put(skb, TIPC_NLA_MON_PEER_MEMBERS,
+		     dom->member_cnt * sizeof(u32), &dom->members)))
+		goto attr_msg_full;
+
+	nla_nest_end(skb, nest);
+	genlmsg_end(skb, hdr);
+	return 0;
+
+attr_msg_full:
+	nla_nest_cancel(skb, nest);
+msg_full:
+	genlmsg_cancel(skb, hdr);
+
+	return -EMSGSIZE;
+}
+
+/* tipc_nl_add_monitor_peer() : dump the monitor list of a bearer
+ * @net: network namespace
+ * @msg: netlink message being filled
+ * @bearer_id: bearer whose monitor is dumped
+ * @prev_node: in: address of the peer to resume from, or 0 to start at self;
+ *             out: address of the peer that did not fit on -EMSGSIZE
+ *
+ * Returns 0 when the walk completes, -EINVAL if the bearer has no monitor,
+ * -EMSGSIZE if the message filled up.
+ */
+int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
+ u32 bearer_id, u32 *prev_node)
+{
+ struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+ struct tipc_peer *peer;
+
+ if (!mon)
+ return -EINVAL;
+
+ read_lock_bh(&mon->lock);
+ peer = mon->self;
+ do {
+ /* Skip forward to the resume point; 'continue' still advances peer
+ * through the loop condition below
+ */
+ if (*prev_node) {
+ if (peer->addr == *prev_node)
+ *prev_node = 0;
+ else
+ continue;
+ }
+ if (__tipc_nl_add_monitor_peer(peer, msg)) {
+ *prev_node = peer->addr;
+ read_unlock_bh(&mon->lock);
+ return -EMSGSIZE;
+ }
+ } while ((peer = peer_nxt(peer)) != mon->self);
+ read_unlock_bh(&mon->lock);
+
+ return 0;
+}
+
+/* __tipc_nl_add_monitor() : append a bearer's monitor summary to a netlink
+ * dump message
+ * Returns 0 on success and also when the bearer name cannot be resolved or
+ * the bearer has no monitor; -EMSGSIZE if the message runs out of room.
+ */
+int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
+			  u32 bearer_id)
+{
+	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
+	char bearer_name[TIPC_MAX_BEARER_NAME];
+	struct sk_buff *skb = msg->skb;
+	struct nlattr *nest;
+	void *hdr;
+	int err;
+
+	err = tipc_bearer_get_name(net, bearer_name, bearer_id);
+	if (err || !mon)
+		return 0;
+
+	hdr = genlmsg_put(skb, msg->portid, msg->seq, &tipc_genl_family,
+			  NLM_F_MULTI, TIPC_NL_MON_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	nest = nla_nest_start_noflag(skb, TIPC_NLA_MON);
+	if (!nest)
+		goto msg_full;
+
+	/* Attributes are emitted in order; the first failure aborts the rest */
+	read_lock_bh(&mon->lock);
+	if (nla_put_u32(skb, TIPC_NLA_MON_REF, bearer_id) ||
+	    (tipc_mon_is_active(net, mon) &&
+	     nla_put_flag(skb, TIPC_NLA_MON_ACTIVE)) ||
+	    nla_put_string(skb, TIPC_NLA_MON_BEARER_NAME, bearer_name) ||
+	    nla_put_u32(skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt) ||
+	    nla_put_u32(skb, TIPC_NLA_MON_LISTGEN, mon->list_gen))
+		goto attr_msg_full;
+
+	read_unlock_bh(&mon->lock);
+	nla_nest_end(skb, nest);
+	genlmsg_end(skb, hdr);
+
+	return 0;
+
+attr_msg_full:
+	read_unlock_bh(&mon->lock);
+	nla_nest_cancel(skb, nest);
+msg_full:
+	genlmsg_cancel(skb, hdr);
+
+	return -EMSGSIZE;
+}
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
new file mode 100644
index 000000000000..ed63d2e650b0
--- /dev/null
+++ b/net/tipc/monitor.h
@@ -0,0 +1,83 @@
+/*
+ * net/tipc/monitor.h
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_MONITOR_H
+#define _TIPC_MONITOR_H
+
+#include "netlink.h"
+
+/* struct tipc_mon_state: link instance's cache of monitor list and domain state
+ * @list_gen: current generation of this node's monitor list
+ * @peer_gen: most recent domain generation received from peer
+ * @acked_gen: most recent generation of self's domain acked by peer
+ * @monitoring: this peer endpoint should be continuously monitored
+ * @probing: peer endpoint should be temporarily probed for potential loss
+ * @reset: set by tipc_mon_get_state() once the peer's down count has reached
+ *         MAX_PEER_DOWN_EVENTS
+ * @synched: domain record's generation has been synched with peer after reset
+ */
+struct tipc_mon_state {
+ u16 list_gen;
+ u16 peer_gen;
+ u16 acked_gen;
+ bool monitoring :1;
+ bool probing :1;
+ bool reset :1;
+ bool synched :1;
+};
+
+int tipc_mon_create(struct net *net, int bearer_id);
+void tipc_mon_delete(struct net *net, int bearer_id);
+
+void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id);
+void tipc_mon_prep(struct net *net, void *data, int *dlen,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
+ struct tipc_mon_state *state, int bearer_id);
+void tipc_mon_get_state(struct net *net, u32 addr,
+ struct tipc_mon_state *state,
+ int bearer_id);
+void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id);
+
+int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size);
+int tipc_nl_monitor_get_threshold(struct net *net);
+int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
+ u32 bearer_id);
+int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
+ u32 bearer_id, u32 *prev_node);
+void tipc_mon_reinit_self(struct net *net);
+
+extern const int tipc_max_domain_size;
+#endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ced60e2fc4f7..76284fc538eb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.c: TIPC message header routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -34,73 +34,822 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <net/sock.h>
#include "core.h"
#include "msg.h"
+#include "addr.h"
+#include "name_table.h"
+#include "crypto.h"
-u32 tipc_msg_tot_importance(struct tipc_msg *m)
+#define BUF_ALIGN(x) ALIGN(x, 4)
+#define MAX_FORWARD_SIZE 1024
+#ifdef CONFIG_TIPC_CRYPTO
+#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
+#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
+#else
+#define BUF_HEADROOM (LL_MAX_HEADER + 48)
+#define BUF_OVERHEAD BUF_HEADROOM
+#endif
+
+const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+/**
+ * tipc_buf_acquire - creates a TIPC message buffer
+ * @size: message size (including TIPC header)
+ * @gfp: memory allocation flags
+ *
+ * Return: a new buffer with data pointers set to the specified size.
+ *
+ * NOTE:
+ * Headroom is reserved to allow prepending of a data link header.
+ * There may also be unrequested tailroom present at the buffer's end.
+ */
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
- if (likely(msg_isdata(m))) {
- if (likely(msg_orignode(m) == tipc_own_addr))
- return msg_importance(m);
- return msg_importance(m) + 4;
+ struct sk_buff *skb;
+
+ skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
+ if (skb) {
+ skb_reserve(skb, BUF_HEADROOM);
+ skb_put(skb, size);
+ skb->next = NULL;
}
- if ((msg_user(m) == MSG_FRAGMENTER) &&
- (msg_type(m) == FIRST_FRAGMENT))
- return msg_importance(msg_get_wrapped(m));
- return msg_importance(m);
+ return skb;
}
-
-void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
- u32 destnode)
+void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type,
+ u32 hsize, u32 dnode)
{
memset(m, 0, hsize);
msg_set_version(m);
msg_set_user(m, user);
msg_set_hdr_sz(m, hsize);
msg_set_size(m, hsize);
- msg_set_prevnode(m, tipc_own_addr);
+ msg_set_prevnode(m, own_node);
msg_set_type(m, type);
- msg_set_orignode(m, tipc_own_addr);
- msg_set_destnode(m, destnode);
+ if (hsize > SHORT_H_SIZE) {
+ msg_set_orignode(m, own_node);
+ msg_set_destnode(m, dnode);
+ }
+}
+
+/**
+ * tipc_msg_create - allocate a buffer and initialize its TIPC header
+ * @user: message user, passed on to tipc_msg_init()
+ * @type: message type, passed on to tipc_msg_init()
+ * @hdr_sz: header size; tipc_msg_init() zeroes this many bytes
+ * @data_sz: size of the data area following the header
+ * @dnode: destination node
+ * @onode: own node, passed to tipc_msg_init() as the sending node
+ * @dport: destination port
+ * @oport: originating port
+ * @errcode: error code to store in the header
+ *
+ * The buffer is sized for header plus data, but only the header is written
+ * here; filling in the data area is left to the caller.
+ *
+ * Return: the new buffer, or NULL if the GFP_ATOMIC allocation failed
+ */
+struct sk_buff *tipc_msg_create(uint user, uint type,
+ uint hdr_sz, uint data_sz, u32 dnode,
+ u32 onode, u32 dport, u32 oport, int errcode)
+{
+ struct tipc_msg *msg;
+ struct sk_buff *buf;
+
+ buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC);
+ if (unlikely(!buf))
+ return NULL;
+
+ msg = buf_msg(buf);
+ tipc_msg_init(onode, msg, user, type, hdr_sz, dnode);
+ /* tipc_msg_init() set the size to hdr_sz only; account for the data */
+ msg_set_size(msg, hdr_sz + data_sz);
+ msg_set_origport(msg, oport);
+ msg_set_destport(msg, dport);
+ msg_set_errcode(msg, errcode);
+ return buf;
+}
+
+/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
+ * @*headbuf: in: NULL for first frag, otherwise value returned from prev call
+ * out: set when successful non-complete reassembly, otherwise NULL
+ * @*buf: in: the buffer to append; a NULL buffer is handled as an error
+ * out: head buf after successful complete reassembly, otherwise NULL
+ *
+ * The TIPC header of the incoming fragment is pulled off before the fragment
+ * is linked in. The error exit frees *buf and *headbuf and sets both to NULL.
+ *
+ * Returns 1 when reassembly complete, otherwise 0
+ */
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
+{
+ struct sk_buff *head = *headbuf;
+ struct sk_buff *frag = *buf;
+ struct sk_buff *tail = NULL;
+ struct tipc_msg *msg;
+ u32 fragid;
+ int delta;
+ bool headstolen;
+
+ if (!frag)
+ goto err;
+
+ msg = buf_msg(frag);
+ fragid = msg_type(msg);
+ frag->next = NULL;
+ skb_pull(frag, msg_hdr_sz(msg));
+
+ if (fragid == FIRST_FRAGMENT) {
+ /* A first fragment while a reassembly is in progress is an error */
+ if (unlikely(head))
+ goto err;
+ if (skb_has_frag_list(frag) && __skb_linearize(frag))
+ goto err;
+ /* Clear the caller's reference before skb_unshare() so the error
+ * exit does not touch the old pointer if unsharing fails.
+ */
+ *buf = NULL;
+ frag = skb_unshare(frag, GFP_ATOMIC);
+ if (unlikely(!frag))
+ goto err;
+ head = *headbuf = frag;
+ TIPC_SKB_CB(head)->tail = NULL;
+ return 0;
+ }
+
+ /* A non-first fragment needs a reassembly already in progress */
+ if (!head)
+ goto err;
+
+ /* Either the input skb ownership is transferred to headskb
+ * or the input skb is freed, clear the reference to avoid
+ * bad access on error path.
+ */
+ *buf = NULL;
+ if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
+ kfree_skb_partial(frag, headstolen);
+ } else {
+ /* Could not coalesce: chain the fragment onto head's frag_list,
+ * using the tail pointer cached in the control block.
+ */
+ tail = TIPC_SKB_CB(head)->tail;
+ if (!skb_has_frag_list(head))
+ skb_shinfo(head)->frag_list = frag;
+ else
+ tail->next = frag;
+ head->truesize += frag->truesize;
+ head->data_len += frag->len;
+ head->len += frag->len;
+ TIPC_SKB_CB(head)->tail = frag;
+ }
+
+ if (fragid == LAST_FRAGMENT) {
+ /* Force re-validation of the reassembled message as a whole */
+ TIPC_SKB_CB(head)->validated = 0;
+ if (unlikely(!tipc_msg_validate(&head)))
+ goto err;
+ *buf = head;
+ TIPC_SKB_CB(head)->tail = NULL;
+ *headbuf = NULL;
+ return 1;
+ }
+ return 0;
+err:
+ kfree_skb(*buf);
+ kfree_skb(*headbuf);
+ *buf = *headbuf = NULL;
+ return 0;
+}
/**
- * tipc_msg_build - create message using specified header and data
+ * tipc_msg_append(): Append data to tail of an existing buffer queue
+ * @_hdr: header to be used
+ * @m: the data to be appended
+ * @dlen: size of data to be appended
+ * @mss: max allowable size of buffer
+ * @txq: queue to append to
+ *
+ * Return: the number of 1k blocks appended, or a negative errno value
+ * (-ENOMEM, -EFAULT)
+ */
+int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq)
+{
+ struct sk_buff *skb;
+ int accounted, total, curr;
+ int mlen, cpy, rem = dlen;
+ struct tipc_msg *hdr;
+
+ /* Blocks of the current tail buffer count as already accounted for,
+ * so only the growth caused by this call is returned.
+ */
+ skb = skb_peek_tail(txq);
+ accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
+ total = accounted;
+
+ do {
+ /* Queue empty or tail buffer full: start a new buffer that holds
+ * only a MIN_H_SIZE copy of @_hdr for now.
+ */
+ if (!skb || skb->len >= mss) {
+ skb = tipc_buf_acquire(mss, GFP_KERNEL);
+ if (unlikely(!skb))
+ return -ENOMEM; /* buffers already on txq stay queued */
+ skb_orphan(skb);
+ skb_trim(skb, MIN_H_SIZE);
+ hdr = buf_msg(skb);
+ skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE);
+ msg_set_hdr_sz(hdr, MIN_H_SIZE);
+ msg_set_size(hdr, MIN_H_SIZE);
+ __skb_queue_tail(txq, skb);
+ total += 1;
+ }
+ hdr = buf_msg(skb);
+ curr = msg_blocks(hdr);
+ mlen = msg_size(hdr);
+ /* Copy as much as still fits below mss in this buffer */
+ cpy = min_t(size_t, rem, mss - mlen);
+ if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
+ return -EFAULT; /* buffers already on txq stay queued */
+ msg_set_size(hdr, mlen + cpy);
+ skb_put(skb, cpy);
+ rem -= cpy;
+ total += msg_blocks(hdr) - curr;
+ } while (rem > 0);
+ return total - accounted;
+}
+
+/* tipc_msg_validate - validate basic format of received message
*
- * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
+ * This routine ensures a TIPC message has an acceptable header, and at least
+ * as much data as the header indicates it should. The routine also ensures
+ * that the entire message header is stored in the main fragment of the message
+ * buffer, to simplify future access to message header fields.
*
- * Returns message data size or errno
+ * Note: Having extra info present in the message header or data areas is OK.
+ * TIPC will ignore the excess, under the assumption that it is optional info
+ * introduced by a later release of the protocol.
*/
-int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len, int max_size,
- struct sk_buff **buf)
+bool tipc_msg_validate(struct sk_buff **_skb)
{
- int dsz, sz, hsz, pos, res, cnt;
+ struct sk_buff *skb = *_skb;
+ struct tipc_msg *hdr;
+ int msz, hsz;
- dsz = total_len;
- pos = hsz = msg_hdr_sz(hdr);
- sz = hsz + dsz;
- msg_set_size(hdr, sz);
- if (unlikely(sz > max_size)) {
- *buf = NULL;
- return dsz;
+ /* Ensure that flow control ratio condition is satisfied */
+ if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) {
+ skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC);
+ if (!skb)
+ return false;
+ kfree_skb(*_skb);
+ *_skb = skb;
}
- *buf = tipc_buf_acquire(sz);
- if (!(*buf))
+ if (unlikely(TIPC_SKB_CB(skb)->validated))
+ return true;
+
+ if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
+ return false;
+
+ hsz = msg_hdr_sz(buf_msg(skb));
+ if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE))
+ return false;
+ if (unlikely(!pskb_may_pull(skb, hsz)))
+ return false;
+
+ hdr = buf_msg(skb);
+ if (unlikely(msg_version(hdr) != TIPC_VERSION))
+ return false;
+
+ msz = msg_size(hdr);
+ if (unlikely(msz < hsz))
+ return false;
+ if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
+ return false;
+ if (unlikely(skb->len < msz))
+ return false;
+
+ TIPC_SKB_CB(skb)->validated = 1;
+ return true;
+}
+
+/**
+ * tipc_msg_fragment - build a fragment skb list for TIPC message
+ *
+ * @skb: TIPC message skb
+ * @hdr: internal msg header to be put on the top of the fragments
+ * @pktmax: max size of a fragment incl. the header
+ * @frags: returned fragment skb list
+ *
+ * Return: 0 if the fragmentation is successful, otherwise: -EINVAL
+ * or -ENOMEM
+ */
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+ int pktmax, struct sk_buff_head *frags)
+{
+ int pktno, nof_fragms, dsz, dmax, eat;
+ struct tipc_msg *_hdr;
+ struct sk_buff *_skb;
+ u8 *data;
+
+ /* Non-linear buffer? */
+ if (skb_linearize(skb))
return -ENOMEM;
- skb_copy_to_linear_data(*buf, hdr, hsz);
- for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
- skb_copy_to_linear_data_offset(*buf, pos,
- msg_sect[cnt].iov_base,
- msg_sect[cnt].iov_len);
- pos += msg_sect[cnt].iov_len;
+
+ data = (u8 *)skb->data;
+ dsz = msg_size(buf_msg(skb));
+ dmax = pktmax - INT_H_SIZE;
+ if (dsz <= dmax || !dmax)
+ return -EINVAL;
+
+ nof_fragms = dsz / dmax + 1;
+ for (pktno = 1; pktno <= nof_fragms; pktno++) {
+ if (pktno < nof_fragms)
+ eat = dmax;
+ else
+ eat = dsz % dmax;
+ /* Allocate a new fragment */
+ _skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC);
+ if (!_skb)
+ goto error;
+ skb_orphan(_skb);
+ __skb_queue_tail(frags, _skb);
+ /* Copy header & data to the fragment */
+ skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat);
+ data += eat;
+ /* Update the fragment's header */
+ _hdr = buf_msg(_skb);
+ msg_set_fragm_no(_hdr, pktno);
+ msg_set_nof_fragms(_hdr, nof_fragms);
+ msg_set_size(_hdr, INT_H_SIZE + eat);
}
- if (likely(res))
- return dsz;
+ return 0;
- kfree_skb(*buf);
- *buf = NULL;
- return -EFAULT;
+error:
+ __skb_queue_purge(frags);
+ __skb_queue_head_init(frags);
+ return -ENOMEM;
+}
+
+/**
+ * tipc_msg_build - create buffer chain containing specified header and data
+ * @mhdr: Message header, to be prepended to data
+ * @m: User message
+ * @offset: buffer offset for fragmented messages (FIXME); in this function it
+ * is only forwarded to the recursive call
+ * @dsz: Total length of user data
+ * @pktmax: Max packet size that can be used
+ * @list: Buffer or chain of buffers to be returned to caller
+ *
+ * A message that fits in @pktmax is built as a single buffer; otherwise it is
+ * split into MSG_FRAGMENTER packets, the first of which also carries @mhdr.
+ *
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down: it is made only when
+ * @pktmax == MAX_MSG_SIZE and re-enters with one_page_mtu.
+ *
+ * The error exit at the end of the function purges and re-initializes @list.
+ *
+ * Return: message data size or errno: -ENOMEM, -EFAULT
+ */
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+ int dsz, int pktmax, struct sk_buff_head *list)
+{
+ int mhsz = msg_hdr_sz(mhdr);
+ struct tipc_msg pkthdr;
+ int msz = mhsz + dsz;
+ int pktrem = pktmax;
+ struct sk_buff *skb;
+ int drem = dsz;
+ int pktno = 1;
+ char *pktpos;
+ int pktsz;
+ int rc;
+
+ msg_set_size(mhdr, msz);
+
+ /* No fragmentation needed? */
+ if (likely(msz <= pktmax)) {
+ skb = tipc_buf_acquire(msz, GFP_KERNEL);
+
+ /* Fall back to smaller MTU if node local message */
+ if (unlikely(!skb)) {
+ if (pktmax != MAX_MSG_SIZE)
+ return -ENOMEM;
+ /* Build as page-sized fragments, then glue them back into
+ * one message with tipc_msg_assemble().
+ */
+ rc = tipc_msg_build(mhdr, m, offset, dsz,
+ one_page_mtu, list);
+ if (rc != dsz)
+ return rc;
+ if (tipc_msg_assemble(list))
+ return dsz;
+ return -ENOMEM;
+ }
+ skb_orphan(skb);
+ __skb_queue_tail(list, skb);
+ skb_copy_to_linear_data(skb, mhdr, mhsz);
+ pktpos = skb->data + mhsz;
+ if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
+ return dsz;
+ rc = -EFAULT;
+ goto error;
+ }
+
+ /* Prepare reusable fragment header */
+ tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER,
+ FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr));
+ msg_set_size(&pkthdr, pktmax);
+ msg_set_fragm_no(&pkthdr, pktno);
+ msg_set_importance(&pkthdr, msg_importance(mhdr));
+
+ /* Prepare first fragment */
+ skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ skb_orphan(skb);
+ __skb_queue_tail(list, skb);
+ pktpos = skb->data;
+ skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
+ pktpos += INT_H_SIZE;
+ pktrem -= INT_H_SIZE;
+ /* The first fragment carries the original message header as payload */
+ skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz);
+ pktpos += mhsz;
+ pktrem -= mhsz;
+
+ do {
+ /* Never copy more than the user data that is left */
+ if (drem < pktrem)
+ pktrem = drem;
+
+ if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
+ rc = -EFAULT;
+ goto error;
+ }
+ drem -= pktrem;
+
+ if (!drem)
+ break;
+
+ /* Prepare new fragment: */
+ if (drem < (pktmax - INT_H_SIZE))
+ pktsz = drem + INT_H_SIZE;
+ else
+ pktsz = pktmax;
+ skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
+ if (!skb) {
+ rc = -ENOMEM;
+ goto error;
+ }
+ skb_orphan(skb);
+ __skb_queue_tail(list, skb);
+ msg_set_type(&pkthdr, FRAGMENT);
+ msg_set_size(&pkthdr, pktsz);
+ msg_set_fragm_no(&pkthdr, ++pktno);
+ skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
+ pktpos = skb->data + INT_H_SIZE;
+ pktrem = pktsz - INT_H_SIZE;
+
+ } while (1);
+ /* skb is the most recently queued buffer; retag it as the last one */
+ msg_set_type(buf_msg(skb), LAST_FRAGMENT);
+ return dsz;
+error:
+ __skb_queue_purge(list);
+ __skb_queue_head_init(list);
+ return rc;
+}
+
+/**
+ * tipc_msg_bundle - Append contents of a buffer to tail of an existing one
+ * @bskb: the bundle buffer to append to
+ * @msg: message to be appended
+ * @max: max allowable size for the bundle buffer
+ *
+ * The appended message starts at the bundle's current size rounded up with
+ * BUF_ALIGN(). Nothing is modified when the message does not fit.
+ *
+ * Return: "true" if bundling has been performed, otherwise "false"
+ */
+static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
+ u32 max)
+{
+ struct tipc_msg *bmsg = buf_msg(bskb);
+ u32 msz, bsz, offset, pad;
+
+ msz = msg_size(msg);
+ bsz = msg_size(bmsg);
+ offset = BUF_ALIGN(bsz);
+ pad = offset - bsz;
+
+ /* Both the buffer's tailroom and the @max limit must allow the append */
+ if (unlikely(skb_tailroom(bskb) < (pad + msz)))
+ return false;
+ if (unlikely(max < (offset + msz)))
+ return false;
+
+ skb_put(bskb, pad + msz);
+ skb_copy_to_linear_data_offset(bskb, offset, msg, msz);
+ msg_set_size(bmsg, offset + msz);
+ msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+ return true;
+}
+
+/**
+ * tipc_msg_try_bundle - Try to bundle a new message to the last one
+ * @tskb: the last/target message to which the new one will be appended
+ * @skb: the new message skb pointer
+ * @mss: max message size (header inclusive)
+ * @dnode: destination node for the message
+ * @new_bundle: if this call made a new bundle or not
+ *
+ * Return: "true" if the new message is a candidate for bundling, now or
+ * later. If bundling was actually done in this call, the skb is consumed and
+ * *@skb is set to NULL.
+ * Otherwise, "false" if the skb cannot be bundled at all.
+ */
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+ u32 dnode, bool *new_bundle)
+{
+ struct tipc_msg *msg, *inner, *outer;
+ u32 tsz;
+
+ /* First, check if the new buffer is suitable for bundling */
+ msg = buf_msg(*skb);
+ if (msg_user(msg) == MSG_FRAGMENTER)
+ return false;
+ if (msg_user(msg) == TUNNEL_PROTOCOL)
+ return false;
+ if (msg_user(msg) == BCAST_PROTOCOL)
+ return false;
+ if (mss <= INT_H_SIZE + msg_size(msg))
+ return false;
+
+ /* Ok, but the last/target buffer can be empty? */
+ if (unlikely(!tskb))
+ return true;
+
+ /* Is it a bundle already? Try to bundle the new message to it */
+ if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) {
+ *new_bundle = false;
+ goto bundle;
+ }
+
+ /* Make a new bundle of the two messages if possible */
+ tsz = msg_size(buf_msg(tskb));
+ if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg)))
+ return true;
+ /* Make room for a bundle header in front and up to mss bytes in total */
+ if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
+ GFP_ATOMIC)))
+ return true;
+ /* Wrap the target message: it becomes the first message of the bundle */
+ inner = buf_msg(tskb);
+ skb_push(tskb, INT_H_SIZE);
+ outer = buf_msg(tskb);
+ tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE,
+ dnode);
+ msg_set_importance(outer, msg_importance(inner));
+ msg_set_size(outer, INT_H_SIZE + tsz);
+ msg_set_msgcnt(outer, 1);
+ *new_bundle = true;
+
+bundle:
+ if (likely(tipc_msg_bundle(tskb, msg, mss))) {
+ consume_skb(*skb);
+ *skb = NULL;
+ }
+ return true;
+}
+
+/**
+ * tipc_msg_extract(): extract bundled inner packet from buffer
+ * @skb: buffer to be extracted from.
+ * @iskb: extracted inner buffer, to be returned
+ * @pos: position in outer message of msg to be extracted.
+ *       Advanced to the position of the next msg on success.
+ *
+ * The outer buffer is freed as soon as no (further) inner message can be
+ * extracted from it, i.e. whenever this function returns false.
+ *
+ * Return: true when there is an extracted buffer, otherwise false
+ */
+bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
+{
+	struct tipc_msg *hdr, *ihdr;
+	int dsz, imsz;
+
+	*iskb = NULL;
+	if (unlikely(skb_linearize(skb)))
+		goto none;
+
+	hdr = buf_msg(skb);
+
+	/* Do the bounds checks in signed arithmetic. msg_data_sz() is u32, so
+	 * "msg_data_sz(hdr) - MIN_H_SIZE" wraps around when the data area is
+	 * shorter than MIN_H_SIZE, which would let any position through and
+	 * make the inner header be read from outside the data area.
+	 */
+	dsz = msg_data_sz(hdr);
+	if (unlikely(*pos < 0 || *pos > dsz - MIN_H_SIZE))
+		goto none;
+
+	ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos);
+	imsz = msg_size(ihdr);
+
+	/* The inner message must lie completely inside the outer data area */
+	if (imsz > dsz - *pos)
+		goto none;
+
+	*iskb = tipc_buf_acquire(imsz, GFP_ATOMIC);
+	if (!*iskb)
+		goto none;
+
+	skb_copy_to_linear_data(*iskb, ihdr, imsz);
+	if (unlikely(!tipc_msg_validate(iskb)))
+		goto none;
+
+	/* Inner messages are laid out on BUF_ALIGN() boundaries */
+	*pos += BUF_ALIGN(imsz);
+	return true;
+none:
+	kfree_skb(skb);
+	kfree_skb(*iskb);
+	*iskb = NULL;
+	return false;
+}
+
+/**
+ * tipc_msg_reverse(): swap source and destination addresses and add error code
+ * @own_node: originating node id for reversed message
+ * @skb: buffer containing message to be reversed; will be consumed
+ * @err: error code to be set in message, if any
+ *
+ * Replaces consumed buffer with new one when successful. The original buffer
+ * is freed in all cases; on failure *@skb is set to NULL.
+ * At most MAX_FORWARD_SIZE bytes of the original data are returned.
+ *
+ * Return: true if success, otherwise false
+ */
+bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
+{
+ struct sk_buff *_skb = *skb;
+ struct tipc_msg *_hdr, *hdr;
+ int hlen, dlen;
+
+ if (skb_linearize(_skb))
+ goto exit;
+ _hdr = buf_msg(_skb);
+ dlen = min_t(uint, msg_data_sz(_hdr), MAX_FORWARD_SIZE);
+ hlen = msg_hdr_sz(_hdr);
+
+ /* Messages marked droppable, or already carrying an error code, are
+ * not returned.
+ */
+ if (msg_dest_droppable(_hdr))
+ goto exit;
+ if (msg_errcode(_hdr))
+ goto exit;
+
+ /* Never return SHORT header */
+ if (hlen == SHORT_H_SIZE)
+ hlen = BASIC_H_SIZE;
+
+ /* Don't return data along with SYN+, - sender has a clone */
+ if (msg_is_syn(_hdr) && err == TIPC_ERR_OVERLOAD)
+ dlen = 0;
+
+ /* Allocate new buffer to return */
+ *skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC);
+ if (!*skb)
+ goto exit;
+ /* Copy the original header as is; the data goes after the (possibly
+ * enlarged) new header length.
+ */
+ memcpy((*skb)->data, _skb->data, msg_hdr_sz(_hdr));
+ memcpy((*skb)->data + hlen, msg_data(_hdr), dlen);
+
+ /* Build reverse header in new buffer */
+ hdr = buf_msg(*skb);
+ msg_set_hdr_sz(hdr, hlen);
+ msg_set_errcode(hdr, err);
+ msg_set_non_seq(hdr, 0);
+ msg_set_origport(hdr, msg_destport(_hdr));
+ msg_set_destport(hdr, msg_origport(_hdr));
+ msg_set_destnode(hdr, msg_prevnode(_hdr));
+ msg_set_prevnode(hdr, own_node);
+ msg_set_orignode(hdr, own_node);
+ msg_set_size(hdr, hlen + dlen);
+ skb_orphan(_skb);
+ kfree_skb(_skb);
+ return true;
+exit:
+ kfree_skb(_skb);
+ *skb = NULL;
+ return false;
+}
+
+/* tipc_msg_skb_clone() - clone every buffer of a chain onto another queue
+ * @msg: the buffer chain to clone; left untouched
+ * @cpy: queue the clones are appended to
+ *
+ * If any clone fails, @cpy is purged (including buffers that were on it
+ * before the call) and an error is logged.
+ * Returns true if all buffers were cloned, otherwise false
+ */
+bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
+{
+ struct sk_buff *skb, *_skb;
+
+ skb_queue_walk(msg, skb) {
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb) {
+ __skb_queue_purge(cpy);
+ pr_err_ratelimited("Failed to clone buffer chain\n");
+ return false;
+ }
+ __skb_queue_tail(cpy, _skb);
+ }
+ return true;
+}
+
+/**
+ * tipc_msg_lookup_dest(): try to find new destination for named message
+ * @net: pointer to associated network namespace
+ * @skb: the buffer containing the message.
+ * @err: error code to be used by caller if lookup fails
+ *
+ * Does not consume buffer. Only named data messages without an error code
+ * that have not been rerouted before are looked up; for other messages the
+ * function returns false, and *@err is left untouched when one of the first
+ * three checks rejects the message.
+ *
+ * Return: true if a destination is found, false otherwise
+ */
+bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
+{
+ struct tipc_msg *msg = buf_msg(skb);
+ u32 scope = msg_lookup_scope(msg);
+ u32 self = tipc_own_addr(net);
+ u32 inst = msg_nameinst(msg);
+ struct tipc_socket_addr sk;
+ struct tipc_uaddr ua;
+
+ if (!msg_isdata(msg))
+ return false;
+ if (!msg_named(msg))
+ return false;
+ if (msg_errcode(msg))
+ return false;
+ *err = TIPC_ERR_NO_NAME;
+ if (skb_linearize(skb))
+ return false;
+ /* Re-read the header pointer after skb_linearize() */
+ msg = buf_msg(skb);
+ if (msg_reroute_cnt(msg))
+ return false;
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope,
+ msg_nametype(msg), inst, inst);
+ sk.node = tipc_scope2node(net, scope);
+ if (!tipc_nametbl_lookup_anycast(net, &ua, &sk))
+ return false;
+ /* Count the reroute so a later call refuses to reroute again */
+ msg_incr_reroute_cnt(msg);
+ if (sk.node != self)
+ msg_set_prevnode(msg, self);
+ msg_set_destnode(msg, sk.node);
+ msg_set_destport(msg, sk.ref);
+ *err = TIPC_OK;
+
+ return true;
+}
+
+/* tipc_msg_assemble() - assemble chain of fragments into one message
+ * @list: in: the chain of fragments
+ * out: the single assembled buffer on success, empty on failure
+ *
+ * A list holding exactly one buffer is returned unchanged.
+ * Returns true on success; on failure the list is purged, a warning is
+ * logged and false is returned.
+ */
+bool tipc_msg_assemble(struct sk_buff_head *list)
+{
+ struct sk_buff *skb, *tmp = NULL;
+
+ if (skb_queue_len(list) == 1)
+ return true;
+
+ while ((skb = __skb_dequeue(list))) {
+ skb->next = NULL;
+ if (tipc_buf_append(&tmp, &skb)) {
+ __skb_queue_tail(list, skb);
+ return true;
+ }
+ /* tipc_buf_append() clears the head on error: give up */
+ if (!tmp)
+ break;
+ }
+ __skb_queue_purge(list);
+ __skb_queue_head_init(list);
+ pr_warn("Failed do assemble buffer\n");
+ return false;
+}
+
+/* tipc_msg_reassemble() - clone a buffer chain of fragments and
+ * reassemble the clones into one message
+ * @list: the chain to copy from; its buffers are not dequeued here
+ * @rcvq: queue the resulting buffer is appended to
+ *
+ * A single-buffer chain is not cloned; it is copied with __pskb_copy(), using
+ * the buffer's headroom plus its TIPC header size as the headroom argument.
+ * Returns true on success, otherwise false
+ */
+bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq)
+{
+ struct sk_buff *skb, *_skb;
+ struct sk_buff *frag = NULL;
+ struct sk_buff *head = NULL;
+ int hdr_len;
+
+ /* Copy header if single buffer */
+ if (skb_queue_len(list) == 1) {
+ skb = skb_peek(list);
+ hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
+ _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC);
+ if (!_skb)
+ return false;
+ __skb_queue_tail(rcvq, _skb);
+ return true;
+ }
+
+ /* Clone all fragments and reassemble */
+ skb_queue_walk(list, skb) {
+ frag = skb_clone(skb, GFP_ATOMIC);
+ if (!frag)
+ goto error;
+ frag->next = NULL;
+ /* On completion tipc_buf_append() stores the whole message in frag */
+ if (tipc_buf_append(&head, &frag))
+ break;
+ if (!head)
+ goto error;
+ }
+ __skb_queue_tail(rcvq, frag);
+ return true;
+error:
+ pr_warn("Failed do clone local mcast rcv buffer\n");
+ kfree_skb(head);
+ return false;
+}
+
+/* tipc_msg_pskb_copy() - copy a buffer chain and retarget it to a new node
+ * @dst: destination node to set in the header of every copy
+ * @msg: the buffer chain to copy; left untouched
+ * @cpy: queue the copies are appended to
+ *
+ * If any copy fails, @cpy is purged (including buffers that were on it
+ * before the call).
+ * Returns true if all buffers were copied, otherwise false
+ */
+bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
+ struct sk_buff_head *cpy)
+{
+ struct sk_buff *skb, *_skb;
+
+ skb_queue_walk(msg, skb) {
+ _skb = pskb_copy(skb, GFP_ATOMIC);
+ if (!_skb) {
+ __skb_queue_purge(cpy);
+ return false;
+ }
+ msg_set_destnode(buf_msg(_skb), dst);
+ __skb_queue_tail(cpy, _skb);
+ }
+ return true;
+}
+
+/* __tipc_skb_queue_sorted(): sort pkt into list according to sequence number
+ * @list: list to be inserted into
+ * @seqno: sequence number of buffer to add
+ * @skb: buffer to add
+ *
+ * Returns true if the buffer was queued. If the list already holds a buffer
+ * with the same sequence number, @skb is freed and false is returned.
+ */
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+ struct sk_buff *skb)
+{
+ struct sk_buff *_skb, *tmp;
+
+ /* Fast path: empty list, or the new buffer sorts before the head */
+ if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
+ __skb_queue_head(list, skb);
+ return true;
+ }
+
+ /* Fast path: the new buffer sorts after the tail */
+ if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
+ __skb_queue_tail(list, skb);
+ return true;
+ }
+
+ /* Otherwise insert before the first entry that is not older */
+ skb_queue_walk_safe(list, _skb, tmp) {
+ if (more(seqno, buf_seqno(_skb)))
+ continue;
+ if (seqno == buf_seqno(_skb))
+ break;
+ __skb_queue_before(list, _skb, skb);
+ return true;
+ }
+ kfree_skb(skb);
+ return false;
+}
+
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))
+ __skb_queue_tail(xmitq, skb);
}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5e4ccf5c27df..c5eec16213d7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
* Copyright (c) 2005-2008, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -37,7 +37,8 @@
#ifndef _TIPC_MSG_H
#define _TIPC_MSG_H
-#include "bearer.h"
+#include <linux/tipc.h>
+#include "core.h"
/*
* Constants and routines used to read and write TIPC payload message headers
@@ -45,6 +46,7 @@
* Note: Some items are also used with TIPC internal message headers
*/
#define TIPC_VERSION 2
+struct plist;
/*
* Payload message users are defined in TIPC's public API:
@@ -53,14 +55,36 @@
* - TIPC_HIGH_IMPORTANCE
* - TIPC_CRITICAL_IMPORTANCE
*/
+#define TIPC_SYSTEM_IMPORTANCE 4
+
/*
* Payload message types
*/
-#define TIPC_CONN_MSG 0
-#define TIPC_MCAST_MSG 1
-#define TIPC_NAMED_MSG 2
-#define TIPC_DIRECT_MSG 3
+#define TIPC_CONN_MSG 0
+#define TIPC_MCAST_MSG 1
+#define TIPC_NAMED_MSG 2
+#define TIPC_DIRECT_MSG 3
+#define TIPC_GRP_MEMBER_EVT 4
+#define TIPC_GRP_BCAST_MSG 5
+#define TIPC_GRP_MCAST_MSG 6
+#define TIPC_GRP_UCAST_MSG 7
+
+/*
+ * Internal message users
+ */
+#define BCAST_PROTOCOL 5
+#define MSG_BUNDLER 6
+#define LINK_PROTOCOL 7
+#define CONN_MANAGER 8
+#define GROUP_PROTOCOL 9
+#define TUNNEL_PROTOCOL 10
+#define NAME_DISTRIBUTOR 11
+#define MSG_FRAGMENTER 12
+#define LINK_CONFIG 13
+#define MSG_CRYPTO 14
+#define SOCK_WAKEUP 14 /* pseudo user */
+#define TOP_SRV 15 /* pseudo user */
/*
* Message header sizes
@@ -69,19 +93,114 @@
#define BASIC_H_SIZE 32 /* Basic payload message */
#define NAMED_H_SIZE 40 /* Named payload message */
#define MCAST_H_SIZE 44 /* Multicast payload message */
+#define GROUP_H_SIZE 44 /* Group payload message */
#define INT_H_SIZE 40 /* Internal messages */
#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
+#define TIPC_MEDIA_INFO_OFFSET 5
+
+extern const int one_page_mtu;
+
+struct tipc_skb_cb {
+ union {
+ struct {
+ struct sk_buff *tail;
+ unsigned long nxt_retr;
+ unsigned long retr_stamp;
+ u32 bytes_read;
+ u32 orig_member;
+ u16 chain_imp;
+ u16 ackers;
+ u16 retr_cnt;
+ } __packed;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct {
+ struct tipc_crypto *rx;
+ struct tipc_aead *last;
+ u8 recurs;
+ } tx_clone_ctx __packed;
+#endif
+ } __packed;
+ union {
+ struct {
+ u8 validated:1;
+#ifdef CONFIG_TIPC_CRYPTO
+ u8 encrypted:1;
+ u8 decrypted:1;
+#define SKB_PROBING 1
+#define SKB_GRACING 2
+ u8 xmit_type:2;
+ u8 tx_clone_deferred:1;
+#endif
+ };
+ u8 flags;
+ };
+ u8 reserved;
+#ifdef CONFIG_TIPC_CRYPTO
+ void *crypto_ctx;
+#endif
+} __packed;
-#define TIPC_MEDIA_ADDR_OFFSET 5
-
+#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
struct tipc_msg {
__be32 hdr[15];
};
+/* struct tipc_gap_ack - TIPC Gap ACK block
+ * @ack: seqno of the last consecutive packet in link deferdq
+ * @gap: number of gap packets since the last ack
+ *
+ * E.g:
+ * link deferdq: 1 2 3 4 10 11 13 14 15 20
+ * --> Gap ACK blocks: <4, 5>, <11, 1>, <15, 4>, <20, 0>
+ */
+struct tipc_gap_ack {
+ __be16 ack;
+ __be16 gap;
+};
+
+/* struct tipc_gap_ack_blks
+ * @len: actual length of the record
+ * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast
+ * ones)
+ * @start_index: starting index for "valid" broadcast Gap ACK blocks
+ * @bgack_cnt: number of Gap ACK blocks for broadcast in the record
+ * @gacks: array of Gap ACK blocks
+ *
+ * 31 16 15 0
+ * +-------------+-------------+-------------+-------------+
+ * | bgack_cnt | ugack_cnt | len |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > bc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > uc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
+ */
+struct tipc_gap_ack_blks {
+ __be16 len;
+ union {
+ u8 ugack_cnt;
+ u8 start_index;
+ };
+ u8 bgack_cnt;
+ struct tipc_gap_ack gacks[];
+};
+
+#define MAX_GAP_ACK_BLKS 128
+#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \
+ sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS)
+
+static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
+{
+ return (struct tipc_msg *)skb->data;
+}
static inline u32 msg_word(struct tipc_msg *m, u32 pos)
{
@@ -107,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
m->hdr[w] |= htonl(val);
}
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
- u32 temp = msg->hdr[a];
-
- msg->hdr[a] = msg->hdr[b];
- msg->hdr[b] = temp;
-}
-
/*
* Word 0
*/
@@ -143,16 +254,6 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n)
msg_set_bits(m, 0, 25, 0xf, n);
}
-static inline u32 msg_importance(struct tipc_msg *m)
-{
- return msg_bits(m, 0, 25, 0xf);
-}
-
-static inline void msg_set_importance(struct tipc_msg *m, u32 i)
-{
- msg_set_user(m, i);
-}
-
static inline u32 msg_hdr_sz(struct tipc_msg *m)
{
return msg_bits(m, 0, 21, 0xf) << 2;
@@ -168,6 +269,11 @@ static inline u32 msg_size(struct tipc_msg *m)
return msg_bits(m, 0, 0, 0x1ffff);
}
+static inline u32 msg_blocks(struct tipc_msg *m)
+{
+ return (msg_size(m) / 1024) + 1;
+}
+
static inline u32 msg_data_sz(struct tipc_msg *m)
{
return msg_size(m) - msg_hdr_sz(m);
@@ -183,6 +289,16 @@ static inline void msg_set_non_seq(struct tipc_msg *m, u32 n)
msg_set_bits(m, 0, 20, 1, n);
}
+static inline int msg_is_syn(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 17, 1);
+}
+
+static inline void msg_set_syn(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 17, 1, d);
+}
+
static inline int msg_dest_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 19, 1);
@@ -193,6 +309,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 19, 1, d);
}
+static inline int msg_is_keepalive(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 19, 1);
+}
+
+static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 19, 1, d);
+}
+
static inline int msg_src_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 1);
@@ -203,11 +329,50 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 18, 1, d);
}
+static inline int msg_ack_required(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_ack_required(struct tipc_msg *m)
+{
+ msg_set_bits(m, 0, 18, 1, 1);
+}
+
+static inline int msg_nagle_ack(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_nagle_ack(struct tipc_msg *m)
+{
+ msg_set_bits(m, 0, 18, 1, 1);
+}
+
+static inline bool msg_is_rcast(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 0x1);
+}
+
+static inline void msg_set_is_rcast(struct tipc_msg *m, bool d)
+{
+ msg_set_bits(m, 0, 18, 0x1, d);
+}
+
static inline void msg_set_size(struct tipc_msg *m, u32 sz)
{
m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz);
}
+static inline unchar *msg_data(struct tipc_msg *m)
+{
+ return ((unchar *)m) + msg_hdr_sz(m);
+}
+
+static inline struct tipc_msg *msg_inner_hdr(struct tipc_msg *m)
+{
+ return (struct tipc_msg *)msg_data(m);
+}
/*
* Word 1
@@ -222,6 +387,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 29, 0x7, n);
}
+static inline int msg_in_group(struct tipc_msg *m)
+{
+ int mtyp = msg_type(m);
+
+ return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;
+}
+
+static inline bool msg_is_grp_evt(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_GRP_MEMBER_EVT;
+}
+
static inline u32 msg_named(struct tipc_msg *m)
{
return msg_type(m) == TIPC_NAMED_MSG;
@@ -229,7 +406,10 @@ static inline u32 msg_named(struct tipc_msg *m)
static inline u32 msg_mcast(struct tipc_msg *m)
{
- return msg_type(m) == TIPC_MCAST_MSG;
+ int mtyp = msg_type(m);
+
+ return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
+ (mtyp == TIPC_GRP_MCAST_MSG));
}
static inline u32 msg_connected(struct tipc_msg *m)
@@ -237,6 +417,11 @@ static inline u32 msg_connected(struct tipc_msg *m)
return msg_type(m) == TIPC_CONN_MSG;
}
+static inline u32 msg_direct(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_DIRECT_MSG;
+}
+
static inline u32 msg_errcode(struct tipc_msg *m)
{
return msg_bits(m, 1, 25, 0xf);
@@ -247,6 +432,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err)
msg_set_bits(m, 1, 25, 0xf, err);
}
+static inline void msg_set_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 28, 0x1, 1);
+}
+
+static inline u32 msg_is_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 28, 0x1);
+}
+
+static inline void msg_set_last_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 27, 0x1, 1);
+}
+
+static inline u32 msg_is_last_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 27, 0x1);
+}
+
+static inline void msg_set_non_legacy(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 26, 0x1, 1);
+}
+
+static inline u32 msg_is_legacy(struct tipc_msg *m)
+{
+ return !msg_bits(m, 1, 26, 0x1);
+}
+
static inline u32 msg_reroute_cnt(struct tipc_msg *m)
{
return msg_bits(m, 1, 21, 0xf);
@@ -257,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
}
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
- msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
static inline u32 msg_lookup_scope(struct tipc_msg *m)
{
return msg_bits(m, 1, 19, 0x3);
@@ -272,36 +482,58 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 19, 0x3, n);
}
-static inline u32 msg_bcast_ack(struct tipc_msg *m)
+static inline u16 msg_bcast_ack(struct tipc_msg *m)
{
return msg_bits(m, 1, 0, 0xffff);
}
-static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n)
+static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 1, 0, 0xffff, n);
}
+/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch
+ * link peer session number
+ */
+static inline bool msg_dest_session_valid(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 16, 0x1);
+}
+
+static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid)
+{
+ msg_set_bits(m, 1, 16, 0x1, valid);
+}
+
+static inline u16 msg_dest_session(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 0, 0xffff);
+}
+
+static inline void msg_set_dest_session(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 1, 0, 0xffff, n);
+}
/*
* Word 2
*/
-static inline u32 msg_ack(struct tipc_msg *m)
+static inline u16 msg_ack(struct tipc_msg *m)
{
return msg_bits(m, 2, 16, 0xffff);
}
-static inline void msg_set_ack(struct tipc_msg *m, u32 n)
+static inline void msg_set_ack(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
}
-static inline u32 msg_seqno(struct tipc_msg *m)
+static inline u16 msg_seqno(struct tipc_msg *m)
{
return msg_bits(m, 2, 0, 0xffff);
}
-static inline void msg_set_seqno(struct tipc_msg *m, u32 n)
+static inline void msg_set_seqno(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 2, 0, 0xffff, n);
}
@@ -309,6 +541,29 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n)
/*
* Words 3-10
*/
+static inline u32 msg_importance(struct tipc_msg *m)
+{
+ int usr = msg_user(m);
+
+ if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m)))
+ return usr;
+ if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))
+ return msg_bits(m, 9, 0, 0x7);
+ return TIPC_SYSTEM_IMPORTANCE;
+}
+
+static inline void msg_set_importance(struct tipc_msg *m, u32 i)
+{
+ int usr = msg_user(m);
+
+ if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)))
+ msg_set_bits(m, 9, 0, 0x7, i);
+ else if (i < TIPC_SYSTEM_IMPORTANCE)
+ msg_set_user(m, i);
+ else
+ pr_warn("Trying to set illegal importance in message\n");
+}
+
static inline u32 msg_prevnode(struct tipc_msg *m)
{
return msg_word(m, 3);
@@ -321,6 +576,8 @@ static inline void msg_set_prevnode(struct tipc_msg *m, u32 a)
static inline u32 msg_origport(struct tipc_msg *m)
{
+ if (msg_user(m) == MSG_FRAGMENTER)
+ m = msg_inner_hdr(m);
return msg_word(m, 4);
}
@@ -329,6 +586,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p)
msg_set_word(m, 4, p);
}
+static inline u16 msg_named_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
static inline u32 msg_destport(struct tipc_msg *m)
{
return msg_word(m, 5);
@@ -416,34 +683,11 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
msg_set_word(m, 10, n);
}
-static inline unchar *msg_data(struct tipc_msg *m)
-{
- return ((unchar *)m) + msg_hdr_sz(m);
-}
-
-static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
-{
- return (struct tipc_msg *)msg_data(m);
-}
-
/*
* Constants and routines used to read and write TIPC internal message headers
*/
/*
- * Internal message users
- */
-#define BCAST_PROTOCOL 5
-#define MSG_BUNDLER 6
-#define LINK_PROTOCOL 7
-#define CONN_MANAGER 8
-#define ROUTE_DISTRIBUTOR 9 /* obsoleted */
-#define CHANGEOVER_PROTOCOL 10
-#define NAME_DISTRIBUTOR 11
-#define MSG_FRAGMENTER 12
-#define LINK_CONFIG 13
-
-/*
* Connection management protocol message types
*/
#define CONN_PROBE 0
@@ -473,15 +717,29 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
/*
* Changeover tunnel message types
*/
-#define DUPLICATE_MSG 0
-#define ORIGINAL_MSG 1
+#define SYNCH_MSG 0
+#define FAILOVER_MSG 1
/*
* Config protocol message types
*/
#define DSC_REQ_MSG 0
#define DSC_RESP_MSG 1
+#define DSC_TRIAL_MSG 2
+#define DSC_TRIAL_FAIL_MSG 3
+
+/*
+ * Group protocol message types
+ */
+#define GRP_JOIN_MSG 0
+#define GRP_LEAVE_MSG 1
+#define GRP_ADV_MSG 2
+#define GRP_ACK_MSG 3
+#define GRP_RECLAIM_MSG 4
+#define GRP_REMIT_MSG 5
+/* Crypto message types */
+#define KEY_DISTR_MSG 0
/*
* Word 1
@@ -506,6 +764,15 @@ static inline void msg_set_node_sig(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 0, 0xffff, n);
}
+static inline u32 msg_node_capabilities(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 15, 0x1fff);
+}
+
+static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 1, 15, 0x1fff, n);
+}
/*
* Word 2
@@ -520,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
msg_set_word(m, 2, n);
}
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
- return msg_bits(m, 2, 16, 0xffff);
-}
-
static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
@@ -540,7 +802,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n)
msg_set_bits(m, 2, 0, 0xffff, n);
}
-
/*
* Word 4
*/
@@ -549,40 +810,42 @@ static inline u32 msg_last_bcast(struct tipc_msg *m)
return msg_bits(m, 4, 16, 0xffff);
}
+static inline u32 msg_bc_snd_nxt(struct tipc_msg *m)
+{
+ return msg_last_bcast(m) + 1;
+}
+
static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 4, 16, 0xffff, n);
}
-
-static inline u32 msg_fragm_no(struct tipc_msg *m)
+static inline u32 msg_nof_fragms(struct tipc_msg *m)
{
- return msg_bits(m, 4, 16, 0xffff);
+ return msg_bits(m, 4, 0, 0xffff);
}
-static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
+static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n)
{
- msg_set_bits(m, 4, 16, 0xffff, n);
+ msg_set_bits(m, 4, 0, 0xffff, n);
}
-
-static inline u32 msg_next_sent(struct tipc_msg *m)
+static inline u32 msg_fragm_no(struct tipc_msg *m)
{
- return msg_bits(m, 4, 0, 0xffff);
+ return msg_bits(m, 4, 16, 0xffff);
}
-static inline void msg_set_next_sent(struct tipc_msg *m, u32 n)
+static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
{
- msg_set_bits(m, 4, 0, 0xffff, n);
+ msg_set_bits(m, 4, 16, 0xffff, n);
}
-
-static inline u32 msg_long_msgno(struct tipc_msg *m)
+static inline u16 msg_next_sent(struct tipc_msg *m)
{
return msg_bits(m, 4, 0, 0xffff);
}
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
+static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 4, 0, 0xffff, n);
}
@@ -599,23 +862,20 @@ static inline void msg_set_bc_netid(struct tipc_msg *m, u32 id)
static inline u32 msg_link_selector(struct tipc_msg *m)
{
+ if (msg_user(m) == MSG_FRAGMENTER)
+ m = (void *)msg_data(m);
return msg_bits(m, 4, 0, 1);
}
-static inline void msg_set_link_selector(struct tipc_msg *m, u32 n)
-{
- msg_set_bits(m, 4, 0, 1, n);
-}
-
/*
* Word 5
*/
-static inline u32 msg_session(struct tipc_msg *m)
+static inline u16 msg_session(struct tipc_msg *m)
{
return msg_bits(m, 5, 16, 0xffff);
}
-static inline void msg_set_session(struct tipc_msg *m, u32 n)
+static inline void msg_set_session(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 5, 16, 0xffff, n);
}
@@ -670,34 +930,91 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r)
msg_set_bits(m, 5, 12, 0x1, r);
}
+static inline u32 msg_peer_stopping(struct tipc_msg *m)
+{
+ return msg_bits(m, 5, 13, 0x1);
+}
+
+static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s)
+{
+ msg_set_bits(m, 5, 13, 0x1, s);
+}
+
+static inline bool msg_bc_ack_invalid(struct tipc_msg *m)
+{
+ switch (msg_user(m)) {
+ case BCAST_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case LINK_PROTOCOL:
+ return msg_bits(m, 5, 14, 0x1);
+ default:
+ return false;
+ }
+}
+
+static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid)
+{
+ msg_set_bits(m, 5, 14, 0x1, invalid);
+}
+
static inline char *msg_media_addr(struct tipc_msg *m)
{
- return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET];
+ return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
+}
+
+static inline u32 msg_bc_gap(struct tipc_msg *m)
+{
+ return msg_bits(m, 8, 0, 0x3ff);
+}
+
+static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 8, 0, 0x3ff, n);
}
/*
* Word 9
*/
-static inline u32 msg_msgcnt(struct tipc_msg *m)
+static inline u16 msg_msgcnt(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
}
-static inline void msg_set_msgcnt(struct tipc_msg *m, u32 n)
+static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 9, 16, 0xffff, n);
}
-static inline u32 msg_bcast_tag(struct tipc_msg *m)
+static inline u16 msg_syncpt(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
}
-static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
+static inline void msg_set_syncpt(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 9, 16, 0xffff, n);
}
+static inline u32 msg_conn_ack(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_adv_win(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 0, 0xffff);
+}
+
+static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 0, 0xffff, n);
+}
+
static inline u32 msg_max_pkt(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff) * 4;
@@ -718,10 +1035,276 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
-u32 tipc_msg_tot_importance(struct tipc_msg *m);
-void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
- u32 destnode);
-int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
- u32 num_sect, unsigned int total_len, int max_size,
- struct sk_buff **buf);
+static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_remitted(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+/* Word 10
+ */
+static inline u16 msg_grp_evt(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x3);
+}
+
+static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
+{
+ msg_set_bits(m, 10, 0, 0x3, n);
+}
+
+static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x1);
+}
+
+static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
+{
+ msg_set_bits(m, 10, 0, 0x1, n);
+}
+
+static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 10, 16, 0xffff, n);
+}
+
+static inline bool msg_peer_link_is_up(struct tipc_msg *m)
+{
+ if (likely(msg_user(m) != LINK_PROTOCOL))
+ return true;
+ if (msg_type(m) == STATE_MSG)
+ return true;
+ return false;
+}
+
+static inline bool msg_peer_node_is_up(struct tipc_msg *m)
+{
+ if (msg_peer_link_is_up(m))
+ return true;
+ return msg_redundant_link(m);
+}
+
+static inline bool msg_is_reset(struct tipc_msg *hdr)
+{
+ return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
+}
+
+/* Word 13
+ */
+static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 13, n);
+}
+
+static inline u32 msg_peer_net_hash(struct tipc_msg *m)
+{
+ return msg_word(m, 13);
+}
+
+/* Word 14
+ */
+static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
+{
+ return msg_word(m, 14);
+}
+
+static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 14, n);
+}
+
+static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id)
+{
+ memcpy(msg_data(hdr), id, 16);
+}
+
+static inline u8 *msg_node_id(struct tipc_msg *hdr)
+{
+ return (u8 *)msg_data(hdr);
+}
+
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
+bool tipc_msg_validate(struct sk_buff **_skb);
+bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
+void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
+ u32 hsize, u32 destnode);
+struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
+ uint data_sz, u32 dnode, u32 onode,
+ u32 dport, u32 oport, int errcode);
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+ u32 dnode, bool *new_bundle);
+bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+ int pktmax, struct sk_buff_head *frags);
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
+ int offset, int dsz, int mtu, struct sk_buff_head *list);
+int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq);
+bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
+bool tipc_msg_assemble(struct sk_buff_head *list);
+bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
+bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
+ struct sk_buff_head *cpy);
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+ struct sk_buff *skb);
+bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
+
+static inline u16 buf_seqno(struct sk_buff *skb)
+{
+ return msg_seqno(buf_msg(skb));
+}
+
+static inline int buf_roundup_len(struct sk_buff *skb)
+{
+ return (skb->len / 1024 + 1) * 1024;
+}
+
+/* tipc_skb_peek(): peek and reserve first buffer in list
+ * @list: list to be peeked in
+ * Returns pointer to first buffer in list, if any
+ */
+static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list,
+ spinlock_t *lock)
+{
+ struct sk_buff *skb;
+
+ spin_lock_bh(lock);
+ skb = skb_peek(list);
+ if (skb)
+ skb_get(skb);
+ spin_unlock_bh(lock);
+ return skb;
+}
+
+/* tipc_skb_peek_port(): find a destination port, ignoring all destinations
+ * up to and including 'filter'.
+ * Note: ignoring previously tried destinations minimizes the risk of
+ * contention on the socket lock
+ * @list: list to be peeked in
+ * @filter: last destination to be ignored from search
+ * Returns a destination port number, if applicable.
+ */
+static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter)
+{
+ struct sk_buff *skb;
+ u32 dport = 0;
+ bool ignore = true;
+
+ spin_lock_bh(&list->lock);
+ skb_queue_walk(list, skb) {
+ dport = msg_destport(buf_msg(skb));
+ if (!filter || skb_queue_is_last(list, skb))
+ break;
+ if (dport == filter)
+ ignore = false;
+ else if (!ignore)
+ break;
+ }
+ spin_unlock_bh(&list->lock);
+ return dport;
+}
+
+/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list
+ * @list: list to be unlinked from
+ * @dport: selection criteria for buffer to unlink
+ */
+static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list,
+ u32 dport)
+{
+ struct sk_buff *_skb, *tmp, *skb = NULL;
+
+ spin_lock_bh(&list->lock);
+ skb_queue_walk_safe(list, _skb, tmp) {
+ if (msg_destport(buf_msg(_skb)) == dport) {
+ __skb_unlink(_skb, list);
+ skb = _skb;
+ break;
+ }
+ }
+ spin_unlock_bh(&list->lock);
+ return skb;
+}
+
+/* tipc_skb_queue_splice_tail - append an skb list to lock protected list
+ * @list: the new list to append. Not lock protected
+ * @head: target list. Lock protected.
+ */
+static inline void tipc_skb_queue_splice_tail(struct sk_buff_head *list,
+ struct sk_buff_head *head)
+{
+ spin_lock_bh(&head->lock);
+ skb_queue_splice_tail(list, head);
+ spin_unlock_bh(&head->lock);
+}
+
+/* tipc_skb_queue_splice_tail_init - merge two lock protected skb lists
+ * @list: the new list to add. Lock protected. Will be reinitialized
+ * @head: target list. Lock protected.
+ */
+static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list,
+ struct sk_buff_head *head)
+{
+ struct sk_buff_head tmp;
+
+ __skb_queue_head_init(&tmp);
+
+ spin_lock_bh(&list->lock);
+ skb_queue_splice_tail_init(list, &tmp);
+ spin_unlock_bh(&list->lock);
+ tipc_skb_queue_splice_tail(&tmp, head);
+}
+
+/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno
+ * @list: list to be dequeued from
+ * @seqno: seqno of the expected msg
+ *
+ * Returns the skb dequeued from the list if its seqno is less than or equal
+ * to the expected one; otherwise the skb stays queued and NULL is returned
+ *
+ * Note: must be used with appropriate locks held only
+ */
+static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list,
+ u16 seqno)
+{
+ struct sk_buff *skb = skb_peek(list);
+
+ if (skb && less_eq(buf_seqno(skb), seqno)) {
+ __skb_unlink(skb, list);
+ return skb;
+ }
+ return NULL;
+}
+
#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index e0d08055754e..190b49c5cbc3 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -1,8 +1,9 @@
/*
* net/tipc/name_distr.c: TIPC name distribution code
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2019, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,330 +39,373 @@
#include "link.h"
#include "name_distr.h"
-#define ITEM_SIZE sizeof(struct distr_item)
-
-/**
- * struct distr_item - publication info distributed to other nodes
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @ref: publishing port reference
- * @key: publication key
- *
- * ===> All fields are stored in network byte order. <===
- *
- * First 3 fields identify (name or) name sequence being published.
- * Reference field uniquely identifies port that published name sequence.
- * Key field uniquely identifies publication, in the event a port has
- * multiple publications of the same name sequence.
- *
- * Note: There is no field that identifies the publishing node because it is
- * the same for all items contained within a publication message.
- */
-struct distr_item {
- __be32 type;
- __be32 lower;
- __be32 upper;
- __be32 ref;
- __be32 key;
-};
-
-/**
- * struct publ_list - list of publications made by this node
- * @list: circular list of publications
- * @list_size: number of entries in list
- */
-struct publ_list {
- struct list_head list;
- u32 size;
-};
-
-static struct publ_list publ_zone = {
- .list = LIST_HEAD_INIT(publ_zone.list),
- .size = 0,
-};
-
-static struct publ_list publ_cluster = {
- .list = LIST_HEAD_INIT(publ_cluster.list),
- .size = 0,
-};
-
-static struct publ_list publ_node = {
- .list = LIST_HEAD_INIT(publ_node.list),
- .size = 0,
-};
-
-static struct publ_list *publ_lists[] = {
- NULL,
- &publ_zone, /* publ_lists[TIPC_ZONE_SCOPE] */
- &publ_cluster, /* publ_lists[TIPC_CLUSTER_SCOPE] */
- &publ_node /* publ_lists[TIPC_NODE_SCOPE] */
-};
-
+int sysctl_tipc_named_timeout __read_mostly = 2000;
/**
* publ_to_item - add publication info to a publication message
+ * @p: publication info
+ * @i: location of item in the message
*/
static void publ_to_item(struct distr_item *i, struct publication *p)
{
- i->type = htonl(p->type);
- i->lower = htonl(p->lower);
- i->upper = htonl(p->upper);
- i->ref = htonl(p->ref);
+ i->type = htonl(p->sr.type);
+ i->lower = htonl(p->sr.lower);
+ i->upper = htonl(p->sr.upper);
+ i->port = htonl(p->sk.ref);
i->key = htonl(p->key);
}
/**
* named_prepare_buf - allocate & initialize a publication message
+ * @net: the associated network namespace
+ * @type: message type
+ * @size: payload size
+ * @dest: destination node
+ *
+ * The buffer returned is of size INT_H_SIZE + payload size
*/
-static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
+static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
+ u32 dest)
{
- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size);
+ struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+ u32 self = tipc_own_addr(net);
struct tipc_msg *msg;
if (buf != NULL) {
msg = buf_msg(buf);
- tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest);
+ tipc_msg_init(self, msg, NAME_DISTRIBUTOR,
+ type, INT_H_SIZE, dest);
msg_set_size(msg, INT_H_SIZE + size);
}
return buf;
}
-static void named_cluster_distribute(struct sk_buff *buf)
-{
- struct sk_buff *buf_copy;
- struct tipc_node *n_ptr;
-
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
- if (tipc_node_active_links(n_ptr)) {
- buf_copy = skb_copy(buf, GFP_ATOMIC);
- if (!buf_copy)
- break;
- msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
- tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
- }
- }
-
- kfree_skb(buf);
-}
-
/**
* tipc_named_publish - tell other nodes about a new publication by this node
+ * @net: the associated network namespace
+ * @p: the new publication
*/
-void tipc_named_publish(struct publication *publ)
+struct sk_buff *tipc_named_publish(struct net *net, struct publication *p)
{
- struct sk_buff *buf;
+ struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
+ struct sk_buff *skb;
- list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list);
- publ_lists[publ->scope]->size++;
-
- if (publ->scope == TIPC_NODE_SCOPE)
- return;
-
- buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);
- if (!buf) {
+ if (p->scope == TIPC_NODE_SCOPE) {
+ list_add_tail_rcu(&p->binding_node, &nt->node_scope);
+ return NULL;
+ }
+ write_lock_bh(&nt->cluster_scope_lock);
+ list_add_tail(&p->binding_node, &nt->cluster_scope);
+ write_unlock_bh(&nt->cluster_scope_lock);
+ skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Publication distribution failure\n");
- return;
+ return NULL;
}
-
- item = (struct distr_item *)msg_data(buf_msg(buf));
- publ_to_item(item, publ);
- named_cluster_distribute(buf);
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
+ publ_to_item(item, p);
+ return skb;
}
/**
* tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
+ * @net: the associated network namespace
+ * @p: the withdrawn publication
*/
-void tipc_named_withdraw(struct publication *publ)
+struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p)
{
- struct sk_buff *buf;
+ struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
+ struct sk_buff *skb;
- list_del(&publ->local_list);
- publ_lists[publ->scope]->size--;
-
- if (publ->scope == TIPC_NODE_SCOPE)
- return;
+ write_lock_bh(&nt->cluster_scope_lock);
+ list_del(&p->binding_node);
+ write_unlock_bh(&nt->cluster_scope_lock);
+ if (p->scope == TIPC_NODE_SCOPE)
+ return NULL;
- buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Withdrawal distribution failure\n");
- return;
+ return NULL;
}
-
- item = (struct distr_item *)msg_data(buf_msg(buf));
- publ_to_item(item, publ);
- named_cluster_distribute(buf);
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
+ publ_to_item(item, p);
+ return skb;
}
-/*
+/**
* named_distribute - prepare name info for bulk distribution to another node
+ * @net: the associated network namespace
+ * @list: list of messages (buffers) to be returned from this function
+ * @dnode: node to be updated
+ * @pls: linked list of publication items to be packed into buffer chain
+ * @seqno: sequence number for this message
*/
-static void named_distribute(struct list_head *message_list, u32 node,
- struct publ_list *pls, u32 max_item_buf)
+static void named_distribute(struct net *net, struct sk_buff_head *list,
+ u32 dnode, struct list_head *pls, u16 seqno)
{
struct publication *publ;
- struct sk_buff *buf = NULL;
+ struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- u32 left = 0;
- u32 rest = pls->size * ITEM_SIZE;
-
- list_for_each_entry(publ, &pls->list, local_list) {
- if (!buf) {
- left = (rest <= max_item_buf) ? rest : max_item_buf;
- rest -= left;
- buf = named_prepare_buf(PUBLICATION, left, node);
- if (!buf) {
+ u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
+ ITEM_SIZE) * ITEM_SIZE;
+ u32 msg_rem = msg_dsz;
+ struct tipc_msg *hdr;
+
+ list_for_each_entry(publ, pls, binding_node) {
+ /* Prepare next buffer: */
+ if (!skb) {
+ skb = named_prepare_buf(net, PUBLICATION, msg_rem,
+ dnode);
+ if (!skb) {
pr_warn("Bulk publication failure\n");
return;
}
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ hdr = buf_msg(skb);
+ msg_set_bc_ack_invalid(hdr, true);
+ msg_set_bulk(hdr);
+ msg_set_non_legacy(hdr);
+ item = (struct distr_item *)msg_data(hdr);
}
+
+ /* Pack publication into message: */
publ_to_item(item, publ);
item++;
- left -= ITEM_SIZE;
- if (!left) {
- list_add_tail((struct list_head *)buf, message_list);
- buf = NULL;
+ msg_rem -= ITEM_SIZE;
+
+ /* Append full buffer to list: */
+ if (!msg_rem) {
+ __skb_queue_tail(list, skb);
+ skb = NULL;
+ msg_rem = msg_dsz;
}
}
+ if (skb) {
+ hdr = buf_msg(skb);
+ msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem));
+ skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
+ __skb_queue_tail(list, skb);
+ }
+ hdr = buf_msg(skb_peek_tail(list));
+ msg_set_last_bulk(hdr);
+ msg_set_named_seqno(hdr, seqno);
}
/**
* tipc_named_node_up - tell specified node about all publications by this node
+ * @net: the associated network namespace
+ * @dnode: destination node
+ * @capabilities: peer node's capabilities
*/
-void tipc_named_node_up(unsigned long nodearg)
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
{
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct list_head message_list;
- u32 node = (u32)nodearg;
- u32 max_item_buf = 0;
-
- /* compute maximum amount of publication data to send per message */
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(node);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[0];
- if (l_ptr)
- max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) /
- ITEM_SIZE) * ITEM_SIZE;
- tipc_node_unlock(n_ptr);
- }
- read_unlock_bh(&tipc_net_lock);
- if (!max_item_buf)
- return;
-
- /* create list of publication messages, then send them as a unit */
- INIT_LIST_HEAD(&message_list);
-
- read_lock_bh(&tipc_nametbl_lock);
- named_distribute(&message_list, node, &publ_cluster, max_item_buf);
- named_distribute(&message_list, node, &publ_zone, max_item_buf);
- read_unlock_bh(&tipc_nametbl_lock);
-
- tipc_link_send_names(&message_list, node);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct sk_buff_head head;
+ u16 seqno;
+
+ __skb_queue_head_init(&head);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests++;
+ seqno = nt->snd_nxt;
+ spin_unlock_bh(&tn->nametbl_lock);
+
+ read_lock_bh(&nt->cluster_scope_lock);
+ named_distribute(net, &head, dnode, &nt->cluster_scope, seqno);
+ tipc_node_xmit(net, &head, dnode, 0);
+ read_unlock_bh(&nt->cluster_scope_lock);
}
/**
- * named_purge_publ - remove publication associated with a failed node
+ * tipc_publ_purge - remove publication associated with a failed node
+ * @net: the associated network namespace
+ * @p: the publication to remove
+ * @addr: failed node's address
*
* Invoked for each publication issued by a newly failed node.
* Removes publication structure from name table & deletes it.
*/
-static void named_purge_publ(struct publication *publ)
+static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr)
{
- struct publication *p;
+ struct tipc_net *tn = tipc_net(net);
+ struct publication *_p;
+ struct tipc_uaddr ua;
+
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type,
+ p->sr.lower, p->sr.upper);
+ spin_lock_bh(&tn->nametbl_lock);
+ _p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key);
+ if (_p)
+ tipc_node_unsubscribe(net, &_p->binding_node, addr);
+ spin_unlock_bh(&tn->nametbl_lock);
+ if (_p)
+ kfree_rcu(_p, rcu);
+}
+
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities)
+{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
- write_lock_bh(&tipc_nametbl_lock);
- p = tipc_nametbl_remove_publ(publ->type, publ->lower,
- publ->node, publ->ref, publ->key);
- if (p)
- tipc_nodesub_unsubscribe(&p->subscr);
- write_unlock_bh(&tipc_nametbl_lock);
-
- if (p != publ) {
- pr_err("Unable to remove publication from failed node\n"
- " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->ref,
- publ->key);
+ struct publication *publ, *tmp;
+
+ list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
+ tipc_publ_purge(net, publ, addr);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests--;
+ spin_unlock_bh(&tn->nametbl_lock);
+}
+
+/**
+ * tipc_update_nametbl - try to process a nametable update and notify
+ * subscribers
+ * @net: the associated network namespace
+ * @i: location of item in the message
+ * @node: node address
+ * @dtype: name distributor message type
+ *
+ * The name table lock (tn->nametbl_lock) must be held.
+ * Return: true if the name table was updated, otherwise false.
+ */
+static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
+ u32 node, u32 dtype)
+{
+ struct publication *p = NULL;
+ struct tipc_socket_addr sk;
+ struct tipc_uaddr ua;
+ u32 key = ntohl(i->key);
+
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+ ntohl(i->type), ntohl(i->lower), ntohl(i->upper));
+ sk.ref = ntohl(i->port);
+ sk.node = node;
+
+ if (dtype == PUBLICATION) {
+ p = tipc_nametbl_insert_publ(net, &ua, &sk, key);
+ if (p) {
+ tipc_node_subscribe(net, &p->binding_node, node);
+ return true;
+ }
+ } else if (dtype == WITHDRAWAL) {
+ p = tipc_nametbl_remove_publ(net, &ua, &sk, key);
+ if (p) {
+ tipc_node_unsubscribe(net, &p->binding_node, node);
+ kfree_rcu(p, rcu);
+ return true;
+ }
+ pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
+ ua.sr.type, ua.sr.lower, node);
+ } else {
+ pr_warn_ratelimited("Unknown name table message received\n");
}
+ return false;
+}
+
+static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
+{
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+ u16 seqno;
+
+ spin_lock_bh(&namedq->lock);
+ skb_queue_walk_safe(namedq, skb, tmp) {
+ if (unlikely(skb_linearize(skb))) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ hdr = buf_msg(skb);
+ seqno = msg_named_seqno(hdr);
+ if (msg_is_last_bulk(hdr)) {
+ *rcv_nxt = seqno;
+ *open = true;
+ }
+
+ if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
+ __skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
+ return skb;
+ }
- kfree(p);
+ if (*open && (*rcv_nxt == seqno)) {
+ (*rcv_nxt)++;
+ __skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
+ return skb;
+ }
+
+ if (less(seqno, *rcv_nxt)) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ }
+ spin_unlock_bh(&namedq->lock);
+ return NULL;
}
/**
- * tipc_named_recv - process name table update message sent by another node
+ * tipc_named_rcv - process name table update messages sent by another node
+ * @net: the associated network namespace
+ * @namedq: queue to receive from
+ * @rcv_nxt: next expected seqno; updated here as messages are consumed
+ * @open: last bulk msg was received (FIXME)
*/
-void tipc_named_recv(struct sk_buff *buf)
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
{
- struct publication *publ;
- struct tipc_msg *msg = buf_msg(buf);
- struct distr_item *item = (struct distr_item *)msg_data(msg);
- u32 count = msg_data_sz(msg) / ITEM_SIZE;
-
- write_lock_bh(&tipc_nametbl_lock);
- while (count--) {
- if (msg_type(msg) == PUBLICATION) {
- publ = tipc_nametbl_insert_publ(ntohl(item->type),
- ntohl(item->lower),
- ntohl(item->upper),
- TIPC_CLUSTER_SCOPE,
- msg_orignode(msg),
- ntohl(item->ref),
- ntohl(item->key));
- if (publ) {
- tipc_nodesub_subscribe(&publ->subscr,
- msg_orignode(msg),
- publ,
- (net_ev_handler)
- named_purge_publ);
- }
- } else if (msg_type(msg) == WITHDRAWAL) {
- publ = tipc_nametbl_remove_publ(ntohl(item->type),
- ntohl(item->lower),
- msg_orignode(msg),
- ntohl(item->ref),
- ntohl(item->key));
-
- if (publ) {
- tipc_nodesub_unsubscribe(&publ->subscr);
- kfree(publ);
- } else {
- pr_err("Unable to remove publication by node 0x%x\n"
- " (type=%u, lower=%u, ref=%u, key=%u)\n",
- msg_orignode(msg), ntohl(item->type),
- ntohl(item->lower), ntohl(item->ref),
- ntohl(item->key));
- }
- } else {
- pr_warn("Unrecognized name table message received\n");
+ struct tipc_net *tn = tipc_net(net);
+ struct distr_item *item;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ u32 count, node;
+
+ spin_lock_bh(&tn->nametbl_lock);
+ while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) {
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ item = (struct distr_item *)msg_data(hdr);
+ count = msg_data_sz(hdr) / ITEM_SIZE;
+ while (count--) {
+ tipc_update_nametbl(net, item, node, msg_type(hdr));
+ item++;
}
- item++;
+ kfree_skb(skb);
}
- write_unlock_bh(&tipc_nametbl_lock);
- kfree_skb(buf);
+ spin_unlock_bh(&tn->nametbl_lock);
}
/**
* tipc_named_reinit - re-initialize local publications
+ * @net: the associated network namespace
*
* This routine is called whenever TIPC networking is enabled.
* All name table entries published by this node are updated to reflect
* the node's new network address.
*/
-void tipc_named_reinit(void)
+void tipc_named_reinit(struct net *net)
{
- struct publication *publ;
- int scope;
-
- write_lock_bh(&tipc_nametbl_lock);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct publication *p;
+ u32 self = tipc_own_addr(net);
- for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
- list_for_each_entry(publ, &publ_lists[scope]->list, local_list)
- publ->node = tipc_own_addr;
+ spin_lock_bh(&tn->nametbl_lock);
- write_unlock_bh(&tipc_nametbl_lock);
+ list_for_each_entry_rcu(p, &nt->node_scope, binding_node)
+ p->sk.node = self;
+ list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node)
+ p->sk.node = self;
+ nt->rc_dests = 0;
+ spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1e41bdd4f255..c677f6f082df 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -39,10 +39,41 @@
#include "name_table.h"
-void tipc_named_publish(struct publication *publ);
-void tipc_named_withdraw(struct publication *publ);
-void tipc_named_node_up(unsigned long node);
-void tipc_named_recv(struct sk_buff *buf);
-void tipc_named_reinit(void);
+#define ITEM_SIZE sizeof(struct distr_item)
+
+/**
+ * struct distr_item - publication info distributed to other nodes
+ * @type: name sequence type
+ * @lower: name sequence lower bound
+ * @upper: name sequence upper bound
+ * @port: publishing port reference
+ * @key: publication key
+ *
+ * ===> All fields are stored in network byte order. <===
+ *
+ * First 3 fields identify (name or) name sequence being published.
+ * Port field uniquely identifies the port that published the name sequence.
+ * Key field uniquely identifies publication, in the event a port has
+ * multiple publications of the same name sequence.
+ *
+ * Note: There is no field that identifies the publishing node because it is
+ * the same for all items contained within a publication message.
+ */
+struct distr_item {
+ __be32 type;
+ __be32 lower;
+ __be32 upper;
+ __be32 port;
+ __be32 key;
+};
+
+struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
+struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open);
+void tipc_named_reinit(struct net *net);
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 09dcd54b04e1..e74940eab3a4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1,8 +1,9 @@
/*
* net/tipc/name_table.c: TIPC name table code
*
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
+ * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,930 +35,1170 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <net/sock.h>
+#include <linux/list_sort.h>
+#include <linux/rbtree_augmented.h>
#include "core.h"
-#include "config.h"
+#include "netlink.h"
#include "name_table.h"
#include "name_distr.h"
#include "subscr.h"
-#include "port.h"
-
-#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
-
-/**
- * struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- * publications of the associated name sequence belong to it.
- * (The cluster and node lists may be empty.)
- */
-struct name_info {
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
- u32 node_list_size;
- u32 cluster_list_size;
- u32 zone_list_size;
-};
+#include "bcast.h"
+#include "addr.h"
+#include "node.h"
+#include "group.h"
/**
- * struct sub_seq - container for all published instances of a name sequence
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @info: pointer to name sequence publication info
+ * struct service_range - container for all bindings of a service range
+ * @lower: service range lower bound
+ * @upper: service range upper bound
+ * @tree_node: member of service range RB tree
+ * @max: largest 'upper' in this node subtree
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first lookup and multicast lookup algorithm
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
*/
-struct sub_seq {
+struct service_range {
u32 lower;
u32 upper;
- struct name_info *info;
+ struct rb_node tree_node;
+ u32 max;
+ struct list_head local_publ;
+ struct list_head all_publ;
};
/**
- * struct name_seq - container for all published instances of a name type
- * @type: 32 bit 'type' value for name sequence
- * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type';
- * sub-sequences are sorted in ascending order
- * @alloc: number of sub-sequences currently in array
- * @first_free: array index of first unused sub-sequence entry
- * @ns_list: links to adjacent name sequences in hash chain
- * @subscriptions: list of subscriptions for this 'type'
- * @lock: spinlock controlling access to publication lists of all sub-sequences
+ * struct tipc_service - container for all published instances of a service type
+ * @type: 32 bit 'type' value for service
+ * @publ_cnt: increasing counter for publications in this service
+ * @ranges: rb tree containing all service ranges for this service
+ * @service_list: links to adjacent services in hash chain
+ * @subscriptions: list of subscriptions for this service type
+ * @lock: spinlock controlling access to pertaining service ranges/publications
+ * @rcu: RCU callback head used for deferred freeing
*/
-struct name_seq {
+struct tipc_service {
u32 type;
- struct sub_seq *sseqs;
- u32 alloc;
- u32 first_free;
- struct hlist_node ns_list;
+ u32 publ_cnt;
+ struct rb_root ranges;
+ struct hlist_node service_list;
struct list_head subscriptions;
- spinlock_t lock;
+ spinlock_t lock; /* Covers service range tree */
+ struct rcu_head rcu;
};
+#define service_range_upper(sr) ((sr)->upper)
+RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
+ struct service_range, tree_node, u32, max,
+ service_range_upper)
+
+#define service_range_entry(rbtree_node) \
+ (container_of(rbtree_node, struct service_range, tree_node))
+
+#define service_range_overlap(sr, start, end) \
+ ((sr)->lower <= (end) && (sr)->upper >= (start))
+
/**
- * struct name_table - table containing all existing port name publications
- * @types: pointer to fixed-sized array of name sequence lists,
- * accessed via hashing on 'type'; name sequence lists are *not* sorted
- * @local_publ_count: number of publications issued by this node
+ * service_range_foreach_match - iterate over tipc service rbtree for each
+ * range match
+ * @sr: the service range pointer as a loop cursor
+ * @sc: the pointer to tipc service which holds the service range rbtree
+ * @start: beginning of the search range (end >= start) for matching
+ * @end: end of the search range (end >= start) for matching
*/
-struct name_table {
- struct hlist_head *types;
- u32 local_publ_count;
-};
+#define service_range_foreach_match(sr, sc, start, end) \
+ for (sr = service_range_match_first((sc)->ranges.rb_node, \
+ start, \
+ end); \
+ sr; \
+ sr = service_range_match_next(&(sr)->tree_node, \
+ start, \
+ end))
-static struct name_table table;
-DEFINE_RWLOCK(tipc_nametbl_lock);
-
-static int hash(int x)
+/**
+ * service_range_match_first - find first service range matching a range
+ * @n: the root node of service range rbtree for searching
+ * @start: beginning of the search range (end >= start) for matching
+ * @end: end of the search range (end >= start) for matching
+ *
+ * Return: the leftmost service range node in the rbtree that overlaps the
+ * specified range, if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_first(struct rb_node *n,
+ u32 start, u32 end)
{
- return x & (TIPC_NAMETBL_SIZE - 1);
+ struct service_range *sr;
+ struct rb_node *l, *r;
+
+ /* No overlaps in tree at all? */
+ if (!n || service_range_entry(n)->max < start)
+ return NULL;
+
+ while (n) {
+ l = n->rb_left;
+ if (l && service_range_entry(l)->max >= start) {
+ /* A leftmost overlap range node must be one in the left
+ * subtree. If not, it has lower > end, then nodes on
+ * the right side cannot satisfy the condition either.
+ */
+ n = l;
+ continue;
+ }
+
+ /* No one in the left subtree can match, return if this node is
+ * an overlap i.e. leftmost.
+ */
+ sr = service_range_entry(n);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup on the right side */
+ r = n->rb_right;
+ if (sr->lower <= end &&
+ r && service_range_entry(r)->max >= start) {
+ n = r;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
}
/**
- * publ_create - create a publication structure
+ * service_range_match_next - find next service range matching a range
+ * @n: a node in service range rbtree from which the searching starts
+ * @start: beginning of the search range (end >= start) for matching
+ * @end: end of the search range (end >= start) for matching
+ *
+ * Return: the next service range node to the given node in the rbtree that
+ * overlaps the specified range, if any. Otherwise, returns NULL.
*/
-static struct publication *publ_create(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port_ref,
- u32 key)
+static struct service_range *service_range_match_next(struct rb_node *n,
+ u32 start, u32 end)
{
- struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
- if (publ == NULL) {
- pr_warn("Publication creation failure, no memory\n");
- return NULL;
+ struct service_range *sr;
+ struct rb_node *p, *r;
+
+ while (n) {
+ r = n->rb_right;
+ if (r && service_range_entry(r)->max >= start)
+ /* A next overlap range node must be one in the right
+ * subtree. If not, it has lower > end, then any next
+ * successor (- an ancestor) of this node cannot
+ * satisfy the condition either.
+ */
+ return service_range_match_first(r, start, end);
+
+ /* No one in the right subtree can match, go up to find an
+ * ancestor of this node which is parent of a left-hand child.
+ */
+ while ((p = rb_parent(n)) && n == p->rb_right)
+ n = p;
+ if (!p)
+ break;
+
+ /* Return if this ancestor is an overlap */
+ sr = service_range_entry(p);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup more from this ancestor */
+ if (sr->lower <= end) {
+ n = p;
+ continue;
+ }
+ break;
}
- publ->type = type;
- publ->lower = lower;
- publ->upper = upper;
- publ->scope = scope;
- publ->node = node;
- publ->ref = port_ref;
- publ->key = key;
- INIT_LIST_HEAD(&publ->local_list);
- INIT_LIST_HEAD(&publ->pport_list);
- INIT_LIST_HEAD(&publ->subscr.nodesub_list);
- return publ;
+ return NULL;
+}
+
+static int hash(int x)
+{
+ return x & (TIPC_NAMETBL_SIZE - 1);
}
/**
- * tipc_subseq_alloc - allocate a specified number of sub-sequence structures
+ * tipc_publ_create - create a publication structure
+ * @ua: the service range the user is binding to
+ * @sk: the address of the socket that is bound
+ * @key: publication key
*/
-static struct sub_seq *tipc_subseq_alloc(u32 cnt)
+static struct publication *tipc_publ_create(struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
- struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC);
- return sseq;
+ struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC);
+
+ if (!p)
+ return NULL;
+
+ p->sr = ua->sr;
+ p->sk = *sk;
+ p->scope = ua->scope;
+ p->key = key;
+ INIT_LIST_HEAD(&p->binding_sock);
+ INIT_LIST_HEAD(&p->binding_node);
+ INIT_LIST_HEAD(&p->local_publ);
+ INIT_LIST_HEAD(&p->all_publ);
+ INIT_LIST_HEAD(&p->list);
+ return p;
}
/**
- * tipc_nameseq_create - create a name sequence structure for the specified 'type'
+ * tipc_service_create - create a service structure for the specified 'type'
+ * @net: network namespace
+ * @ua: address representing the service to be bound
*
- * Allocates a single sub-sequence structure and sets it to all 0's.
+ * Allocates a zeroed service structure and adds it to the name table hash chain.
*/
-static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
+static struct tipc_service *tipc_service_create(struct net *net,
+ struct tipc_uaddr *ua)
{
- struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC);
- struct sub_seq *sseq = tipc_subseq_alloc(1);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_service *service;
+ struct hlist_head *hd;
- if (!nseq || !sseq) {
- pr_warn("Name sequence creation failed, no memory\n");
- kfree(nseq);
- kfree(sseq);
+ service = kzalloc(sizeof(*service), GFP_ATOMIC);
+ if (!service) {
+ pr_warn("Service creation failed, no memory\n");
return NULL;
}
- spin_lock_init(&nseq->lock);
- nseq->type = type;
- nseq->sseqs = sseq;
- nseq->alloc = 1;
- INIT_HLIST_NODE(&nseq->ns_list);
- INIT_LIST_HEAD(&nseq->subscriptions);
- hlist_add_head(&nseq->ns_list, seq_head);
- return nseq;
+ spin_lock_init(&service->lock);
+ service->type = ua->sr.type;
+ service->ranges = RB_ROOT;
+ INIT_HLIST_NODE(&service->service_list);
+ INIT_LIST_HEAD(&service->subscriptions);
+ hd = &nt->services[hash(ua->sr.type)];
+ hlist_add_head_rcu(&service->service_list, hd);
+ return service;
}
-/*
- * nameseq_delete_empty - deletes a name sequence structure if now unused
+/* tipc_service_find_range - find service range matching publication parameters
*/
-static void nameseq_delete_empty(struct name_seq *seq)
+static struct service_range *tipc_service_find_range(struct tipc_service *sc,
+ struct tipc_uaddr *ua)
{
- if (!seq->first_free && list_empty(&seq->subscriptions)) {
- hlist_del_init(&seq->ns_list);
- kfree(seq->sseqs);
- kfree(seq);
- }
-}
+ struct service_range *sr;
-/**
- * nameseq_find_subseq - find sub-sequence (if any) matching a name instance
- *
- * Very time-critical, so binary searches through sub-sequence array.
- */
-static struct sub_seq *nameseq_find_subseq(struct name_seq *nseq,
- u32 instance)
-{
- struct sub_seq *sseqs = nseq->sseqs;
- int low = 0;
- int high = nseq->first_free - 1;
- int mid;
-
- while (low <= high) {
- mid = (low + high) / 2;
- if (instance < sseqs[mid].lower)
- high = mid - 1;
- else if (instance > sseqs[mid].upper)
- low = mid + 1;
- else
- return &sseqs[mid];
+ service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
+ /* Look for exact match */
+ if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper)
+ return sr;
}
+
return NULL;
}
-/**
- * nameseq_locate_subseq - determine position of name instance in sub-sequence
- *
- * Returns index in sub-sequence array of the entry that contains the specified
- * instance value; if no entry contains that value, returns the position
- * where a new entry for it would be inserted in the array.
- *
- * Note: Similar to binary search code for locating a sub-sequence.
- */
-static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance)
-{
- struct sub_seq *sseqs = nseq->sseqs;
- int low = 0;
- int high = nseq->first_free - 1;
- int mid;
-
- while (low <= high) {
- mid = (low + high) / 2;
- if (instance < sseqs[mid].lower)
- high = mid - 1;
- else if (instance > sseqs[mid].upper)
- low = mid + 1;
+static struct service_range *tipc_service_create_range(struct tipc_service *sc,
+ struct publication *p)
+{
+ struct rb_node **n, *parent = NULL;
+ struct service_range *sr;
+ u32 lower = p->sr.lower;
+ u32 upper = p->sr.upper;
+
+ n = &sc->ranges.rb_node;
+ while (*n) {
+ parent = *n;
+ sr = service_range_entry(parent);
+ if (lower == sr->lower && upper == sr->upper)
+ return sr;
+ if (sr->max < upper)
+ sr->max = upper;
+ if (lower <= sr->lower)
+ n = &parent->rb_left;
else
- return mid;
+ n = &parent->rb_right;
}
- return low;
+ sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
+ if (!sr)
+ return NULL;
+ sr->lower = lower;
+ sr->upper = upper;
+ sr->max = upper;
+ INIT_LIST_HEAD(&sr->local_publ);
+ INIT_LIST_HEAD(&sr->all_publ);
+ rb_link_node(&sr->tree_node, parent, n);
+ rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
+ return sr;
}
-/**
- * tipc_nameseq_insert_publ
- */
-static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
- u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port, u32 key)
-{
- struct tipc_subscription *s;
- struct tipc_subscription *st;
- struct publication *publ;
- struct sub_seq *sseq;
- struct name_info *info;
- int created_subseq = 0;
-
- sseq = nameseq_find_subseq(nseq, lower);
- if (sseq) {
-
- /* Lower end overlaps existing entry => need an exact match */
- if ((sseq->lower != lower) || (sseq->upper != upper)) {
- pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
- type, lower, upper);
- return NULL;
- }
-
- info = sseq->info;
-
- /* Check if an identical publication already exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->ref == port) && (publ->key == key) &&
- (!publ->node || (publ->node == node)))
- return NULL;
- }
- } else {
- u32 inspos;
- struct sub_seq *freesseq;
-
- /* Find where lower end should be inserted */
- inspos = nameseq_locate_subseq(nseq, lower);
-
- /* Fail if upper end overlaps into an existing entry */
- if ((inspos < nseq->first_free) &&
- (upper >= nseq->sseqs[inspos].lower)) {
- pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
- type, lower, upper);
- return NULL;
- }
-
- /* Ensure there is space for new sub-sequence */
- if (nseq->first_free == nseq->alloc) {
- struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
-
- if (!sseqs) {
- pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
- type, lower, upper);
- return NULL;
- }
- memcpy(sseqs, nseq->sseqs,
- nseq->alloc * sizeof(struct sub_seq));
- kfree(nseq->sseqs);
- nseq->sseqs = sseqs;
- nseq->alloc *= 2;
- }
-
- info = kzalloc(sizeof(*info), GFP_ATOMIC);
- if (!info) {
- pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
- type, lower, upper);
- return NULL;
+static bool tipc_service_insert_publ(struct net *net,
+ struct tipc_service *sc,
+ struct publication *p)
+{
+ struct tipc_subscription *sub, *tmp;
+ struct service_range *sr;
+ struct publication *_p;
+ u32 node = p->sk.node;
+ bool first = false;
+ bool res = false;
+ u32 key = p->key;
+
+ spin_lock_bh(&sc->lock);
+ sr = tipc_service_create_range(sc, p);
+ if (!sr)
+ goto exit;
+
+ first = list_empty(&sr->all_publ);
+
+ /* Return if the publication already exists */
+ list_for_each_entry(_p, &sr->all_publ, all_publ) {
+ if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) {
+ pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n",
+ p->sr.type, p->sr.lower, p->sr.upper,
+ node, p->sk.ref, key);
+ goto exit;
}
-
- INIT_LIST_HEAD(&info->node_list);
- INIT_LIST_HEAD(&info->cluster_list);
- INIT_LIST_HEAD(&info->zone_list);
-
- /* Insert new sub-sequence */
- sseq = &nseq->sseqs[inspos];
- freesseq = &nseq->sseqs[nseq->first_free];
- memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq));
- memset(sseq, 0, sizeof(*sseq));
- nseq->first_free++;
- sseq->lower = lower;
- sseq->upper = upper;
- sseq->info = info;
- created_subseq = 1;
- }
-
- /* Insert a publication */
- publ = publ_create(type, lower, upper, scope, node, port, key);
- if (!publ)
- return NULL;
-
- list_add(&publ->zone_list, &info->zone_list);
- info->zone_list_size++;
-
- if (in_own_cluster(node)) {
- list_add(&publ->cluster_list, &info->cluster_list);
- info->cluster_list_size++;
}
- if (in_own_node(node)) {
- list_add(&publ->node_list, &info->node_list);
- info->node_list_size++;
- }
+ if (in_own_node(net, p->sk.node))
+ list_add(&p->local_publ, &sr->local_publ);
+ list_add(&p->all_publ, &sr->all_publ);
+ p->id = sc->publ_cnt++;
/* Any subscriptions waiting for notification? */
- list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscr_report_overlap(s,
- publ->lower,
- publ->upper,
- TIPC_PUBLISHED,
- publ->ref,
- publ->node,
- created_subseq);
+ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+ tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first);
}
- return publ;
+ res = true;
+exit:
+ if (!res)
+ pr_warn("Failed to bind to %u,%u,%u\n",
+ p->sr.type, p->sr.lower, p->sr.upper);
+ spin_unlock_bh(&sc->lock);
+ return res;
}
/**
- * tipc_nameseq_remove_publ
- *
- * NOTE: There may be cases where TIPC is asked to remove a publication
- * that is not in the name table. For example, if another node issues a
- * publication for a name sequence that overlaps an existing name sequence
- * the publication will not be recorded, which means the publication won't
- * be found when the name sequence is later withdrawn by that node.
- * A failed withdraw request simply returns a failure indication and lets the
- * caller issue any error or warning messages associated with such a problem.
+ * tipc_service_remove_publ - remove a publication from a service
+ * @r: service_range to remove publication from
+ * @sk: address of publishing socket
+ * @key: target publication key
*/
-static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst,
- u32 node, u32 ref, u32 key)
+static struct publication *tipc_service_remove_publ(struct service_range *r,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
- struct publication *publ;
- struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
- struct name_info *info;
- struct sub_seq *free;
- struct tipc_subscription *s, *st;
- int removed_subseq = 0;
-
- if (!sseq)
- return NULL;
+ struct publication *p;
+ u32 node = sk->node;
- info = sseq->info;
-
- /* Locate publication, if it exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->key == key) && (publ->ref == ref) &&
- (!publ->node || (publ->node == node)))
- goto found;
+ list_for_each_entry(p, &r->all_publ, all_publ) {
+ if (p->key != key || (node && node != p->sk.node))
+ continue;
+ list_del(&p->all_publ);
+ list_del(&p->local_publ);
+ return p;
}
return NULL;
+}
-found:
- /* Remove publication from zone scope list */
- list_del(&publ->zone_list);
- info->zone_list_size--;
-
- /* Remove publication from cluster scope list, if present */
- if (in_own_cluster(node)) {
- list_del(&publ->cluster_list);
- info->cluster_list_size--;
- }
-
- /* Remove publication from node scope list, if present */
- if (in_own_node(node)) {
- list_del(&publ->node_list);
- info->node_list_size--;
- }
-
- /* Contract subseq list if no more publications for that subseq */
- if (list_empty(&info->zone_list)) {
- kfree(info);
- free = &nseq->sseqs[nseq->first_free--];
- memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
- removed_subseq = 1;
- }
-
- /* Notify any waiting subscriptions */
- list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
- tipc_subscr_report_overlap(s,
- publ->lower,
- publ->upper,
- TIPC_WITHDRAWN,
- publ->ref,
- publ->node,
- removed_subseq);
- }
+/*
+ * Publications are ordered by 'id'; time_after32() is reused to compare them
+ */
+#define publication_after(pa, pb) time_after32((pa)->id, (pb)->id)
+static int tipc_publ_sort(void *priv, const struct list_head *a,
+ const struct list_head *b)
+{
+ struct publication *pa, *pb;
- return publ;
+ pa = container_of(a, struct publication, list);
+ pb = container_of(b, struct publication, list);
+ return publication_after(pa, pb);
}
/**
- * tipc_nameseq_subscribe - attach a subscription, and issue
- * the prescribed number of events if there is any sub-
- * sequence overlapping with the requested sequence
+ * tipc_service_subscribe - attach a subscription, and optionally
+ * issue the prescribed number of events if there is any service
+ * range overlapping with the requested range
+ * @service: the tipc_service to attach the @sub to
+ * @sub: the subscription to attach
*/
-static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s)
+static void tipc_service_subscribe(struct tipc_service *service,
+ struct tipc_subscription *sub)
{
- struct sub_seq *sseq = nseq->sseqs;
+ struct publication *p, *first, *tmp;
+ struct list_head publ_list;
+ struct service_range *sr;
+ u32 filter, lower, upper;
- list_add(&s->nameseq_list, &nseq->subscriptions);
+ filter = sub->s.filter;
+ lower = sub->s.seq.lower;
+ upper = sub->s.seq.upper;
- if (!sseq)
+ tipc_sub_get(sub);
+ list_add(&sub->service_list, &service->subscriptions);
+
+ if (filter & TIPC_SUB_NO_STATUS)
return;
- while (sseq != &nseq->sseqs[nseq->first_free]) {
- if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) {
- struct publication *crs;
- struct name_info *info = sseq->info;
- int must_report = 1;
-
- list_for_each_entry(crs, &info->zone_list, zone_list) {
- tipc_subscr_report_overlap(s,
- sseq->lower,
- sseq->upper,
- TIPC_PUBLISHED,
- crs->ref,
- crs->node,
- must_report);
- must_report = 0;
- }
+ INIT_LIST_HEAD(&publ_list);
+ service_range_foreach_match(sr, service, lower, upper) {
+ first = NULL;
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ if (filter & TIPC_SUB_PORTS)
+ list_add_tail(&p->list, &publ_list);
+ else if (!first || publication_after(first, p))
+ /* Pick this range's *first* publication */
+ first = p;
}
- sseq++;
+ if (first)
+ list_add_tail(&first->list, &publ_list);
+ }
+
+ /* Sort the publications before reporting */
+ list_sort(NULL, &publ_list, tipc_publ_sort);
+ list_for_each_entry_safe(p, tmp, &publ_list, list) {
+ tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true);
+ list_del_init(&p->list);
}
}
-static struct name_seq *nametbl_find_seq(u32 type)
+static struct tipc_service *tipc_service_find(struct net *net,
+ struct tipc_uaddr *ua)
{
- struct hlist_head *seq_head;
- struct name_seq *ns;
-
- seq_head = &table.types[hash(type)];
- hlist_for_each_entry(ns, seq_head, ns_list) {
- if (ns->type == type)
- return ns;
+ struct name_table *nt = tipc_name_table(net);
+ struct hlist_head *service_head;
+ struct tipc_service *service;
+
+ service_head = &nt->services[hash(ua->sr.type)];
+ hlist_for_each_entry_rcu(service, service_head, service_list) {
+ if (service->type == ua->sr.type)
+ return service;
}
-
return NULL;
};
-struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port, u32 key)
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
- struct name_seq *seq = nametbl_find_seq(type);
-
- if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
- (lower > upper)) {
- pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
- type, lower, upper, scope);
- return NULL;
- }
+ struct tipc_service *sc;
+ struct publication *p;
- if (!seq)
- seq = tipc_nameseq_create(type, &table.types[hash(type)]);
- if (!seq)
+ p = tipc_publ_create(ua, sk, key);
+ if (!p)
return NULL;
- return tipc_nameseq_insert_publ(seq, type, lower, upper,
- scope, node, port, key);
+ sc = tipc_service_find(net, ua);
+ if (!sc)
+ sc = tipc_service_create(net, ua);
+ if (sc && tipc_service_insert_publ(net, sc, p))
+ return p;
+ kfree(p);
+ return NULL;
}
-struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
- u32 node, u32 ref, u32 key)
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
- struct publication *publ;
- struct name_seq *seq = nametbl_find_seq(type);
+ struct tipc_subscription *sub, *tmp;
+ struct publication *p = NULL;
+ struct service_range *sr;
+ struct tipc_service *sc;
+ bool last;
+
+ sc = tipc_service_find(net, ua);
+ if (!sc)
+ goto exit;
- if (!seq)
- return NULL;
+ spin_lock_bh(&sc->lock);
+ sr = tipc_service_find_range(sc, ua);
+ if (!sr)
+ goto unlock;
+ p = tipc_service_remove_publ(sr, sk, key);
+ if (!p)
+ goto unlock;
- publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
- nameseq_delete_empty(seq);
- return publ;
+ /* Notify any waiting subscriptions */
+ last = list_empty(&sr->all_publ);
+ list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+ tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last);
+ }
+
+ /* Remove service range item if this was its last publication */
+ if (list_empty(&sr->all_publ)) {
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
+ kfree(sr);
+ }
+
+ /* Delete service item if no more publications and subscriptions */
+ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
+ hlist_del_init_rcu(&sc->service_list);
+ kfree_rcu(sc, rcu);
+ }
+unlock:
+ spin_unlock_bh(&sc->lock);
+exit:
+ if (!p) {
+ pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n",
+ ua->sr.type, ua->sr.lower, ua->sr.upper,
+ sk->node, sk->ref, key);
+ }
+ return p;
}
/**
- * tipc_nametbl_translate - perform name translation
+ * tipc_nametbl_lookup_anycast - perform service instance to socket translation
+ * @net: network namespace
+ * @ua: service address to look up
+ * @sk: address of the socket we want to find
*
- * On entry, 'destnode' is the search domain used during translation.
+ * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be
+ * performed, which may not be this one.
*
* On exit:
- * - if name translation is deferred to another node/cluster/zone,
- * leaves 'destnode' unchanged (will be non-zero) and returns 0
- * - if name translation is attempted and succeeds, sets 'destnode'
- * to publishing node and returns port reference (will be non-zero)
- * - if name translation is attempted and fails, sets 'destnode' to 0
- * and returns 0
+ *
+ * - If lookup is deferred to another node, leave 'sk->node' unchanged and
+ * return 'true'.
+ * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which
+ * represent the bound socket and return 'true'.
+ * - If lookup fails, return 'false'
+ *
+ * Note that for legacy users (node configured with Z.C.N address format) the
+ * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0
+ * we must look in the local binding list first
*/
-u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
+bool tipc_nametbl_lookup_anycast(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk)
{
- struct sub_seq *sseq;
- struct name_info *info;
- struct publication *publ;
- struct name_seq *seq;
- u32 ref = 0;
- u32 node = 0;
-
- if (!tipc_in_scope(*destnode, tipc_own_addr))
- return 0;
+ struct tipc_net *tn = tipc_net(net);
+ bool legacy = tn->legacy_addr_format;
+ u32 self = tipc_own_addr(net);
+ u32 inst = ua->sa.instance;
+ struct service_range *r;
+ struct tipc_service *sc;
+ struct publication *p;
+ struct list_head *l;
+ bool res = false;
+
+ if (!tipc_in_scope(legacy, sk->node, self))
+ return true;
+
+ rcu_read_lock();
+ sc = tipc_service_find(net, ua);
+ if (unlikely(!sc))
+ goto exit;
- read_lock_bh(&tipc_nametbl_lock);
- seq = nametbl_find_seq(type);
- if (unlikely(!seq))
- goto not_found;
- sseq = nameseq_find_subseq(seq, instance);
- if (unlikely(!sseq))
- goto not_found;
- spin_lock_bh(&seq->lock);
- info = sseq->info;
-
- /* Closest-First Algorithm */
- if (likely(!*destnode)) {
- if (!list_empty(&info->node_list)) {
- publ = list_first_entry(&info->node_list,
- struct publication,
- node_list);
- list_move_tail(&publ->node_list,
- &info->node_list);
- } else if (!list_empty(&info->cluster_list)) {
- publ = list_first_entry(&info->cluster_list,
- struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list,
- &info->cluster_list);
+ spin_lock_bh(&sc->lock);
+ service_range_foreach_match(r, sc, inst, inst) {
+ /* Select lookup algo: local, closest-first or round-robin */
+ if (sk->node == self) {
+ l = &r->local_publ;
+ if (list_empty(l))
+ continue;
+ p = list_first_entry(l, struct publication, local_publ);
+ list_move_tail(&p->local_publ, &r->local_publ);
+ } else if (legacy && !sk->node && !list_empty(&r->local_publ)) {
+ l = &r->local_publ;
+ p = list_first_entry(l, struct publication, local_publ);
+ list_move_tail(&p->local_publ, &r->local_publ);
} else {
- publ = list_first_entry(&info->zone_list,
- struct publication,
- zone_list);
- list_move_tail(&publ->zone_list,
- &info->zone_list);
+ l = &r->all_publ;
+ p = list_first_entry(l, struct publication, all_publ);
+ list_move_tail(&p->all_publ, &r->all_publ);
}
+ *sk = p->sk;
+ res = true;
+ /* Todo: as for legacy, pick the first matching range only, a
+ * "true" round-robin will be performed as needed.
+ */
+ break;
}
+ spin_unlock_bh(&sc->lock);
- /* Round-Robin Algorithm */
- else if (*destnode == tipc_own_addr) {
- if (list_empty(&info->node_list))
- goto no_match;
- publ = list_first_entry(&info->node_list, struct publication,
- node_list);
- list_move_tail(&publ->node_list, &info->node_list);
- } else if (in_own_cluster_exact(*destnode)) {
- if (list_empty(&info->cluster_list))
- goto no_match;
- publ = list_first_entry(&info->cluster_list, struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list, &info->cluster_list);
- } else {
- publ = list_first_entry(&info->zone_list, struct publication,
- zone_list);
- list_move_tail(&publ->zone_list, &info->zone_list);
- }
+exit:
+ rcu_read_unlock();
+ return res;
+}
+
+/* tipc_nametbl_lookup_group(): look up destination(s) in a communication group
+ * Returns a list of one (== group anycast) or more (== group multicast)
+ * destination socket/node pairs matching the given address.
+ * The requester may or may not want to exclude himself from the list.
+ */
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dsts, int *dstcnt,
+ u32 exclude, bool mcast)
+{
+ u32 self = tipc_own_addr(net);
+ u32 inst = ua->sa.instance;
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct publication *p;
+
+ *dstcnt = 0;
+ rcu_read_lock();
+ sc = tipc_service_find(net, ua);
+ if (unlikely(!sc))
+ goto exit;
- ref = publ->ref;
- node = publ->node;
+ spin_lock_bh(&sc->lock);
+
+ /* Todo: a full search i.e. service_range_foreach_match() instead? */
+ sr = service_range_match_first(sc->ranges.rb_node, inst, inst);
+ if (!sr)
+ goto no_match;
+
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ if (p->scope != ua->scope)
+ continue;
+ if (p->sk.ref == exclude && p->sk.node == self)
+ continue;
+ tipc_dest_push(dsts, p->sk.node, p->sk.ref);
+ (*dstcnt)++;
+ if (mcast)
+ continue;
+ list_move_tail(&p->all_publ, &sr->all_publ);
+ break;
+ }
no_match:
- spin_unlock_bh(&seq->lock);
-not_found:
- read_unlock_bh(&tipc_nametbl_lock);
- *destnode = node;
- return ref;
+ spin_unlock_bh(&sc->lock);
+exit:
+ rcu_read_unlock();
+ return !list_empty(dsts);
}
-/**
- * tipc_nametbl_mc_translate - find multicast destinations
- *
- * Creates list of all local ports that overlap the given multicast address;
- * also determines if any off-node ports overlap.
- *
- * Note: Publications with a scope narrower than 'limit' are ignored.
- * (i.e. local node-scope publications mustn't receive messages arriving
- * from another node, even if the multcast link brought it here)
- *
- * Returns non-zero if any off-node ports overlap
+/* tipc_nametbl_lookup_mcast_sockets(): look up node local destination sockets
+ * matching the given address
+ * Used on nodes which have received a multicast/broadcast message
+ * Returns a list of local sockets
*/
-int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
- struct tipc_port_list *dports)
-{
- struct name_seq *seq;
- struct sub_seq *sseq;
- struct sub_seq *sseq_stop;
- struct name_info *info;
- int res = 0;
-
- read_lock_bh(&tipc_nametbl_lock);
- seq = nametbl_find_seq(type);
- if (!seq)
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dports)
+{
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct publication *p;
+ u8 scope = ua->scope;
+
+ rcu_read_lock();
+ sc = tipc_service_find(net, ua);
+ if (!sc)
goto exit;
- spin_lock_bh(&seq->lock);
+ spin_lock_bh(&sc->lock);
+ service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
+ list_for_each_entry(p, &sr->local_publ, local_publ) {
+ if (scope == p->scope || scope == TIPC_ANY_SCOPE)
+ tipc_dest_push(dports, 0, p->sk.ref);
+ }
+ }
+ spin_unlock_bh(&sc->lock);
+exit:
+ rcu_read_unlock();
+}
- sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
- sseq_stop = seq->sseqs + seq->first_free;
- for (; sseq != sseq_stop; sseq++) {
- struct publication *publ;
+/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching
+ * the given address.
+ * Used on nodes which are sending out a multicast/broadcast message
+ * Returns a list of nodes, including own node if applicable
+ */
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_nlist *nodes)
+{
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct publication *p;
- if (sseq->lower > upper)
- break;
+ rcu_read_lock();
+ sc = tipc_service_find(net, ua);
+ if (!sc)
+ goto exit;
- info = sseq->info;
- list_for_each_entry(publ, &info->node_list, node_list) {
- if (publ->scope <= limit)
- tipc_port_list_add(dports, publ->ref);
+ spin_lock_bh(&sc->lock);
+ service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ tipc_nlist_add(nodes, p->sk.node);
}
-
- if (info->cluster_list_size != info->node_list_size)
- res = 1;
}
+ spin_unlock_bh(&sc->lock);
+exit:
+ rcu_read_unlock();
+}
+
+/* tipc_nametbl_build_group - build list of communication group members
+ */
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ struct tipc_uaddr *ua)
+{
+ struct service_range *sr;
+ struct tipc_service *sc;
+ struct publication *p;
+ struct rb_node *n;
+
+ rcu_read_lock();
+ sc = tipc_service_find(net, ua);
+ if (!sc)
+ goto exit;
- spin_unlock_bh(&seq->lock);
+ spin_lock_bh(&sc->lock);
+ for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ list_for_each_entry(p, &sr->all_publ, all_publ) {
+ if (p->scope != ua->scope)
+ continue;
+ tipc_group_add_member(grp, p->sk.node, p->sk.ref,
+ p->sr.lower);
+ }
+ }
+ spin_unlock_bh(&sc->lock);
exit:
- read_unlock_bh(&tipc_nametbl_lock);
- return res;
+ rcu_read_unlock();
}
-/*
- * tipc_nametbl_publish - add name publication to network name tables
+/* tipc_nametbl_publish - add service binding to name table
*/
-struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
- u32 scope, u32 port_ref, u32 key)
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key)
{
- struct publication *publ;
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct publication *p = NULL;
+ struct sk_buff *skb = NULL;
+ u32 rc_dests;
- if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
- pr_warn("Publication failed, local publication limit reached (%u)\n",
- TIPC_MAX_PUBLICATIONS);
- return NULL;
+ spin_lock_bh(&tn->nametbl_lock);
+
+ if (nt->local_publ_count >= TIPC_MAX_PUBL) {
+ pr_warn("Bind failed, max limit %u reached\n", TIPC_MAX_PUBL);
+ goto exit;
}
- write_lock_bh(&tipc_nametbl_lock);
- publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
- tipc_own_addr, port_ref, key);
- if (likely(publ)) {
- table.local_publ_count++;
- tipc_named_publish(publ);
+ p = tipc_nametbl_insert_publ(net, ua, sk, key);
+ if (p) {
+ nt->local_publ_count++;
+ skb = tipc_named_publish(net, p);
}
- write_unlock_bh(&tipc_nametbl_lock);
- return publ;
+ rc_dests = nt->rc_dests;
+exit:
+ spin_unlock_bh(&tn->nametbl_lock);
+
+ if (skb)
+ tipc_node_broadcast(net, skb, rc_dests);
+ return p;
+
}
/**
- * tipc_nametbl_withdraw - withdraw name publication from network name tables
+ * tipc_nametbl_withdraw - withdraw a service binding
+ * @net: network namespace
+ * @ua: service address/range being unbound
+ * @sk: address of the socket being unbound from
+ * @key: target publication key
*/
-int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
-{
- struct publication *publ;
-
- write_lock_bh(&tipc_nametbl_lock);
- publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
- if (likely(publ)) {
- table.local_publ_count--;
- tipc_named_withdraw(publ);
- write_unlock_bh(&tipc_nametbl_lock);
- list_del_init(&publ->pport_list);
- kfree(publ);
- return 1;
- }
- write_unlock_bh(&tipc_nametbl_lock);
- pr_err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
- return 0;
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key)
+{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct sk_buff *skb = NULL;
+ struct publication *p;
+ u32 rc_dests;
+
+ spin_lock_bh(&tn->nametbl_lock);
+
+ p = tipc_nametbl_remove_publ(net, ua, sk, key);
+ if (p) {
+ nt->local_publ_count--;
+ skb = tipc_named_withdraw(net, p);
+ list_del_init(&p->binding_sock);
+ kfree_rcu(p, rcu);
+ }
+ rc_dests = nt->rc_dests;
+ spin_unlock_bh(&tn->nametbl_lock);
+
+ if (skb)
+ tipc_node_broadcast(net, skb, rc_dests);
}
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
+ * @sub: subscription to add
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s)
-{
- u32 type = s->seq.type;
- struct name_seq *seq;
-
- write_lock_bh(&tipc_nametbl_lock);
- seq = nametbl_find_seq(type);
- if (!seq)
- seq = tipc_nameseq_create(type, &table.types[hash(type)]);
- if (seq) {
- spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s);
- spin_unlock_bh(&seq->lock);
+bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
+{
+ struct tipc_net *tn = tipc_net(sub->net);
+ u32 type = sub->s.seq.type;
+ struct tipc_service *sc;
+ struct tipc_uaddr ua;
+ bool res = true;
+
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
+ sub->s.seq.lower, sub->s.seq.upper);
+ spin_lock_bh(&tn->nametbl_lock);
+ sc = tipc_service_find(sub->net, &ua);
+ if (!sc)
+ sc = tipc_service_create(sub->net, &ua);
+ if (sc) {
+ spin_lock_bh(&sc->lock);
+ tipc_service_subscribe(sc, sub);
+ spin_unlock_bh(&sc->lock);
} else {
- pr_warn("Failed to create subscription for {%u,%u,%u}\n",
- s->seq.type, s->seq.lower, s->seq.upper);
+ pr_warn("Failed to subscribe for {%u,%u,%u}\n",
+ type, sub->s.seq.lower, sub->s.seq.upper);
+ res = false;
}
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&tn->nametbl_lock);
+ return res;
}
/**
* tipc_nametbl_unsubscribe - remove a subscription object from name table
+ * @sub: subscription to remove
*/
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
{
- struct name_seq *seq;
+ struct tipc_net *tn = tipc_net(sub->net);
+ struct tipc_service *sc;
+ struct tipc_uaddr ua;
+
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper);
+ spin_lock_bh(&tn->nametbl_lock);
+ sc = tipc_service_find(sub->net, &ua);
+ if (!sc)
+ goto exit;
- write_lock_bh(&tipc_nametbl_lock);
- seq = nametbl_find_seq(s->seq.type);
- if (seq != NULL) {
- spin_lock_bh(&seq->lock);
- list_del_init(&s->nameseq_list);
- spin_unlock_bh(&seq->lock);
- nameseq_delete_empty(seq);
+ spin_lock_bh(&sc->lock);
+ list_del_init(&sub->service_list);
+ tipc_sub_put(sub);
+
+ /* Delete service item if no more publications and subscriptions */
+ if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
+ hlist_del_init_rcu(&sc->service_list);
+ kfree_rcu(sc, rcu);
}
- write_unlock_bh(&tipc_nametbl_lock);
+ spin_unlock_bh(&sc->lock);
+exit:
+ spin_unlock_bh(&tn->nametbl_lock);
}
+int tipc_nametbl_init(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct name_table *nt;
+ int i;
+
+ nt = kzalloc(sizeof(*nt), GFP_KERNEL);
+ if (!nt)
+ return -ENOMEM;
+
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
+ INIT_HLIST_HEAD(&nt->services[i]);
+
+ INIT_LIST_HEAD(&nt->node_scope);
+ INIT_LIST_HEAD(&nt->cluster_scope);
+ rwlock_init(&nt->cluster_scope_lock);
+ tn->nametbl = nt;
+ spin_lock_init(&tn->nametbl_lock);
+ return 0;
+}
/**
- * subseq_list - print specified sub-sequence contents into the given buffer
+ * tipc_service_delete - purge all publications for a service and delete it
+ * @net: the associated network namespace
+ * @sc: tipc_service to delete
*/
-static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth,
- u32 index)
+static void tipc_service_delete(struct net *net, struct tipc_service *sc)
{
- char portIdStr[27];
- const char *scope_str[] = {"", " zone", " cluster", " node"};
- struct publication *publ;
- struct name_info *info;
- int ret;
-
- ret = tipc_snprintf(buf, len, "%-10u %-10u ", sseq->lower, sseq->upper);
-
- if (depth == 2) {
- ret += tipc_snprintf(buf - ret, len + ret, "\n");
- return ret;
+ struct service_range *sr, *tmpr;
+ struct publication *p, *tmp;
+
+ spin_lock_bh(&sc->lock);
+ rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
+ list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
+ tipc_service_remove_publ(sr, &p->sk, p->key);
+ kfree_rcu(p, rcu);
+ }
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
+ kfree(sr);
}
+ hlist_del_init_rcu(&sc->service_list);
+ spin_unlock_bh(&sc->lock);
+ kfree_rcu(sc, rcu);
+}
- info = sseq->info;
+void tipc_nametbl_stop(struct net *net)
+{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+ struct hlist_head *service_head;
+ struct tipc_service *service;
+ u32 i;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- sprintf(portIdStr, "<%u.%u.%u:%u>",
- tipc_zone(publ->node), tipc_cluster(publ->node),
- tipc_node(publ->node), publ->ref);
- ret += tipc_snprintf(buf + ret, len - ret, "%-26s ", portIdStr);
- if (depth > 3) {
- ret += tipc_snprintf(buf + ret, len - ret, "%-10u %s",
- publ->key, scope_str[publ->scope]);
+ /* Verify name table is empty and purge any lingering
+ * publications, then release the name table
+ */
+ spin_lock_bh(&tn->nametbl_lock);
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
+ if (hlist_empty(&nt->services[i]))
+ continue;
+ service_head = &nt->services[i];
+ hlist_for_each_entry_rcu(service, service_head, service_list) {
+ tipc_service_delete(net, service);
}
- if (!list_is_last(&publ->zone_list, &info->zone_list))
- ret += tipc_snprintf(buf + ret, len - ret,
- "\n%33s", " ");
}
+ spin_unlock_bh(&tn->nametbl_lock);
- ret += tipc_snprintf(buf + ret, len - ret, "\n");
- return ret;
+ /* TODO: clear tn->nametbl, implement proper RCU rules ? */
+ kfree_rcu(nt, rcu);
}
-/**
- * nameseq_list - print specified name sequence contents into the given buffer
- */
-static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth,
- u32 type, u32 lowbound, u32 upbound, u32 index)
+static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
+ struct tipc_service *service,
+ struct service_range *sr,
+ u32 *last_key)
{
- struct sub_seq *sseq;
- char typearea[11];
- int ret = 0;
+ struct publication *p;
+ struct nlattr *attrs;
+ struct nlattr *b;
+ void *hdr;
+
+ if (*last_key) {
+ list_for_each_entry(p, &sr->all_publ, all_publ)
+ if (p->key == *last_key)
+ break;
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
+ return -EPIPE;
+ } else {
+ p = list_first_entry(&sr->all_publ,
+ struct publication,
+ all_publ);
+ }
- if (seq->first_free == 0)
- return 0;
+ list_for_each_entry_from(p, &sr->all_publ, all_publ) {
+ *last_key = p->key;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
+ &tipc_genl_family, NLM_F_MULTI,
+ TIPC_NL_NAME_TABLE_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE);
+ if (!attrs)
+ goto msg_full;
+
+ b = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
+ if (!b)
+ goto attr_msg_full;
+
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, service->type))
+ goto publ_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sr->lower))
+ goto publ_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sr->upper))
+ goto publ_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope))
+ goto publ_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node))
+ goto publ_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref))
+ goto publ_msg_full;
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
+ goto publ_msg_full;
+
+ nla_nest_end(msg->skb, b);
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+ }
+ *last_key = 0;
- sprintf(typearea, "%-10u", seq->type);
+ return 0;
- if (depth == 1) {
- ret += tipc_snprintf(buf, len, "%s\n", typearea);
- return ret;
- }
+publ_msg_full:
+ nla_nest_cancel(msg->skb, b);
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
- for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) {
- if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) {
- ret += tipc_snprintf(buf + ret, len - ret, "%s ",
- typearea);
- spin_lock_bh(&seq->lock);
- ret += subseq_list(sseq, buf + ret, len - ret,
- depth, index);
- spin_unlock_bh(&seq->lock);
- sprintf(typearea, "%10s", " ");
- }
- }
- return ret;
+ return -EMSGSIZE;
}
-/**
- * nametbl_header - print name table header into the given buffer
- */
-static int nametbl_header(char *buf, int len, u32 depth)
+static int __tipc_nl_service_range_list(struct tipc_nl_msg *msg,
+ struct tipc_service *sc,
+ u32 *last_lower, u32 *last_key)
{
- const char *header[] = {
- "Type ",
- "Lower Upper ",
- "Port Identity ",
- "Publication Scope"
- };
-
- int i;
- int ret = 0;
+ struct service_range *sr;
+ struct rb_node *n;
+ int err;
- if (depth > 4)
- depth = 4;
- for (i = 0; i < depth; i++)
- ret += tipc_snprintf(buf + ret, len - ret, header[i]);
- ret += tipc_snprintf(buf + ret, len - ret, "\n");
- return ret;
+ for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+ sr = container_of(n, struct service_range, tree_node);
+ if (sr->lower < *last_lower)
+ continue;
+ err = __tipc_nl_add_nametable_publ(msg, sc, sr, last_key);
+ if (err) {
+ *last_lower = sr->lower;
+ return err;
+ }
+ }
+ *last_lower = 0;
+ return 0;
}
-/**
- * nametbl_list - print specified name table contents into the given buffer
- */
-static int nametbl_list(char *buf, int len, u32 depth_info,
- u32 type, u32 lowbound, u32 upbound)
-{
- struct hlist_head *seq_head;
- struct name_seq *seq;
- int all_types;
- int ret = 0;
- u32 depth;
- u32 i;
+static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
+ u32 *last_type, u32 *last_lower, u32 *last_key)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_service *service = NULL;
+ struct hlist_head *head;
+ struct tipc_uaddr ua;
+ int err;
+ int i;
- all_types = (depth_info & TIPC_NTQ_ALLTYPES);
- depth = (depth_info & ~TIPC_NTQ_ALLTYPES);
+ if (*last_type)
+ i = hash(*last_type);
+ else
+ i = 0;
+
+ for (; i < TIPC_NAMETBL_SIZE; i++) {
+ head = &tn->nametbl->services[i];
+
+ if (*last_type ||
+ (!i && *last_key && (*last_lower == *last_key))) {
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ *last_type, *last_lower, *last_lower);
+ service = tipc_service_find(net, &ua);
+ if (!service)
+ return -EPIPE;
+ } else {
+ hlist_for_each_entry_rcu(service, head, service_list)
+ break;
+ if (!service)
+ continue;
+ }
- if (depth == 0)
- return 0;
+ hlist_for_each_entry_from_rcu(service, service_list) {
+ spin_lock_bh(&service->lock);
+ err = __tipc_nl_service_range_list(msg, service,
+ last_lower,
+ last_key);
- if (all_types) {
- /* display all entries in name table to specified depth */
- ret += nametbl_header(buf, len, depth);
- lowbound = 0;
- upbound = ~0;
- for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
- seq_head = &table.types[i];
- hlist_for_each_entry(seq, seq_head, ns_list) {
- ret += nameseq_list(seq, buf + ret, len - ret,
- depth, seq->type,
- lowbound, upbound, i);
- }
- }
- } else {
- /* display only the sequence that matches the specified type */
- if (upbound < lowbound) {
- ret += tipc_snprintf(buf + ret, len - ret,
- "invalid name sequence specified\n");
- return ret;
- }
- ret += nametbl_header(buf + ret, len - ret, depth);
- i = hash(type);
- seq_head = &table.types[i];
- hlist_for_each_entry(seq, seq_head, ns_list) {
- if (seq->type == type) {
- ret += nameseq_list(seq, buf + ret, len - ret,
- depth, type,
- lowbound, upbound, i);
- break;
+ if (err) {
+ *last_type = service->type;
+ spin_unlock_bh(&service->lock);
+ return err;
}
+ spin_unlock_bh(&service->lock);
}
+ *last_type = 0;
}
- return ret;
+ return 0;
}
-struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
+int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct sk_buff *buf;
- struct tipc_name_table_query *argv;
- struct tlv_desc *rep_tlv;
- char *pb;
- int pb_len;
- int str_len;
+ struct net *net = sock_net(skb->sk);
+ u32 last_type = cb->args[0];
+ u32 last_lower = cb->args[1];
+ u32 last_key = cb->args[2];
+ int done = cb->args[3];
+ struct tipc_nl_msg msg;
+ int err;
+
+ if (done)
+ return 0;
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NAME_TBL_QUERY))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ err = tipc_nl_service_list(net, &msg, &last_type,
+ &last_lower, &last_key);
+ if (!err) {
+ done = 1;
+ } else if (err != -EMSGSIZE) {
+ /* We never set seq or call nl_dump_check_consistent(); this
+ * means that setting prev_seq here will cause the consistency
+ * check to fail in the netlink callback handler, resulting in
+ * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if
+ * we got an error.
+ */
+ cb->prev_seq = 1;
+ }
+ rcu_read_unlock();
- buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
- if (!buf)
- return NULL;
+ cb->args[0] = last_type;
+ cb->args[1] = last_lower;
+ cb->args[2] = last_key;
+ cb->args[3] = done;
- rep_tlv = (struct tlv_desc *)buf->data;
- pb = TLV_DATA(rep_tlv);
- pb_len = ULTRA_STRING_MAX_LEN;
- argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area);
- read_lock_bh(&tipc_nametbl_lock);
- str_len = nametbl_list(pb, pb_len, ntohl(argv->depth),
- ntohl(argv->type),
- ntohl(argv->lowbound), ntohl(argv->upbound));
- read_unlock_bh(&tipc_nametbl_lock);
- str_len += 1; /* for "\0" */
- skb_put(buf, TLV_SPACE(str_len));
- TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+ return skb->len;
+}
- return buf;
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
+{
+ struct tipc_dest *dst;
+
+ list_for_each_entry(dst, l, list) {
+ if (dst->node == node && dst->port == port)
+ return dst;
+ }
+ return NULL;
}
-int tipc_nametbl_init(void)
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
{
- table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head),
- GFP_ATOMIC);
- if (!table.types)
- return -ENOMEM;
+ struct tipc_dest *dst;
+
+ if (tipc_dest_find(l, node, port))
+ return false;
+
+ dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
+ if (unlikely(!dst))
+ return false;
+ dst->node = node;
+ dst->port = port;
+ list_add(&dst->list, l);
+ return true;
+}
- table.local_publ_count = 0;
- return 0;
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
+{
+ struct tipc_dest *dst;
+
+ if (list_empty(l))
+ return false;
+ dst = list_first_entry(l, typeof(*dst), list);
+ if (port)
+ *port = dst->port;
+ if (node)
+ *node = dst->node;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-void tipc_nametbl_stop(void)
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
{
- u32 i;
+ struct tipc_dest *dst;
+
+ dst = tipc_dest_find(l, node, port);
+ if (!dst)
+ return false;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
+}
- if (!table.types)
- return;
+void tipc_dest_list_purge(struct list_head *l)
+{
+ struct tipc_dest *dst, *tmp;
- /* Verify name table is empty, then release it */
- write_lock_bh(&tipc_nametbl_lock);
- for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
- if (hlist_empty(&table.types[i]))
- continue;
- pr_err("nametbl_stop(): orphaned hash chain detected\n");
- break;
+ list_for_each_entry_safe(dst, tmp, l, list) {
+ list_del(&dst->list);
+ kfree(dst);
}
- kfree(table.types);
- table.types = NULL;
- write_unlock_bh(&tipc_nametbl_lock);
}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f02f48b9a216..7ff6eeebaae6 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -1,8 +1,9 @@
/*
* net/tipc/name_table.h: Include file for TIPC name table code
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,68 +38,120 @@
#ifndef _TIPC_NAME_TABLE_H
#define _TIPC_NAME_TABLE_H
-#include "node_subscr.h"
-
struct tipc_subscription;
-struct tipc_port_list;
+struct tipc_plist;
+struct tipc_nlist;
+struct tipc_group;
+struct tipc_uaddr;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
*/
-#define TIPC_ZM_SRV 3 /* zone master service name type */
+#define TIPC_ZM_SRV 3 /* zone master service name type */
+#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1)
+#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
+
+#define TIPC_ANY_SCOPE 10 /* Both node and cluster scope will match */
/**
- * struct publication - info about a published (name or) name sequence
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @subscr: subscription to "node down" event (for off-node publications only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
- *
- * Note that the node list, cluster list, and zone list are circular lists.
+ * struct publication - info about a published service address or range
+ * @sr: service range represented by this publication
+ * @sk: address of socket bound to this publication
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @key: publication key, unique across the cluster
+ * @id: publication id
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list;
+ * Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ * Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
+ * @list: to form a list of publications in temporal order
+ * @rcu: RCU callback head used for deferred freeing
*/
struct publication {
- u32 type;
- u32 lower;
- u32 upper;
- u32 scope;
- u32 node;
- u32 ref;
+ struct tipc_service_range sr;
+ struct tipc_socket_addr sk;
+ u16 scope;
u32 key;
- struct tipc_node_subscr subscr;
- struct list_head local_list;
- struct list_head pport_list;
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
+ u32 id;
+ struct list_head binding_node;
+ struct list_head binding_sock;
+ struct list_head local_publ;
+ struct list_head all_publ;
+ struct list_head list;
+ struct rcu_head rcu;
};
+/**
+ * struct name_table - table containing all existing port name publications
+ * @rcu: RCU callback head used for deferred freeing
+ * @services: name sequence hash lists
+ * @node_scope: all local publications with node scope
+ * - used by name_distr during re-init of name table
+ * @cluster_scope: all local publications with cluster scope
+ * - used by name_distr to send bulk updates to new nodes
+ * - used by name_distr during re-init of name table
+ * @cluster_scope_lock: lock for accessing @cluster_scope
+ * @local_publ_count: number of publications issued by this node
+ * @rc_dests: destination node counter
+ * @snd_nxt: next sequence number to be used
+ */
+struct name_table {
+ struct rcu_head rcu;
+ struct hlist_head services[TIPC_NAMETBL_SIZE];
+ struct list_head node_scope;
+ struct list_head cluster_scope;
+ rwlock_t cluster_scope_lock;
+ u32 local_publ_count;
+ u32 rc_dests;
+ u32 snd_nxt;
+};
-extern rwlock_t tipc_nametbl_lock;
-
-struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space);
-u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
- struct tipc_port_list *dports);
-struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
- u32 scope, u32 port_ref, u32 key);
-int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key);
-struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 ref,
+int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
+bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk);
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dports);
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dsts, int *dstcnt,
+ u32 exclude, bool mcast);
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ struct tipc_uaddr *ua);
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key);
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key);
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key);
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
u32 key);
-struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node,
- u32 ref, u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s);
+bool tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
-int tipc_nametbl_init(void);
-void tipc_nametbl_stop(void);
+int tipc_nametbl_init(struct net *net);
+void tipc_nametbl_stop(struct net *net);
+
+struct tipc_dest {
+ struct list_head list;
+ u32 port;
+ u32 node;
+};
+
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
+void tipc_dest_list_purge(struct list_head *l);
#endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7d305ecc09c2..7e65d0b0c4a8 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -1,7 +1,7 @@
/*
* net/tipc/net.c: TIPC network routing code
*
- * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 1995-2006, 2014, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -38,46 +38,44 @@
#include "net.h"
#include "name_distr.h"
#include "subscr.h"
-#include "port.h"
+#include "socket.h"
#include "node.h"
-#include "config.h"
+#include "bcast.h"
+#include "link.h"
+#include "netlink.h"
+#include "monitor.h"
/*
* The TIPC locking policy is designed to ensure a very fine locking
* granularity, permitting complete parallel access to individual
- * port and node/link instances. The code consists of three major
+ * port and node/link instances. The code consists of four major
* locking domains, each protected with their own disjunct set of locks.
*
- * 1: The routing hierarchy.
- * Comprises the structures 'zone', 'cluster', 'node', 'link'
- * and 'bearer'. The whole hierarchy is protected by a big
- * read/write lock, tipc_net_lock, to enssure that nothing is added
- * or removed while code is accessing any of these structures.
- * This layer must not be called from the two others while they
- * hold any of their own locks.
- * Neither must it itself do any upcalls to the other two before
- * it has released tipc_net_lock and other protective locks.
+ * 1: The bearer level.
+ * The RTNL lock serializes bearer configuration on the update
+ * side, and RCU is used on the read side to keep a bearer
+ * instance valid on both the message transmission and reception
+ * paths.
*
- * Within the tipc_net_lock domain there are two sub-domains;'node' and
- * 'bearer', where local write operations are permitted,
- * provided that those are protected by individual spin_locks
- * per instance. Code holding tipc_net_lock(read) and a node spin_lock
- * is permitted to poke around in both the node itself and its
- * subordinate links. I.e, it can update link counters and queues,
- * change link state, send protocol messages, and alter the
- * "active_links" array in the node; but it can _not_ remove a link
- * or a node from the overall structure.
- * Correspondingly, individual bearers may change status within a
- * tipc_net_lock(read), protected by an individual spin_lock ber bearer
- * instance, but it needs tipc_net_lock(write) to remove/add any bearers.
+ * 2: The node and link level.
+ * All node instances are stored in two lists, tipc_node_list and
+ * node_htable. Both lists are protected by node_list_lock on the write
+ * side and by RCU on the read side. Notably, a node instance is
+ * destroyed only when the TIPC module is removed, at which point we
+ * can be sure that no user is still accessing the node. Therefore,
+ * apart from iterating the two lists under RCU protection, there is
+ * no need to hold the RCU read lock when accessing a node instance
+ * elsewhere.
*
+ * In addition, all members in node structure including link instances
+ * are protected by node spin lock.
*
- * 2: The transport level of the protocol.
- * This consists of the structures port, (and its user level
- * representations, such as user_port and tipc_sock), reference and
- * tipc_user (port.c, reg.c, socket.c).
+ * 3: The transport level of the protocol.
+ * This consists of the structures port, (and its user level
+ * representations, such as user_port and tipc_sock), reference and
+ * tipc_user (port.c, reg.c, socket.c).
*
- * This layer has four different locks:
+ * This layer has four different locks:
* - The tipc_port spin_lock. This is protecting each port instance
* from parallel data access and removal. Since we can not place
* this lock in the port itself, it has been placed in the
@@ -92,11 +90,11 @@
* - A spin lock to protect the registry of kernel/driver users (reg.c)
* - A global spin_lock (tipc_port_lock), which only task is to ensure
* consistency where more than one port is involved in an operation,
- * i.e., whe a port is part of a linked list of ports.
+ * i.e., when a port is part of a linked list of ports.
* There are two such lists; 'port_list', which is used for management,
* and 'wait_list', which is used to queue ports during congestion.
*
- * 3: The name table (name_table.c, name_distr.c, subscription.c)
+ * 4: The name table (name_table.c, name_distr.c, subscription.c)
* - There is one big read/write-lock (tipc_nametbl_lock) protecting the
* overall name table structure. Nothing must be added/removed to
* this structure without holding write access to it.
@@ -108,98 +106,242 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-DEFINE_RWLOCK(tipc_net_lock);
+static void tipc_net_finalize(struct net *net, u32 addr);
-static void net_route_named_msg(struct sk_buff *buf)
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr) /* apply an optional node id and/or node address to @net; returns 0, or -1 when an identity is already configured */
{
- struct tipc_msg *msg = buf_msg(buf);
- u32 dnode;
- u32 dport;
-
- if (!msg_named(msg)) {
- kfree_skb(buf);
- return;
+ if (tipc_own_id(net)) { /* an own id already exists: refuse to configure again */
+ pr_info("Cannot configure node identity twice\n");
+ return -1;
}
+ pr_info("Started in network mode\n");
- dnode = addr_domain(msg_lookup_scope(msg));
- dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode);
- if (dport) {
- msg_set_destnode(msg, dnode);
- msg_set_destport(msg, dport);
- tipc_net_route_msg(buf);
- return;
- }
- tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
+ if (node_id) /* node_id may be NULL when only an address is supplied */
+ tipc_set_node_id(net, node_id);
+ if (addr) /* addr may be 0 when only a node id is supplied */
+ tipc_net_finalize(net, addr);
+ return 0;
}
-void tipc_net_route_msg(struct sk_buff *buf)
+static void tipc_net_finalize(struct net *net, u32 addr) /* install @addr as the node address of @net, once, and re-initialise the users of that address */
{
- struct tipc_msg *msg;
- u32 dnode;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_socket_addr sk = {0, addr};
+ struct tipc_uaddr ua;
+
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, /* fill ua for the TIPC_NODE_STATE service; addr is passed twice, presumably as lower and upper bound */
+ TIPC_NODE_STATE, addr, addr);
- if (!buf)
+ if (cmpxchg(&tn->node_addr, 0, addr)) /* only the caller that swaps node_addr from 0 continues; a non-zero old value means an address was already set */
return;
- msg = buf_msg(buf);
-
- /* Handle message for this node */
- dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
- if (tipc_in_scope(dnode, tipc_own_addr)) {
- if (msg_isdata(msg)) {
- if (msg_mcast(msg))
- tipc_port_recv_mcast(buf, NULL);
- else if (msg_destport(msg))
- tipc_port_recv_msg(buf);
- else
- net_route_named_msg(buf);
- return;
- }
- switch (msg_user(msg)) {
- case NAME_DISTRIBUTOR:
- tipc_named_recv(buf);
- break;
- case CONN_MANAGER:
- tipc_port_recv_proto_msg(buf);
- break;
- default:
- kfree_skb(buf);
- }
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_mon_reinit_self(net);
+ tipc_nametbl_publish(net, &ua, &sk, addr); /* publish ua for sk, using addr as the publication key */
+}
+
+void tipc_net_finalize_work(struct work_struct *work) /* work handler: finalize the namespace using its stored trial address */
+{
+ struct tipc_net *tn = container_of(work, struct tipc_net, work); /* @work is embedded in struct tipc_net */
+
+ rtnl_lock(); /* run the finalize step under RTNL, like the netlink set path does */
+ tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); /* the struct net is obtained from the tn->bcl link */
+ rtnl_unlock();
+}
+
+void tipc_net_stop(struct net *net) /* stop bearers and nodes of @net; does nothing if no own id was ever set */
+{
+ if (!tipc_own_id(net))
return;
+
+ rtnl_lock(); /* bearer and node teardown are done together under RTNL */
+ tipc_bearer_stop(net);
+ tipc_node_stop(net);
+ rtnl_unlock();
+
+ pr_info("Left network mode\n");
+}
+
+static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) /* append one TIPC_NL_NET_GET record (net id + node id) to msg->skb; returns 0 or -EMSGSIZE */
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u64 *w0 = (u64 *)&tn->node_id[0]; /* node id bytes 0..7 viewed as one u64 */
+ u64 *w1 = (u64 *)&tn->node_id[8]; /* node id bytes from offset 8 viewed as one u64 */
+ struct nlattr *attrs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ NLM_F_MULTI, TIPC_NL_NET_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NET); /* all net attributes go inside one TIPC_NLA_NET nest */
+ if (!attrs)
+ goto msg_full;
+
+ if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id))
+ goto attr_msg_full;
+ if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0))
+ goto attr_msg_full;
+ if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0))
+ goto attr_msg_full;
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+attr_msg_full: /* undo the partially filled nest, then the message header */
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) /* dump callback emitting a single net record; returns skb->len, or 0 once the record was sent */
+{
+ struct net *net = sock_net(skb->sk);
+ int err;
+ int done = cb->args[0]; /* non-zero once the record has been emitted by an earlier call */
+ struct tipc_nl_msg msg;
+
+ if (done)
+ return 0;
+
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ err = __tipc_nl_add_net(net, &msg);
+ if (err)
+ goto out; /* done stays 0; err itself is not returned to the caller */
+
+ done = 1;
+out:
+ cb->args[0] = done; /* remember progress for the next invocation */
+
+ return skb->len;
+}
+
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) /* apply net id / node address / node id from a TIPC_NLA_NET nest; tipc_nl_net_set() calls this with RTNL held */
+{
+ struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+ int err;
+
+ if (!info->attrs[TIPC_NLA_NET])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX,
+ info->attrs[TIPC_NLA_NET],
+ tipc_nl_net_policy, info->extack);
+
+ if (err)
+ return err;
+
+ /* Can't change net id once TIPC has joined a network */
+ if (tipc_own_addr(net))
+ return -EPERM;
+
+ if (attrs[TIPC_NLA_NET_ID]) {
+ u32 val;
+
+ val = nla_get_u32(attrs[TIPC_NLA_NET_ID]);
+ if (val < 1 || val > 9999) /* accepted net ids are 1..9999 */
+ return -EINVAL;
+
+ tn->net_id = val;
+ }
+
+ if (attrs[TIPC_NLA_NET_ADDR]) {
+ u32 addr;
+
+ addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+ if (!addr)
+ return -EINVAL;
+ tn->legacy_addr_format = true; /* an explicit numeric address marks the legacy address format */
+ tipc_net_init(net, NULL, addr); /* NOTE(review): the return value of tipc_net_init() is ignored here */
+ }
+
+ if (attrs[TIPC_NLA_NET_NODEID]) {
+ u8 node_id[NODE_ID_LEN];
+ u64 *w0 = (u64 *)&node_id[0]; /* first u64 word of the node id buffer */
+ u64 *w1 = (u64 *)&node_id[8]; /* second u64 word, starting at byte 8 */
+
+ if (!attrs[TIPC_NLA_NET_NODEID_W1]) /* both words are required together */
+ return -EINVAL;
+ *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+ *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+ tipc_net_init(net, node_id, 0); /* NOTE(review): return value ignored here as well */
}
+ return 0;
+}
+
+int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) /* genl doit handler: runs __tipc_nl_net_set() under RTNL and returns its result */
+{
+ int err;
- /* Handle message for another node */
- skb_trim(buf, msg_size(msg));
- tipc_link_send(buf, dnode, msg_link_selector(msg));
+ rtnl_lock();
+ err = __tipc_nl_net_set(skb, info);
+ rtnl_unlock();
+
+ return err;
}
-void tipc_net_start(u32 addr)
+static int __tipc_nl_addr_legacy_get(struct net *net, struct tipc_nl_msg *msg) /* build a TIPC_NL_ADDR_LEGACY_GET reply in msg->skb; returns 0 or -EMSGSIZE */
{
- char addr_string[16];
+ struct tipc_net *tn = tipc_net(net);
+ struct nlattr *attrs;
+ void *hdr;
- write_lock_bh(&tipc_net_lock);
- tipc_own_addr = addr;
- tipc_named_reinit();
- tipc_port_reinit();
- tipc_bclink_init();
- write_unlock_bh(&tipc_net_lock);
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ 0, TIPC_NL_ADDR_LEGACY_GET);
+ if (!hdr)
+ return -EMSGSIZE;
- tipc_cfg_reinit();
+ attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); /* note: __tipc_nl_add_net() uses nla_nest_start_noflag() for the same nest type */
+ if (!attrs)
+ goto msg_full;
- pr_info("Started in network mode\n");
- pr_info("Own node address %s, network identity %u\n",
- tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+ if (tn->legacy_addr_format) /* the flag attribute is present only when the legacy format is in use */
+ if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY))
+ goto attr_msg_full;
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+attr_msg_full: /* undo the nest, then the message header */
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
}
-void tipc_net_stop(void)
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info) /* genl doit handler: allocate a reply, fill it via __tipc_nl_addr_legacy_get() and send it to the requester */
{
- struct tipc_node *node, *t_node;
+ struct net *net = sock_net(skb->sk);
+ struct tipc_nl_msg msg;
+ struct sk_buff *rep;
+ int err;
- if (!tipc_own_addr)
- return;
- write_lock_bh(&tipc_net_lock);
- tipc_bearer_stop();
- tipc_bclink_stop();
- list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
- tipc_node_delete(node);
- write_unlock_bh(&tipc_net_lock);
- pr_info("Left network mode\n");
+ rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ msg.skb = rep;
+ msg.portid = info->snd_portid; /* reply is addressed with the requester's port id and sequence number */
+ msg.seq = info->snd_seq;
+
+ err = __tipc_nl_addr_legacy_get(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb); /* the reply buffer is freed here when it could not be filled */
+ return err;
+ }
+
+ return genlmsg_reply(msg.skb, info);
}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 079daadb3f72..1cb1e43cf34a 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -1,7 +1,7 @@
/*
* net/tipc/net.h: Include file for TIPC network routing code
*
- * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 1995-2006, 2014, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -37,11 +37,16 @@
#ifndef _TIPC_NET_H
#define _TIPC_NET_H
-extern rwlock_t tipc_net_lock;
+#include <net/genetlink.h>
-void tipc_net_route_msg(struct sk_buff *buf);
+extern const struct nla_policy tipc_nl_net_policy[];
-void tipc_net_start(u32 addr);
-void tipc_net_stop(void);
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
+void tipc_net_finalize_work(struct work_struct *work);
+void tipc_net_stop(struct net *net);
+int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 8bcd4985d0fb..1a9a5bdaccf4 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -1,7 +1,7 @@
/*
* net/tipc/netlink.c: TIPC configuration handling
*
- * Copyright (c) 2005-2006, Ericsson AB
+ * Copyright (c) 2005-2006, 2014, Ericsson AB
* Copyright (c) 2005-2007, Wind River Systems
* All rights reserved.
*
@@ -35,74 +35,281 @@
*/
#include "core.h"
-#include "config.h"
+#include "socket.h"
+#include "name_table.h"
+#include "bearer.h"
+#include "link.h"
+#include "node.h"
+#include "net.h"
+#include "udp_media.h"
#include <net/genetlink.h>
-static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
-{
- struct sk_buff *rep_buf;
- struct nlmsghdr *rep_nlh;
- struct nlmsghdr *req_nlh = info->nlhdr;
- struct tipc_genlmsghdr *req_userhdr = info->userhdr;
- int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
- u16 cmd;
-
- if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
- cmd = TIPC_CMD_NOT_NET_ADMIN;
- else
- cmd = req_userhdr->cmd;
-
- rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd,
- nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
- nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
- hdr_space);
-
- if (rep_buf) {
- skb_push(rep_buf, hdr_space);
- rep_nlh = nlmsg_hdr(rep_buf);
- memcpy(rep_nlh, req_nlh, hdr_space);
- rep_nlh->nlmsg_len = rep_buf->len;
- genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid);
- }
+static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { /* top-level attribute policy: every listed attribute except UNSPEC is a nested container */
+ [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, },
+ [TIPC_NLA_BEARER] = { .type = NLA_NESTED, },
+ [TIPC_NLA_SOCK] = { .type = NLA_NESTED, },
+ [TIPC_NLA_PUBL] = { .type = NLA_NESTED, },
+ [TIPC_NLA_LINK] = { .type = NLA_NESTED, },
+ [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, },
+ [TIPC_NLA_NODE] = { .type = NLA_NESTED, },
+ [TIPC_NLA_NET] = { .type = NLA_NESTED, },
+ [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, },
+ [TIPC_NLA_MON] = { .type = NLA_NESTED, },
+};
- return 0;
-}
+const struct nla_policy
+tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
+ [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
+};
+
+const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = {
+ [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_MON_REF] = { .type = NLA_U32 },
+ [TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 },
+};
+
+const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
+ [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
+ [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
+ [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
+ [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
+};
+
+const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
+ [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_NET_ID] = { .type = NLA_U32 },
+ [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 },
+ [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 },
+ [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 },
+ [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG }
+};
+
+const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
+ [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING,
+ .len = TIPC_MAX_LINK_NAME },
+ [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
+ [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
+ [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
+ [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
+ [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
+ [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
+ [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG },
+ [TIPC_NLA_NODE_ID] = { .type = NLA_BINARY,
+ .len = TIPC_NODEID_LEN},
+ [TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY,
+ .len = TIPC_AEAD_KEY_SIZE_MAX},
+ [TIPC_NLA_NODE_KEY_MASTER] = { .type = NLA_FLAG },
+ [TIPC_NLA_NODE_REKEYING] = { .type = NLA_U32 },
+};
-static struct genl_family tipc_genl_family = {
- .id = GENL_ID_GENERATE,
- .name = TIPC_GENL_NAME,
- .version = TIPC_GENL_VERSION,
- .hdrsize = TIPC_GENL_HDRLEN,
- .maxattr = 0,
+/* Properties valid for media, bearer and link */
+const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
+ [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 }
};
-static struct genl_ops tipc_genl_ops = {
- .cmd = TIPC_GENL_CMD,
- .doit = handle_cmd,
+const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
+ [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING,
+ .len = TIPC_MAX_BEARER_NAME },
+ [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
+ [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
};
-static int tipc_genl_family_registered;
+const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
+ [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
+ [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
+};
-int tipc_netlink_start(void)
+const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
+ [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
+ [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
+ .len = sizeof(struct sockaddr_storage)},
+ [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
+ .len = sizeof(struct sockaddr_storage)},
+};
+
+/* Users of the legacy API (tipc-config) can't handle that we add operations,
+ * so we have a separate genl handling for the new API.
+ */
+static const struct genl_ops tipc_genl_v2_ops[] = {
+ {
+ .cmd = TIPC_NL_BEARER_DISABLE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_bearer_disable,
+ },
+ {
+ .cmd = TIPC_NL_BEARER_ENABLE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_bearer_enable,
+ },
+ {
+ .cmd = TIPC_NL_BEARER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_bearer_get,
+ .dumpit = tipc_nl_bearer_dump,
+ },
+ {
+ .cmd = TIPC_NL_BEARER_ADD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_bearer_add,
+ },
+ {
+ .cmd = TIPC_NL_BEARER_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_bearer_set,
+ },
+ {
+ .cmd = TIPC_NL_SOCK_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = tipc_dump_start,
+ .dumpit = tipc_nl_sk_dump,
+ .done = tipc_dump_done,
+ },
+ {
+ .cmd = TIPC_NL_PUBL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = tipc_nl_publ_dump,
+ },
+ {
+ .cmd = TIPC_NL_LINK_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = tipc_nl_node_get_link,
+ .dumpit = tipc_nl_node_dump_link,
+ },
+ {
+ .cmd = TIPC_NL_LINK_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_set_link,
+ },
+ {
+ .cmd = TIPC_NL_LINK_RESET_STATS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_reset_link_stats,
+ },
+ {
+ .cmd = TIPC_NL_MEDIA_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_media_get,
+ .dumpit = tipc_nl_media_dump,
+ },
+ {
+ .cmd = TIPC_NL_MEDIA_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_media_set,
+ },
+ {
+ .cmd = TIPC_NL_NODE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .dumpit = tipc_nl_node_dump,
+ },
+ {
+ .cmd = TIPC_NL_NET_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .dumpit = tipc_nl_net_dump,
+ },
+ {
+ .cmd = TIPC_NL_NET_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_net_set,
+ },
+ {
+ .cmd = TIPC_NL_NAME_TABLE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .dumpit = tipc_nl_name_table_dump,
+ },
+ {
+ .cmd = TIPC_NL_MON_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_set_monitor,
+ },
+ {
+ .cmd = TIPC_NL_MON_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_get_monitor,
+ .dumpit = tipc_nl_node_dump_monitor,
+ },
+ {
+ .cmd = TIPC_NL_MON_PEER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = tipc_nl_node_dump_monitor_peer,
+ },
+ {
+ .cmd = TIPC_NL_PEER_REMOVE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_peer_rm,
+ },
+#ifdef CONFIG_TIPC_MEDIA_UDP
+ {
+ .cmd = TIPC_NL_UDP_GET_REMOTEIP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = tipc_udp_nl_dump_remoteip,
+ },
+#endif
+#ifdef CONFIG_TIPC_CRYPTO
+ {
+ .cmd = TIPC_NL_KEY_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_set_key,
+ },
+ {
+ .cmd = TIPC_NL_KEY_FLUSH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_flush_key,
+ },
+#endif
+ {
+ .cmd = TIPC_NL_ADDR_LEGACY_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_net_addr_legacy_get,
+ },
+};
+
+struct genl_family tipc_genl_family __ro_after_init = { /* generic netlink family for the new (v2) TIPC API */
+ .name = TIPC_GENL_V2_NAME,
+ .version = TIPC_GENL_V2_VERSION,
+ .hdrsize = 0, /* no family-specific header after the genl header */
+ .maxattr = TIPC_NLA_MAX,
+ .policy = tipc_nl_policy, /* top-level policy; nested policies are applied by the individual handlers */
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tipc_genl_v2_ops,
+ .n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
+ .resv_start_op = TIPC_NL_ADDR_LEGACY_GET + 1, /* one past the last op in tipc_genl_v2_ops */
+};
+
+int __init tipc_netlink_start(void) /* register tipc_genl_family; returns 0 or the registration error */
{
int res;
- res = genl_register_family_with_ops(&tipc_genl_family,
- &tipc_genl_ops, 1);
+ res = genl_register_family(&tipc_genl_family); /* ops are taken from the family's .ops/.n_ops fields */
if (res) {
pr_err("Failed to register netlink interface\n");
return res;
}
-
- tipc_genl_family_registered = 1;
return 0;
}
void tipc_netlink_stop(void)
{
- if (!tipc_genl_family_registered)
- return;
-
genl_unregister_family(&tipc_genl_family); /* unconditional: no separate "registered" flag is tracked any more */
- tipc_genl_family_registered = 0;
}
diff --git a/net/tipc/node_subscr.h b/net/tipc/netlink.h
index c95d20727ded..7cf777723e3e 100644
--- a/net/tipc/node_subscr.h
+++ b/net/tipc/netlink.h
@@ -1,8 +1,7 @@
/*
- * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling
+ * net/tipc/netlink.h: Include file for TIPC netlink code
*
- * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2014, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,30 +33,32 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _TIPC_NODE_SUBSCR_H
-#define _TIPC_NODE_SUBSCR_H
+#ifndef _TIPC_NETLINK_H
+#define _TIPC_NETLINK_H
+#include <net/netlink.h>
-#include "addr.h"
+extern struct genl_family tipc_genl_family;
-typedef void (*net_ev_handler) (void *usr_handle);
-
-/**
- * struct tipc_node_subscr - "node down" subscription entry
- * @node: ptr to node structure of interest (or NULL, if none)
- * @handle_node_down: routine to invoke when node fails
- * @usr_handle: argument to pass to routine when node fails
- * @nodesub_list: adjacent entries in list of subscriptions for the node
- */
-struct tipc_node_subscr {
- struct tipc_node *node;
- net_ev_handler handle_node_down;
- void *usr_handle;
- struct list_head nodesub_list;
+struct tipc_nl_msg { /* destination buffer plus addressing used when building a netlink message */
+ struct sk_buff *skb; /* buffer the message is written into */
+ u32 portid; /* port id handed to genlmsg_put() */
+ u32 seq; /* sequence number handed to genlmsg_put() */
};
-void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
- void *usr_handle, net_ev_handler handle_down);
-void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub);
-void tipc_nodesub_notify(struct tipc_node *node);
+extern const struct nla_policy tipc_nl_name_table_policy[];
+extern const struct nla_policy tipc_nl_sock_policy[];
+extern const struct nla_policy tipc_nl_net_policy[];
+extern const struct nla_policy tipc_nl_link_policy[];
+extern const struct nla_policy tipc_nl_node_policy[];
+extern const struct nla_policy tipc_nl_prop_policy[];
+extern const struct nla_policy tipc_nl_bearer_policy[];
+extern const struct nla_policy tipc_nl_media_policy[];
+extern const struct nla_policy tipc_nl_udp_policy[];
+extern const struct nla_policy tipc_nl_monitor_policy[];
+
+int tipc_netlink_start(void);
+int tipc_netlink_compat_start(void);
+void tipc_netlink_stop(void);
+void tipc_netlink_compat_stop(void);
#endif
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
new file mode 100644
index 000000000000..079aebb16ed8
--- /dev/null
+++ b/net/tipc/netlink_compat.c
@@ -0,0 +1,1376 @@
+/*
+ * Copyright (c) 2014, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "bearer.h"
+#include "link.h"
+#include "name_table.h"
+#include "socket.h"
+#include "node.h"
+#include "net.h"
+#include <net/genetlink.h>
+#include <linux/string_helpers.h>
+#include <linux/tipc_config.h>
+
+/* The legacy API had an artificial message length limit called
+ * ULTRA_STRING_MAX_LEN.
+ */
+#define ULTRA_STRING_MAX_LEN 32768
+
+#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN)
+
+#define REPLY_TRUNCATED "<truncated>\n"
+
+struct tipc_nl_compat_msg { /* state of one legacy (TLV-based) request and the reply being built for it */
+ u16 cmd; /* presumably the legacy command code; not used by the helpers shown here */
+ int rep_type; /* TLV type the reply is initialised with; 0 skips the TLV header */
+ int rep_size; /* payload size used when allocating the reply buffer */
+ int req_type; /* TLV type the request must have; 0 disables the check */
+ int req_size; /* must be non-zero whenever req_type is set */
+ struct net *net; /* namespace passed to __tipc_dump_start() */
+ struct sk_buff *rep; /* reply buffer; freed and set to NULL on error */
+ struct tlv_desc *req; /* request TLV */
+ struct sock *dst_sk; /* assigned to the ->sk of the scratch dump buffer */
+};
+
+struct tipc_nl_compat_cmd_dump { /* callbacks describing how one legacy dump command is served */
+ int (*header)(struct tipc_nl_compat_msg *); /* optional; run once before dumping, non-zero aborts */
+ int (*dumpit)(struct sk_buff *, struct netlink_callback *); /* dump callback, called repeatedly until it returns 0 */
+ int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); /* called for each dumped message with its parsed attributes */
+};
+
+struct tipc_nl_compat_cmd_doit { /* callbacks describing how one legacy set/do command is served */
+ int (*doit)(struct sk_buff *skb, struct genl_info *info); /* presumably the new-API handler to invoke; its call site is outside this view */
+ int (*transcode)(struct tipc_nl_compat_cmd_doit *cmd, /* fills skb from the legacy request in msg; called with RTNL held */
+ struct sk_buff *skb, struct tipc_nl_compat_msg *msg);
+};
+
+static int tipc_skb_tailroom(struct sk_buff *skb) /* usable tail space of @skb, capped so that skb->len never exceeds TIPC_SKB_MAX */
+{
+ int tailroom;
+ int limit;
+
+ tailroom = skb_tailroom(skb);
+ limit = TIPC_SKB_MAX - skb->len; /* bytes left before the legacy size limit is reached */
+
+ if (tailroom < limit)
+ return tailroom;
+
+ return limit; /* the smaller of the two values is returned */
+}
+
+static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) /* TLV length minus TLV_SPACE(0), i.e. presumably the payload length without the TLV header */
+{
+ return TLV_GET_LEN(tlv) - TLV_SPACE(0);
+}
+
+static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) /* append one TLV of @type with @len payload bytes at the tail of @skb; returns 0 or -EMSGSIZE */
+{
+ struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); /* the new TLV starts at the current tail */
+
+ if (tipc_skb_tailroom(skb) < TLV_SPACE(len))
+ return -EMSGSIZE;
+
+ skb_put(skb, TLV_SPACE(len));
+ memset(tlv, 0, TLV_SPACE(len)); /* zero the whole TLV_SPACE(len) region before filling it */
+ tlv->tlv_type = htons(type); /* type and length are stored in network byte order */
+ tlv->tlv_len = htons(TLV_LENGTH(len));
+ if (len && data) /* payload is optional: a NULL @data leaves the zeroed bytes in place */
+ memcpy(TLV_DATA(tlv), data, len);
+
+ return 0;
+}
+
+static void tipc_tlv_init(struct sk_buff *skb, u16 type) /* start a TLV of @type at skb->data with length 0 */
+{
+ struct tlv_desc *tlv = (struct tlv_desc *)skb->data;
+
+ TLV_SET_LEN(tlv, 0);
+ TLV_SET_TYPE(tlv, type);
+ skb_put(skb, sizeof(struct tlv_desc)); /* account for the header just written; no tailroom check is made here */
+}
+
+static __printf(2, 3) int tipc_tlv_sprintf(struct sk_buff *skb, /* printf-style append to the TLV located at skb->data; returns the number of characters added */
+ const char *fmt, ...)
+{
+ int n;
+ u16 len;
+ u32 rem;
+ char *buf;
+ struct tlv_desc *tlv;
+ va_list args;
+
+ rem = tipc_skb_tailroom(skb); /* output is limited to the remaining capped tail space */
+
+ tlv = (struct tlv_desc *)skb->data;
+ len = TLV_GET_LEN(tlv);
+ buf = TLV_DATA(tlv) + len; /* write position: TLV data start plus the current TLV length */
+
+ va_start(args, fmt);
+ n = vscnprintf(buf, rem, fmt, args);
+ va_end(args);
+
+ TLV_SET_LEN(tlv, n + len); /* grow the TLV length and the skb by what was written */
+ skb_put(skb, n);
+
+ return n;
+}
+
+static struct sk_buff *tipc_tlv_alloc(int size) /* allocate an skb for a TLV with @size payload bytes, with headroom reserved; returns NULL on allocation failure */
+{
+ int hdr_len;
+ struct sk_buff *buf;
+
+ size = TLV_SPACE(size);
+ hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); /* room for the netlink, genl and TIPC genl headers */
+
+ buf = alloc_skb(hdr_len + size, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ skb_reserve(buf, hdr_len); /* headers are left as headroom; data starts after them */
+
+ return buf;
+}
+
+static struct sk_buff *tipc_get_err_tlv(char *str) /* build an skb holding @str as a TIPC_TLV_ERROR_STRING TLV; returns NULL on allocation failure */
+{
+ int str_len = strlen(str) + 1; /* include the terminating NUL in the TLV payload */
+ struct sk_buff *buf;
+
+ buf = tipc_tlv_alloc(str_len);
+ if (buf)
+ tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); /* return value not checked; buffer was allocated for str_len payload bytes */
+
+ return buf;
+}
+
+/* Drive a new-API dump callback on behalf of a legacy request.
+ *
+ * @cmd: callbacks for this command (dumpit and format are called here)
+ * @msg: legacy request state; format() appends its output to msg->rep
+ * @arg: netlink message handed to the dump callback as cb.nlh / cb.skb
+ *
+ * cmd->dumpit() is called repeatedly into a scratch skb. Every netlink
+ * message it produced is parsed with the family policy and passed to
+ * cmd->format(). The loop ends when dumpit() returns 0.
+ *
+ * Returns 0 on success or when the reply merely ran out of space
+ * (-EMSGSIZE is turned into a possibly truncated reply), otherwise a
+ * negative errno, including any error returned by dumpit() itself.
+ */
+static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
+ struct tipc_nl_compat_msg *msg,
+ struct sk_buff *arg)
+{
+ struct genl_dumpit_info info;
+ int len = 0;
+ int err;
+ struct sk_buff *buf;
+ struct nlmsghdr *nlmsg;
+ struct netlink_callback cb;
+ struct nlattr **attrbuf;
+
+ memset(&cb, 0, sizeof(cb));
+ cb.nlh = (struct nlmsghdr *)arg->data;
+ cb.skb = arg;
+ cb.data = &info;
+
+ buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->sk = msg->dst_sk;
+ if (__tipc_dump_start(&cb, msg->net)) {
+ kfree_skb(buf);
+ return -ENOMEM;
+ }
+
+ attrbuf = kcalloc(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ info.info.attrs = attrbuf;
+
+ if (nlmsg_len(cb.nlh) > 0) {
+ err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy, NULL);
+ if (err)
+ goto err_out;
+ }
+ do {
+ int rem;
+
+ len = (*cmd->dumpit)(buf, &cb);
+ if (len < 0) {
+ /* Propagate dump errors. A negative length is never
+ * consumed by nlmsg_for_each_msg() below, and the
+ * "while (len)" condition would otherwise call
+ * dumpit() again indefinitely.
+ */
+ err = len;
+ goto err_out;
+ }
+
+ nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) {
+ err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN,
+ attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy,
+ NULL);
+ if (err)
+ goto err_out;
+
+ err = (*cmd->format)(msg, attrbuf);
+ if (err)
+ goto err_out;
+
+ if (tipc_skb_tailroom(msg->rep) <= 1) {
+ err = -EMSGSIZE;
+ goto err_out;
+ }
+ }
+
+ /* Reuse the scratch buffer for the next dumpit() round */
+ skb_reset_tail_pointer(buf);
+ buf->len = 0;
+
+ } while (len);
+
+ err = 0;
+
+err_out:
+ kfree(attrbuf);
+ tipc_dump_done(&cb);
+ kfree_skb(buf);
+
+ if (err == -EMSGSIZE) {
+ /* The legacy API only considered messages filling
+ * "ULTRA_STRING_MAX_LEN" to be truncated.
+ */
+ if ((TIPC_SKB_MAX - msg->rep->len) <= 1) {
+ char *tail = skb_tail_pointer(msg->rep);
+
+ if (*tail != '\0')
+ sprintf(tail - sizeof(REPLY_TRUNCATED) - 1,
+ REPLY_TRUNCATED);
+ }
+
+ return 0;
+ }
+
+ return err;
+}
+
+/* Entry point for legacy "dump" style commands.
+ *
+ * Validates the request TLV type (when one is expected), allocates and
+ * initialises the reply in msg->rep, emits the optional header and then runs
+ * the dump through __tipc_nl_compat_dumpit().
+ *
+ * On any failure msg->rep is freed and reset to NULL (it is simply never
+ * set when validation or the allocation fails).
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
+				 struct tipc_nl_compat_msg *msg)
+{
+	struct sk_buff *arg;
+	struct nlmsghdr *nlh;
+	int err;
+
+	if (msg->req_type) {
+		if (!msg->req_size)
+			return -EINVAL;
+		if (!TLV_CHECK_TYPE(msg->req, msg->req_type))
+			return -EINVAL;
+	}
+
+	msg->rep = tipc_tlv_alloc(msg->rep_size);
+	if (!msg->rep)
+		return -ENOMEM;
+
+	if (msg->rep_type)
+		tipc_tlv_init(msg->rep, msg->rep_type);
+
+	if (cmd->header) {
+		err = (*cmd->header)(msg);
+		if (err)
+			goto free_rep;
+	}
+
+	/* Minimal netlink message used as the dump request */
+	arg = nlmsg_new(0, GFP_KERNEL);
+	if (!arg) {
+		err = -ENOMEM;
+		goto free_rep;
+	}
+
+	nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto free_arg;
+	}
+	nlmsg_end(arg, nlh);
+
+	err = __tipc_nl_compat_dumpit(cmd, msg, arg);
+	if (err)
+		goto free_arg;
+
+	kfree_skb(arg);
+
+	return 0;
+
+free_arg:
+	kfree_skb(arg);
+free_rep:
+	kfree_skb(msg->rep);
+	msg->rep = NULL;
+
+	return err;
+}
+
+/* Execute a legacy "do" command through its netlink handler.
+ *
+ * cmd->transcode builds netlink attributes in trans_buf from the legacy
+ * request, those attributes are parsed into info.attrs, and cmd->doit is then
+ * called with a scratch skb. Transcoding, parsing and the handler all run
+ * with rtnl_lock() held.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
+ struct tipc_nl_compat_msg *msg)
+{
+ int err;
+ struct sk_buff *doit_buf;
+ struct sk_buff *trans_buf;
+ struct nlattr **attrbuf;
+ struct genl_info info;
+
+ /* Buffer that receives the transcoded netlink attributes */
+ trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!trans_buf)
+ return -ENOMEM;
+
+ attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *),
+ GFP_KERNEL);
+ if (!attrbuf) {
+ err = -ENOMEM;
+ goto trans_out;
+ }
+
+ /* Scratch skb passed to ->doit as its first argument */
+ doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!doit_buf) {
+ err = -ENOMEM;
+ goto attrbuf_out;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.attrs = attrbuf;
+
+ rtnl_lock();
+ /* ->transcode may also replace cmd->doit (see tipc_nl_compat_link_set) */
+ err = (*cmd->transcode)(cmd, trans_buf, msg);
+ if (err)
+ goto doit_out;
+
+ err = nla_parse_deprecated(attrbuf, tipc_genl_family.maxattr,
+ (const struct nlattr *)trans_buf->data,
+ trans_buf->len, NULL, NULL);
+ if (err)
+ goto doit_out;
+
+ doit_buf->sk = msg->dst_sk;
+
+ err = (*cmd->doit)(doit_buf, &info);
+doit_out:
+ rtnl_unlock();
+
+ /* Release in reverse order of allocation */
+ kfree_skb(doit_buf);
+attrbuf_out:
+ kfree(attrbuf);
+trans_out:
+ kfree_skb(trans_buf);
+
+ return err;
+}
+
+/* Entry point for legacy "do" style commands.
+ *
+ * Validates the request TLV type (when one is expected), runs the command
+ * and, on success, installs an empty reply in msg->rep.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
+			       struct tipc_nl_compat_msg *msg)
+{
+	int rc;
+
+	if (msg->req_type) {
+		if (!msg->req_size)
+			return -EINVAL;
+		if (!TLV_CHECK_TYPE(msg->req, msg->req_type))
+			return -EINVAL;
+	}
+
+	rc = __tipc_nl_compat_doit(cmd, msg);
+	if (rc)
+		return rc;
+
+	/* The legacy API considered an empty message a success message */
+	msg->rep = tipc_tlv_alloc(0);
+
+	return msg->rep ? 0 : -ENOMEM;
+}
+
+/* Format callback: append one TIPC_TLV_BEARER_NAME TLV, carrying the bearer
+ * name found in the nested TIPC_NLA_BEARER attribute, to the reply.
+ *
+ * Returns the result of tipc_add_tlv(), -EINVAL if TIPC_NLA_BEARER is
+ * missing, or the nested-parse error.
+ */
+static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
+				      struct nlattr **attrs)
+{
+	struct nlattr *tb[TIPC_NLA_BEARER_MAX + 1];
+	struct nlattr *name;
+	int rc;
+
+	if (!attrs[TIPC_NLA_BEARER])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(tb, TIPC_NLA_BEARER_MAX,
+					 attrs[TIPC_NLA_BEARER], NULL, NULL);
+	if (rc)
+		return rc;
+
+	name = tb[TIPC_NLA_BEARER_NAME];
+
+	return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME,
+			    nla_data(name), nla_len(name));
+}
+
+/* Transcode a legacy bearer configuration into a nested TIPC_NLA_BEARER
+ * attribute holding the name, the discovery domain and, when the requested
+ * priority does not exceed TIPC_MAX_LINK_PRI, a priority property.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb runs out of room, or -EINVAL if
+ * the request holds no name bytes or the name is not terminated in bounds.
+ */
+static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
+					struct sk_buff *skb,
+					struct tipc_nl_compat_msg *msg)
+{
+	struct tipc_bearer_config *cfg;
+	struct nlattr *bearer_nest;
+	struct nlattr *prop_nest;
+	int name_len;
+	u32 prio;
+
+	cfg = (struct tipc_bearer_config *)TLV_DATA(msg->req);
+
+	bearer_nest = nla_nest_start_noflag(skb, TIPC_NLA_BEARER);
+	if (!bearer_nest)
+		return -EMSGSIZE;
+
+	/* Payload bytes left for the name once the fixed fields are skipped */
+	name_len = TLV_GET_DATA_LEN(msg->req);
+	name_len -= offsetof(struct tipc_bearer_config, name);
+	if (name_len <= 0)
+		return -EINVAL;
+
+	name_len = min_t(int, name_len, TIPC_MAX_BEARER_NAME);
+	if (!string_is_terminated(cfg->name, name_len))
+		return -EINVAL;
+
+	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, cfg->name))
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(cfg->disc_domain)))
+		return -EMSGSIZE;
+
+	/* Out-of-range priorities are silently left out of the request */
+	prio = ntohl(cfg->priority);
+	if (prio <= TIPC_MAX_LINK_PRI) {
+		prop_nest = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP);
+		if (!prop_nest)
+			return -EMSGSIZE;
+		if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, prio))
+			return -EMSGSIZE;
+		nla_nest_end(skb, prop_nest);
+	}
+	nla_nest_end(skb, bearer_nest);
+
+	return 0;
+}
+
+/* Transcode a legacy bearer name into a nested TIPC_NLA_BEARER attribute
+ * containing TIPC_NLA_BEARER_NAME.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb runs out of room, or -EINVAL if
+ * the request is empty or the name is not terminated in bounds.
+ */
+static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
+					 struct sk_buff *skb,
+					 struct tipc_nl_compat_msg *msg)
+{
+	char *bearer_name = (char *)TLV_DATA(msg->req);
+	struct nlattr *nest;
+	int name_len;
+
+	nest = nla_nest_start_noflag(skb, TIPC_NLA_BEARER);
+	if (!nest)
+		return -EMSGSIZE;
+
+	name_len = TLV_GET_DATA_LEN(msg->req);
+	if (name_len <= 0)
+		return -EINVAL;
+
+	/* Never scan further than the longest legal bearer name */
+	name_len = min_t(int, name_len, TIPC_MAX_BEARER_NAME);
+	if (!string_is_terminated(bearer_name, name_len))
+		return -EINVAL;
+
+	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, bearer_name))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
+/* Percentage of @count relative to @total, rounded to the nearest integer.
+ *
+ * Returns 0 when @total is 0 rather than dividing by zero.
+ *
+ * NOTE(review): count * 100 is evaluated in u32 and therefore wraps for
+ * counts above U32_MAX / 100.
+ */
+static inline u32 perc(u32 count, u32 total)
+{
+	if (!total)
+		return 0;
+
+	/* Adding total / 2 before dividing rounds to nearest */
+	return (count * 100 + (total / 2)) / total;
+}
+
+/* Append the statistics text used for a broadcast link to msg->rep.
+ *
+ * @msg: compat message whose reply buffer receives the text
+ * @prop: parsed link property attributes; TIPC_NLA_PROP_WIN is read
+ * @stats: parsed link statistics attributes
+ *
+ * Every attribute read here is dereferenced without a presence check.
+ */
+static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg,
+ struct nlattr *prop[], struct nlattr *stats[])
+{
+ tipc_tlv_sprintf(msg->rep, " Window:%u packets\n",
+ nla_get_u32(prop[TIPC_NLA_PROP_WIN]));
+
+ tipc_tlv_sprintf(msg->rep,
+ " RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED]));
+
+ tipc_tlv_sprintf(msg->rep,
+ " TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED]));
+
+ tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]),
+ nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES]));
+
+ tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED]));
+
+ /* The last line is emitted without a trailing newline */
+ tipc_tlv_sprintf(msg->rep,
+ " Congestion link:%u Send queue max:%u avg:%u",
+ nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]),
+ nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE]));
+}
+
+/* Format callback used for TIPC_CMD_SHOW_LINK_STATS: append a textual
+ * statistics report for the link named in the legacy request.
+ *
+ * @msg: compat message; msg->req holds the requested link name and msg->rep
+ * receives the text
+ * @attrs: top-level attributes of one dumped netlink message
+ *
+ * Returns 0 when the report was appended and also when the dumped link is not
+ * the requested one (nothing is appended then); -EINVAL if a required nested
+ * attribute is missing or the requested name is malformed; otherwise the
+ * nested-parse error.
+ */
+static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
+ struct nlattr **attrs)
+{
+ char *name;
+ struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
+ struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
+ struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];
+ int err;
+ int len;
+
+ if (!attrs[TIPC_NLA_LINK])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX,
+ attrs[TIPC_NLA_LINK], NULL, NULL);
+ if (err)
+ return err;
+
+ if (!link[TIPC_NLA_LINK_PROP])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(prop, TIPC_NLA_PROP_MAX,
+ link[TIPC_NLA_LINK_PROP], NULL,
+ NULL);
+ if (err)
+ return err;
+
+ if (!link[TIPC_NLA_LINK_STATS])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(stats, TIPC_NLA_STATS_MAX,
+ link[TIPC_NLA_LINK_STATS], NULL,
+ NULL);
+ if (err)
+ return err;
+
+ name = (char *)TLV_DATA(msg->req);
+
+ /* Validate the requested name before using it as a C string */
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_LINK_NAME);
+ if (!string_is_terminated(name, len))
+ return -EINVAL;
+
+ /* Only the link whose name matches the request is reported */
+ if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
+ return 0;
+
+ tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n",
+ (char *)nla_data(link[TIPC_NLA_LINK_NAME]));
+
+ /* Broadcast links get the shorter report and nothing else */
+ if (link[TIPC_NLA_LINK_BROADCAST]) {
+ __fill_bc_link_stat(msg, prop, stats);
+ return 0;
+ }
+
+ if (link[TIPC_NLA_LINK_ACTIVE])
+ tipc_tlv_sprintf(msg->rep, " ACTIVE");
+ else if (link[TIPC_NLA_LINK_UP])
+ tipc_tlv_sprintf(msg->rep, " STANDBY");
+ else
+ tipc_tlv_sprintf(msg->rep, " DEFUNCT");
+
+ tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u",
+ nla_get_u32(link[TIPC_NLA_LINK_MTU]),
+ nla_get_u32(prop[TIPC_NLA_PROP_PRIO]));
+
+ tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n",
+ nla_get_u32(prop[TIPC_NLA_PROP_TOL]),
+ nla_get_u32(prop[TIPC_NLA_PROP_WIN]));
+
+ /* Packet counts are printed as LINK_RX/LINK_TX minus the *_INFO stat */
+ tipc_tlv_sprintf(msg->rep,
+ " RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+ nla_get_u32(link[TIPC_NLA_LINK_RX]) -
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED]));
+
+ tipc_tlv_sprintf(msg->rep,
+ " TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+ nla_get_u32(link[TIPC_NLA_LINK_TX]) -
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED]));
+
+ /* NOTE(review): divides by MSG_PROF_TOT with no zero check here;
+ * presumably the producer never reports 0 - confirm.
+ */
+ tipc_tlv_sprintf(msg->rep,
+ " TX profile sample:%u packets average:%u octets\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) /
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]));
+
+ /* Message length buckets P0..P6 as a percentage of MSG_PROF_TOT */
+ tipc_tlv_sprintf(msg->rep,
+ " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ",
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])));
+
+ tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n",
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+ perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])));
+
+ tipc_tlv_sprintf(msg->rep,
+ " RX states:%u probes:%u naks:%u defs:%u dups:%u\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]),
+ nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES]));
+
+ tipc_tlv_sprintf(msg->rep,
+ " TX states:%u probes:%u naks:%u acks:%u dups:%u\n",
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED]));
+
+ /* The last line is emitted without a trailing newline */
+ tipc_tlv_sprintf(msg->rep,
+ " Congestion link:%u Send queue max:%u avg:%u",
+ nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]),
+ nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]),
+ nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE]));
+
+ return 0;
+}
+
+/* Format callback: append one TIPC_TLV_LINK_INFO record built from the
+ * nested TIPC_NLA_LINK attribute to the reply.
+ *
+ * NOTE(review): both "up" and "dest" are filled from nla_get_flag(), i.e.
+ * from the presence of the attribute rather than its payload - confirm that
+ * this is what legacy users expect for "dest".
+ *
+ * Returns the result of tipc_add_tlv(), -EINVAL if TIPC_NLA_LINK is
+ * missing, or the nested-parse error.
+ */
+static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
+				    struct nlattr **attrs)
+{
+	struct nlattr *tb[TIPC_NLA_LINK_MAX + 1];
+	struct tipc_link_info rec;
+	int rc;
+
+	if (!attrs[TIPC_NLA_LINK])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(tb, TIPC_NLA_LINK_MAX,
+					 attrs[TIPC_NLA_LINK], NULL, NULL);
+	if (rc)
+		return rc;
+
+	nla_strscpy(rec.str, tb[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME);
+	rec.up = htonl(nla_get_flag(tb[TIPC_NLA_LINK_UP]));
+	rec.dest = htonl(nla_get_flag(tb[TIPC_NLA_LINK_DEST]));
+
+	return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, &rec, sizeof(rec));
+}
+
+/* Add the property selected by msg->cmd (priority, tolerance or window) to
+ * @skb as a u32 attribute carrying lc->value in host byte order.
+ *
+ * Returns the result of nla_put_u32(), or -EINVAL for any other command.
+ */
+static int __tipc_add_link_prop(struct sk_buff *skb,
+				struct tipc_nl_compat_msg *msg,
+				struct tipc_link_config *lc)
+{
+	int attrtype;
+
+	switch (msg->cmd) {
+	case TIPC_CMD_SET_LINK_PRI:
+		attrtype = TIPC_NLA_PROP_PRIO;
+		break;
+	case TIPC_CMD_SET_LINK_TOL:
+		attrtype = TIPC_NLA_PROP_TOL;
+		break;
+	case TIPC_CMD_SET_LINK_WINDOW:
+		attrtype = TIPC_NLA_PROP_WIN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return nla_put_u32(skb, attrtype, ntohl(lc->value));
+}
+
+/* Transcode a legacy link configuration that names a media into a nested
+ * TIPC_NLA_MEDIA attribute (name plus one property).
+ *
+ * lc->name is used as a C string without validation here; the caller in
+ * this file (tipc_nl_compat_link_set) checks its termination first.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb runs out of room, or the error
+ * reported by __tipc_add_link_prop().
+ */
+static int tipc_nl_compat_media_set(struct sk_buff *skb,
+				    struct tipc_nl_compat_msg *msg)
+{
+	struct nlattr *prop;
+	struct nlattr *media;
+	struct tipc_link_config *lc;
+	int err;
+
+	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+	media = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA);
+	if (!media)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name))
+		return -EMSGSIZE;
+
+	prop = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA_PROP);
+	if (!prop)
+		return -EMSGSIZE;
+
+	/* Do not hand over a request whose property could not be added */
+	err = __tipc_add_link_prop(skb, msg, lc);
+	if (err)
+		return err;
+
+	nla_nest_end(skb, prop);
+	nla_nest_end(skb, media);
+
+	return 0;
+}
+
+/* Transcode a legacy link configuration that names a bearer into a nested
+ * TIPC_NLA_BEARER attribute (name plus one property).
+ *
+ * lc->name is used as a C string without validation here; the caller in
+ * this file (tipc_nl_compat_link_set) checks its termination first.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb runs out of room, or the error
+ * reported by __tipc_add_link_prop().
+ */
+static int tipc_nl_compat_bearer_set(struct sk_buff *skb,
+				     struct tipc_nl_compat_msg *msg)
+{
+	struct nlattr *prop;
+	struct nlattr *bearer;
+	struct tipc_link_config *lc;
+	int err;
+
+	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+	bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER);
+	if (!bearer)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name))
+		return -EMSGSIZE;
+
+	prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP);
+	if (!prop)
+		return -EMSGSIZE;
+
+	/* Do not hand over a request whose property could not be added */
+	err = __tipc_add_link_prop(skb, msg, lc);
+	if (err)
+		return err;
+
+	nla_nest_end(skb, prop);
+	nla_nest_end(skb, bearer);
+
+	return 0;
+}
+
+/* Transcode a legacy link configuration into a nested TIPC_NLA_LINK
+ * attribute (name plus one property).
+ *
+ * lc->name is used as a C string without validation here; the caller in
+ * this file (tipc_nl_compat_link_set) checks its termination first.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb runs out of room, or the error
+ * reported by __tipc_add_link_prop().
+ */
+static int __tipc_nl_compat_link_set(struct sk_buff *skb,
+				     struct tipc_nl_compat_msg *msg)
+{
+	struct nlattr *prop;
+	struct nlattr *link;
+	struct tipc_link_config *lc;
+	int err;
+
+	lc = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+	link = nla_nest_start_noflag(skb, TIPC_NLA_LINK);
+	if (!link)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name))
+		return -EMSGSIZE;
+
+	prop = nla_nest_start_noflag(skb, TIPC_NLA_LINK_PROP);
+	if (!prop)
+		return -EMSGSIZE;
+
+	/* Do not hand over a request whose property could not be added */
+	err = __tipc_add_link_prop(skb, msg, lc);
+	if (err)
+		return err;
+
+	nla_nest_end(skb, prop);
+	nla_nest_end(skb, link);
+
+	return 0;
+}
+
+/* Transcode a legacy "set link property" request.
+ *
+ * The name in the request may denote a media, a bearer or a link. It is
+ * looked up as a media first, then as a bearer; in those cases cmd->doit is
+ * redirected to the matching handler. Anything else is treated as a link.
+ *
+ * Returns -EINVAL if the request holds no name bytes or the name is not
+ * terminated in bounds, otherwise the result of the selected transcoder.
+ */
+static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
+				   struct sk_buff *skb,
+				   struct tipc_nl_compat_msg *msg)
+{
+	struct tipc_link_config *cfg;
+	int name_len;
+
+	cfg = (struct tipc_link_config *)TLV_DATA(msg->req);
+
+	/* Payload bytes left for the name once the fixed fields are skipped */
+	name_len = TLV_GET_DATA_LEN(msg->req);
+	name_len -= offsetof(struct tipc_link_config, name);
+	if (name_len <= 0)
+		return -EINVAL;
+
+	name_len = min_t(int, name_len, TIPC_MAX_LINK_NAME);
+	if (!string_is_terminated(cfg->name, name_len))
+		return -EINVAL;
+
+	if (tipc_media_find(cfg->name)) {
+		cmd->doit = &__tipc_nl_media_set;
+		return tipc_nl_compat_media_set(skb, msg);
+	}
+
+	if (tipc_bearer_find(msg->net, cfg->name)) {
+		cmd->doit = &__tipc_nl_bearer_set;
+		return tipc_nl_compat_bearer_set(skb, msg);
+	}
+
+	return __tipc_nl_compat_link_set(skb, msg);
+}
+
+/* Transcode a legacy link name into a nested TIPC_NLA_LINK attribute
+ * containing TIPC_NLA_LINK_NAME.
+ *
+ * Returns 0 on success, -EMSGSIZE if @skb runs out of room, or -EINVAL if
+ * the request is empty or the name is not terminated in bounds.
+ */
+static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
+					   struct sk_buff *skb,
+					   struct tipc_nl_compat_msg *msg)
+{
+	char *link_name = (char *)TLV_DATA(msg->req);
+	struct nlattr *nest;
+	int name_len;
+
+	nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK);
+	if (!nest)
+		return -EMSGSIZE;
+
+	name_len = TLV_GET_DATA_LEN(msg->req);
+	if (name_len <= 0)
+		return -EINVAL;
+
+	/* Never scan further than the longest legal link name */
+	name_len = min_t(int, name_len, TIPC_MAX_LINK_NAME);
+	if (!string_is_terminated(link_name, name_len))
+		return -EINVAL;
+
+	if (nla_put_string(skb, TIPC_NLA_LINK_NAME, link_name))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
+/* Header callback for the name table dump: write the column titles that
+ * correspond to the query depth, followed by a newline, into msg->rep.
+ *
+ * Returns 0 on success or -EINVAL if the request is too short to hold a
+ * struct tipc_name_table_query.
+ */
+static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)
+{
+	u32 i;
+	u32 depth;
+	struct tipc_name_table_query *ntq;
+	static const char * const header[] = {
+		"Type       ",
+		"Lower      Upper      ",
+		"Port Identity              ",
+		"Publication Scope"
+	};
+
+	ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
+	if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query))
+		return -EINVAL;
+
+	depth = ntohl(ntq->depth);
+
+	/* Never print more columns than there are titles */
+	if (depth > ARRAY_SIZE(header))
+		depth = ARRAY_SIZE(header);
+
+	/* Titles are data, not format strings: always go through "%s" */
+	for (i = 0; i < depth; i++)
+		tipc_tlv_sprintf(msg->rep, "%s", header[i]);
+	tipc_tlv_sprintf(msg->rep, "\n");
+
+	return 0;
+}
+
+/* Format callback for the name table dump: append one text line describing
+ * a publication, limited to the columns selected by the query depth.
+ *
+ * Publications that do not match the type or the bounds given in the
+ * request are skipped silently.
+ *
+ * Returns 0 on success (including skipped entries), -EINVAL if a required
+ * nested attribute is missing, or the nested-parse error.
+ */
+static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
+					  struct nlattr **attrs)
+{
+	char port_str[27];
+	struct tipc_name_table_query *ntq;
+	struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1];
+	struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1];
+	u32 node, depth, type, lowbound, upbound, scope;
+	static const char * const scope_str[] = {"", " zone", " cluster",
+						 " node"};
+	int err;
+
+	if (!attrs[TIPC_NLA_NAME_TABLE])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(nt, TIPC_NLA_NAME_TABLE_MAX,
+					  attrs[TIPC_NLA_NAME_TABLE], NULL,
+					  NULL);
+	if (err)
+		return err;
+
+	if (!nt[TIPC_NLA_NAME_TABLE_PUBL])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX,
+					  nt[TIPC_NLA_NAME_TABLE_PUBL], NULL,
+					  NULL);
+	if (err)
+		return err;
+
+	ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
+
+	depth = ntohl(ntq->depth);
+	type = ntohl(ntq->type);
+	lowbound = ntohl(ntq->lowbound);
+	upbound = ntohl(ntq->upbound);
+
+	/* Filter on type unless the query asks for all types */
+	if (!(depth & TIPC_NTQ_ALLTYPES) &&
+	    (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])))
+		return 0;
+	/* A bound of 0 means "no bound" */
+	if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])))
+		return 0;
+	if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER])))
+		return 0;
+
+	tipc_tlv_sprintf(msg->rep, "%-10u ",
+			 nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]));
+
+	if (depth == 1)
+		goto out;
+
+	tipc_tlv_sprintf(msg->rep, "%-10u %-10u ",
+			 nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]),
+			 nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]));
+
+	if (depth == 2)
+		goto out;
+
+	node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]);
+	/* Bounded write: port_str can never be overrun */
+	snprintf(port_str, sizeof(port_str), "<%u.%u.%u:%u>",
+		 tipc_zone(node), tipc_cluster(node), tipc_node(node),
+		 nla_get_u32(publ[TIPC_NLA_PUBL_REF]));
+	tipc_tlv_sprintf(msg->rep, "%-26s ", port_str);
+
+	if (depth == 3)
+		goto out;
+
+	/* Unknown scope values print as an empty scope instead of indexing
+	 * past the end of scope_str[].
+	 */
+	scope = nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE]);
+	if (scope >= ARRAY_SIZE(scope_str))
+		scope = 0;
+
+	tipc_tlv_sprintf(msg->rep, "%-10u %s",
+			 nla_get_u32(publ[TIPC_NLA_PUBL_KEY]),
+			 scope_str[scope]);
+out:
+	tipc_tlv_sprintf(msg->rep, "\n");
+
+	return 0;
+}
+
+/* Format callback: append one publication as " {type,lower}" when lower and
+ * upper are equal, or " {type,lower,upper}" otherwise.
+ *
+ * Returns 0 on success, -EINVAL if TIPC_NLA_PUBL is missing, or the
+ * nested-parse error.
+ */
+static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg,
+				      struct nlattr **attrs)
+{
+	struct nlattr *tb[TIPC_NLA_PUBL_MAX + 1];
+	u32 type, lo, hi;
+	int rc;
+
+	if (!attrs[TIPC_NLA_PUBL])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(tb, TIPC_NLA_PUBL_MAX,
+					 attrs[TIPC_NLA_PUBL], NULL, NULL);
+	if (rc)
+		return rc;
+
+	type = nla_get_u32(tb[TIPC_NLA_PUBL_TYPE]);
+	lo = nla_get_u32(tb[TIPC_NLA_PUBL_LOWER]);
+	hi = nla_get_u32(tb[TIPC_NLA_PUBL_UPPER]);
+
+	if (lo != hi) {
+		tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lo, hi);
+		return 0;
+	}
+
+	tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lo);
+
+	return 0;
+}
+
+/* Dump the publications of one socket into msg->rep.
+ *
+ * Builds a TIPC_NL_PUBL_GET request that carries @sock as
+ * TIPC_NLA_SOCK_REF and runs it through __tipc_nl_compat_dumpit() with
+ * __tipc_nl_compat_publ_dump() as the formatter.
+ *
+ * Returns -ENOMEM if the request cannot be allocated, -EMSGSIZE if it
+ * cannot be built, otherwise the result of the dump.
+ */
+static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock)
+{
+	struct tipc_nl_compat_cmd_dump dump;
+	struct sk_buff *args;
+	struct nlattr *nest;
+	void *hdr;
+	int err = -EMSGSIZE;
+
+	args = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!args)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI,
+			  TIPC_NL_PUBL_GET);
+	if (!hdr)
+		goto out;
+
+	nest = nla_nest_start_noflag(args, TIPC_NLA_SOCK);
+	if (!nest)
+		goto out;
+
+	if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock))
+		goto out;
+
+	nla_nest_end(args, nest);
+	genlmsg_end(args, hdr);
+
+	dump.dumpit = tipc_nl_publ_dump;
+	dump.format = __tipc_nl_compat_publ_dump;
+
+	err = __tipc_nl_compat_dumpit(&dump, msg, args);
+out:
+	/* The request skb is only needed for the duration of the dump */
+	kfree_skb(args);
+
+	return err;
+}
+
+/* Format callback used for TIPC_CMD_SHOW_PORTS: append a text record for one
+ * socket. The record starts with the socket reference and is followed either
+ * by the connection peer or by the socket's publications.
+ *
+ * Returns 0 on success, -EINVAL if TIPC_NLA_SOCK is missing, or the error
+ * from nested parsing or from the publication dump.
+ */
+static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg,
+ struct nlattr **attrs)
+{
+ int err;
+ u32 sock_ref;
+ struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
+
+ if (!attrs[TIPC_NLA_SOCK])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX,
+ attrs[TIPC_NLA_SOCK], NULL, NULL);
+ if (err)
+ return err;
+
+ sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
+ tipc_tlv_sprintf(msg->rep, "%u:", sock_ref);
+
+ if (sock[TIPC_NLA_SOCK_CON]) {
+ u32 node;
+ struct nlattr *con[TIPC_NLA_CON_MAX + 1];
+
+ err = nla_parse_nested_deprecated(con, TIPC_NLA_CON_MAX,
+ sock[TIPC_NLA_SOCK_CON],
+ NULL, NULL);
+
+ if (err)
+ return err;
+
+ node = nla_get_u32(con[TIPC_NLA_CON_NODE]);
+ tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>",
+ tipc_zone(node),
+ tipc_cluster(node),
+ tipc_node(node),
+ nla_get_u32(con[TIPC_NLA_CON_SOCK]));
+
+ /* The {type,instance} pair is printed only when CON_FLAG is set */
+ if (con[TIPC_NLA_CON_FLAG])
+ tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n",
+ nla_get_u32(con[TIPC_NLA_CON_TYPE]),
+ nla_get_u32(con[TIPC_NLA_CON_INST]));
+ else
+ tipc_tlv_sprintf(msg->rep, "\n");
+ } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) {
+ tipc_tlv_sprintf(msg->rep, " bound to");
+
+ /* Nested dump: lists this socket's publications in place */
+ err = tipc_nl_compat_publ_dump(msg, sock_ref);
+ if (err)
+ return err;
+ }
+ /* Written for every record, in addition to the newline already
+ * emitted by the connected branch.
+ */
+ tipc_tlv_sprintf(msg->rep, "\n");
+
+ return 0;
+}
+
+/* Format callback: append one TIPC_TLV_MEDIA_NAME TLV, carrying the media
+ * name found in the nested TIPC_NLA_MEDIA attribute, to the reply.
+ *
+ * Returns the result of tipc_add_tlv(), -EINVAL if TIPC_NLA_MEDIA is
+ * missing, or the nested-parse error.
+ */
+static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg,
+				     struct nlattr **attrs)
+{
+	struct nlattr *tb[TIPC_NLA_MEDIA_MAX + 1];
+	struct nlattr *name;
+	int rc;
+
+	if (!attrs[TIPC_NLA_MEDIA])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(tb, TIPC_NLA_MEDIA_MAX,
+					 attrs[TIPC_NLA_MEDIA], NULL, NULL);
+	if (rc)
+		return rc;
+
+	name = tb[TIPC_NLA_MEDIA_NAME];
+
+	return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME,
+			    nla_data(name), nla_len(name));
+}
+
+/* Format callback: append one TIPC_TLV_NODE_INFO record (address and up
+ * flag, both in network byte order) to the reply.
+ *
+ * Returns the result of tipc_add_tlv(), -EINVAL if TIPC_NLA_NODE is
+ * missing, or the nested-parse error.
+ */
+static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg,
+				    struct nlattr **attrs)
+{
+	struct nlattr *tb[TIPC_NLA_NODE_MAX + 1];
+	struct tipc_node_info rec;
+	int rc;
+
+	if (!attrs[TIPC_NLA_NODE])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(tb, TIPC_NLA_NODE_MAX,
+					 attrs[TIPC_NLA_NODE], NULL, NULL);
+	if (rc)
+		return rc;
+
+	rec.up = htonl(nla_get_flag(tb[TIPC_NLA_NODE_UP]));
+	rec.addr = htonl(nla_get_u32(tb[TIPC_NLA_NODE_ADDR]));
+
+	return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &rec, sizeof(rec));
+}
+
+/* Transcode a legacy "set node address" or "set network id" request into a
+ * nested TIPC_NLA_NET attribute.
+ *
+ * The request payload is a single 32-bit value in network byte order.
+ *
+ * Returns 0 on success, -EINVAL if the payload is shorter than 32 bits, or
+ * -EMSGSIZE if @skb runs out of room.
+ */
+static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd,
+				  struct sk_buff *skb,
+				  struct tipc_nl_compat_msg *msg)
+{
+	u32 val;
+	struct nlattr *net;
+
+	/* Refuse to read a value the request does not actually contain */
+	if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(__be32))
+		return -EINVAL;
+
+	val = ntohl(*(__be32 *)TLV_DATA(msg->req));
+
+	net = nla_nest_start_noflag(skb, TIPC_NLA_NET);
+	if (!net)
+		return -EMSGSIZE;
+
+	if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) {
+		if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val))
+			return -EMSGSIZE;
+	} else if (msg->cmd == TIPC_CMD_SET_NETID) {
+		if (nla_put_u32(skb, TIPC_NLA_NET_ID, val))
+			return -EMSGSIZE;
+	}
+	nla_nest_end(skb, net);
+
+	return 0;
+}
+
+/* Format callback: append the network id as a TIPC_TLV_UNSIGNED TLV in
+ * network byte order.
+ *
+ * Returns the result of tipc_add_tlv(), -EINVAL if TIPC_NLA_NET is missing,
+ * or the nested-parse error.
+ */
+static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg,
+				   struct nlattr **attrs)
+{
+	struct nlattr *tb[TIPC_NLA_NET_MAX + 1];
+	__be32 net_id;
+	int rc;
+
+	if (!attrs[TIPC_NLA_NET])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(tb, TIPC_NLA_NET_MAX,
+					 attrs[TIPC_NLA_NET], NULL, NULL);
+	if (rc)
+		return rc;
+
+	net_id = htonl(nla_get_u32(tb[TIPC_NLA_NET_ID]));
+
+	return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &net_id,
+			    sizeof(net_id));
+}
+
+/* Handle TIPC_CMD_SHOW_STATS: the reply is an ultra string that contains
+ * only the TIPC version line.
+ *
+ * Returns 0 on success or -ENOMEM if the reply cannot be allocated.
+ */
+static int tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg)
+{
+	struct sk_buff *rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN);
+
+	msg->rep = rep;
+	if (!rep)
+		return -ENOMEM;
+
+	tipc_tlv_init(rep, TIPC_TLV_ULTRA_STRING);
+	tipc_tlv_sprintf(rep, "TIPC version " TIPC_MOD_VER "\n");
+
+	return 0;
+}
+
+/* Dispatch one legacy command.
+ *
+ * For each supported msg->cmd this fills in the expected request TLV type,
+ * the reply size/type where needed and the callbacks to use, then hands over
+ * to tipc_nl_compat_dumpit() (queries) or tipc_nl_compat_doit() (actions).
+ *
+ * Returns the result of the selected handler, or -EOPNOTSUPP for commands
+ * that are not listed here.
+ */
+static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
+{
+ struct tipc_nl_compat_cmd_dump dump;
+ struct tipc_nl_compat_cmd_doit doit;
+
+ /* Unset callbacks (e.g. dump.header) must read as NULL */
+ memset(&dump, 0, sizeof(dump));
+ memset(&doit, 0, sizeof(doit));
+
+ switch (msg->cmd) {
+ case TIPC_CMD_NOOP:
+ /* An empty reply is all that is sent back */
+ msg->rep = tipc_tlv_alloc(0);
+ if (!msg->rep)
+ return -ENOMEM;
+ return 0;
+ case TIPC_CMD_GET_BEARER_NAMES:
+ msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME);
+ dump.dumpit = tipc_nl_bearer_dump;
+ dump.format = tipc_nl_compat_bearer_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ case TIPC_CMD_ENABLE_BEARER:
+ msg->req_type = TIPC_TLV_BEARER_CONFIG;
+ doit.doit = __tipc_nl_bearer_enable;
+ doit.transcode = tipc_nl_compat_bearer_enable;
+ return tipc_nl_compat_doit(&doit, msg);
+ case TIPC_CMD_DISABLE_BEARER:
+ msg->req_type = TIPC_TLV_BEARER_NAME;
+ doit.doit = __tipc_nl_bearer_disable;
+ doit.transcode = tipc_nl_compat_bearer_disable;
+ return tipc_nl_compat_doit(&doit, msg);
+ case TIPC_CMD_SHOW_LINK_STATS:
+ msg->req_type = TIPC_TLV_LINK_NAME;
+ msg->rep_size = ULTRA_STRING_MAX_LEN;
+ msg->rep_type = TIPC_TLV_ULTRA_STRING;
+ dump.dumpit = tipc_nl_node_dump_link;
+ dump.format = tipc_nl_compat_link_stat_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ case TIPC_CMD_GET_LINKS:
+ msg->req_type = TIPC_TLV_NET_ADDR;
+ msg->rep_size = ULTRA_STRING_MAX_LEN;
+ dump.dumpit = tipc_nl_node_dump_link;
+ dump.format = tipc_nl_compat_link_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ /* The three link property setters share one transcoder */
+ case TIPC_CMD_SET_LINK_TOL:
+ case TIPC_CMD_SET_LINK_PRI:
+ case TIPC_CMD_SET_LINK_WINDOW:
+ msg->req_type = TIPC_TLV_LINK_CONFIG;
+ doit.doit = tipc_nl_node_set_link;
+ doit.transcode = tipc_nl_compat_link_set;
+ return tipc_nl_compat_doit(&doit, msg);
+ case TIPC_CMD_RESET_LINK_STATS:
+ msg->req_type = TIPC_TLV_LINK_NAME;
+ doit.doit = tipc_nl_node_reset_link_stats;
+ doit.transcode = tipc_nl_compat_link_reset_stats;
+ return tipc_nl_compat_doit(&doit, msg);
+ case TIPC_CMD_SHOW_NAME_TABLE:
+ msg->req_type = TIPC_TLV_NAME_TBL_QUERY;
+ msg->rep_size = ULTRA_STRING_MAX_LEN;
+ msg->rep_type = TIPC_TLV_ULTRA_STRING;
+ dump.header = tipc_nl_compat_name_table_dump_header;
+ dump.dumpit = tipc_nl_name_table_dump;
+ dump.format = tipc_nl_compat_name_table_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ case TIPC_CMD_SHOW_PORTS:
+ msg->rep_size = ULTRA_STRING_MAX_LEN;
+ msg->rep_type = TIPC_TLV_ULTRA_STRING;
+ dump.dumpit = tipc_nl_sk_dump;
+ dump.format = tipc_nl_compat_sk_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ case TIPC_CMD_GET_MEDIA_NAMES:
+ msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME);
+ dump.dumpit = tipc_nl_media_dump;
+ dump.format = tipc_nl_compat_media_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ case TIPC_CMD_GET_NODES:
+ msg->rep_size = ULTRA_STRING_MAX_LEN;
+ dump.dumpit = tipc_nl_node_dump;
+ dump.format = tipc_nl_compat_node_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ /* Node address and network id share one handler and transcoder and
+ * differ only in the TLV type expected in the request.
+ */
+ case TIPC_CMD_SET_NODE_ADDR:
+ msg->req_type = TIPC_TLV_NET_ADDR;
+ doit.doit = __tipc_nl_net_set;
+ doit.transcode = tipc_nl_compat_net_set;
+ return tipc_nl_compat_doit(&doit, msg);
+ case TIPC_CMD_SET_NETID:
+ msg->req_type = TIPC_TLV_UNSIGNED;
+ doit.doit = __tipc_nl_net_set;
+ doit.transcode = tipc_nl_compat_net_set;
+ return tipc_nl_compat_doit(&doit, msg);
+ case TIPC_CMD_GET_NETID:
+ msg->rep_size = sizeof(u32);
+ dump.dumpit = tipc_nl_net_dump;
+ dump.format = tipc_nl_compat_net_dump;
+ return tipc_nl_compat_dumpit(&dump, msg);
+ case TIPC_CMD_SHOW_STATS:
+ return tipc_cmd_show_stats_compat(msg);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* Generic netlink ->doit handler for the legacy TIPC configuration command.
+ *
+ * Extracts the legacy command and its TLV payload from the request, checks
+ * privileges and TLV sanity, lets tipc_nl_compat_handle() build the reply and
+ * unicasts that reply back to the sender.
+ *
+ * Returns 0 on success or a negative errno. For some errors an error-string
+ * reply is sent in addition to returning the error.
+ */
+static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ int len;
+ struct tipc_nl_compat_msg msg;
+ struct nlmsghdr *req_nlh;
+ struct nlmsghdr *rep_nlh;
+ struct tipc_genlmsghdr *req_userhdr = genl_info_userhdr(info);
+
+ memset(&msg, 0, sizeof(msg));
+
+ req_nlh = (struct nlmsghdr *)skb->data;
+ /* The legacy payload follows the genl header and the TIPC user header */
+ msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
+ msg.cmd = req_userhdr->cmd;
+ msg.net = genl_info_net(info);
+ msg.dst_sk = skb->sk;
+
+ /* Commands with any of the 0xC000 bits set require CAP_NET_ADMIN */
+ if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
+ msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
+ err = -EACCES;
+ goto send;
+ }
+
+ /* A non-empty payload must start with a well-formed TLV */
+ msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
+ if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) {
+ msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
+ err = -EOPNOTSUPP;
+ goto send;
+ }
+
+ err = tipc_nl_compat_handle(&msg);
+ /* Turn selected errors into an error-string reply */
+ if ((err == -EOPNOTSUPP) || (err == -EPERM))
+ msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
+ else if (err == -EINVAL)
+ msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR);
+send:
+ /* Nothing to send back: just report the status */
+ if (!msg.rep)
+ return err;
+
+ /* Prepend header room to the reply and fill it with a copy of the
+ * request's headers, then fix up the total length.
+ */
+ len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
+ skb_push(msg.rep, len);
+ rep_nlh = nlmsg_hdr(msg.rep);
+ memcpy(rep_nlh, info->nlhdr, len);
+ rep_nlh->nlmsg_len = msg.rep->len;
+ /* The result of the unicast is not checked */
+ genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid);
+
+ return err;
+}
+
+/* The compat family exposes a single command, TIPC_GENL_CMD, which is handled
+ * by tipc_nl_compat_recv(); strict and dump validation are switched off.
+ */
+static const struct genl_small_ops tipc_genl_compat_ops[] = {
+ {
+ .cmd = TIPC_GENL_CMD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_compat_recv,
+ },
+};
+
+/* Generic netlink family of the legacy interface. It declares no attributes
+ * of its own (maxattr is 0) and reserves a user header of TIPC_GENL_HDRLEN
+ * bytes, which tipc_nl_compat_recv() reads as a struct tipc_genlmsghdr.
+ */
+static struct genl_family tipc_genl_compat_family __ro_after_init = {
+ .name = TIPC_GENL_NAME,
+ .version = TIPC_GENL_VERSION,
+ .hdrsize = TIPC_GENL_HDRLEN,
+ .maxattr = 0,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .small_ops = tipc_genl_compat_ops,
+ .n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+ .resv_start_op = TIPC_GENL_CMD + 1,
+};
+
+/* Register the legacy compat generic netlink family.
+ *
+ * Returns 0 on success, otherwise the error from genl_register_family()
+ * (which is also logged).
+ */
+int __init tipc_netlink_compat_start(void)
+{
+	int rc = genl_register_family(&tipc_genl_compat_family);
+
+	if (rc)
+		pr_err("Failed to register legacy compat interface\n");
+
+	return rc;
+}
+
+/* Unregister the legacy compat generic netlink family. */
+void tipc_netlink_compat_stop(void)
+{
+ genl_unregister_family(&tipc_genl_compat_family);
+}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6e6c434872e8..a07fb073368c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,8 +1,8 @@
/*
* net/tipc/node.c: TIPC node management routines
*
- * Copyright (c) 2000-2006, 2012 Ericsson AB
- * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
+ * Copyright (c) 2000-2006, 2012-2016, Ericsson AB
+ * Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,407 +35,3131 @@
*/
#include "core.h"
-#include "config.h"
+#include "link.h"
#include "node.h"
#include "name_distr.h"
+#include "socket.h"
+#include "bcast.h"
+#include "monitor.h"
+#include "discover.h"
+#include "netlink.h"
+#include "trace.h"
+#include "crypto.h"
+
+#define INVALID_NODE_SIG 0x10000 /* initial tipc_node::signature of a newly created node */
+#define NODE_CLEANUP_AFTER 300000 /* milliseconds; added to jiffies to set tipc_node::delete_at */
+
+/* Flags used to take different actions according to flag type
+ * TIPC_NOTIFY_NODE_DOWN: notify node is down
+ * TIPC_NOTIFY_NODE_UP: notify node is up
+ * TIPC_NOTIFY_LINK_UP: call tipc_mon_peer_up() and publish the
+ * TIPC_LINK_STATE name for the link
+ * TIPC_NOTIFY_LINK_DOWN: call tipc_mon_peer_down() and withdraw the
+ * TIPC_LINK_STATE name for the link
+ * The flags are collected in tipc_node::action_flags and acted upon by
+ * tipc_node_write_unlock() after the node lock has been released.
+ */
+enum {
+ TIPC_NOTIFY_NODE_DOWN = (1 << 3),
+ TIPC_NOTIFY_NODE_UP = (1 << 4),
+ TIPC_NOTIFY_LINK_UP = (1 << 6),
+ TIPC_NOTIFY_LINK_DOWN = (1 << 7)
+};
+
+struct tipc_link_entry { /* one slot of tipc_node::links[], indexed by bearer id */
+ struct tipc_link *link; /* NULL while no link exists on this bearer */
+ spinlock_t lock; /* per link */
+ u32 mtu; /* set from tipc_link_mss() in __tipc_node_link_up() */
+ struct sk_buff_head inputq; /* handed to tipc_sk_rcv() by tipc_node_link_down() */
+ struct tipc_media_addr maddr; /* destination address given to tipc_bearer_xmit() */
+};
+
+struct tipc_bclink_entry { /* embedded in struct tipc_node as bc_entry */
+ struct tipc_link *link; /* filled in by tipc_link_bc_create(); kfree()d in tipc_node_kref_release() */
+ struct sk_buff_head inputq1; /* passed to tipc_link_bc_create() */
+ struct sk_buff_head arrvq; /* initialised with __skb_queue_head_init(), unlike the other queues */
+ struct sk_buff_head inputq2;
+ struct sk_buff_head namedq; /* passed to tipc_link_bc_create() */
+ u16 named_rcv_nxt;
+ bool named_open;
+};
+
+/**
+ * struct tipc_node - TIPC node structure
+ * @addr: network address of node
+ * @kref: reference counter to node object
+ * @lock: rwlock governing access to structure
+ * @net: the applicable net namespace
+ * @hash: links to adjacent nodes in unsorted hash chain
+ * @active_links: bearer ids of active links, used as index into links[] array;
+ * INVALID_BEARER_ID when the slot is empty
+ * @links: array containing references to all links to node
+ * @bc_entry: broadcast link entry
+ * @action_flags: bit mask of different types of node actions
+ * (TIPC_NOTIFY_* values), consumed by tipc_node_write_unlock()
+ * @state: connectivity state vs peer node (one of the node FSM states)
+ * @preliminary: a preliminary node or not
+ * @failover_sent: failover sent or not
+ * @sync_point: sequence number where synch/failover is finished
+ * @list: links to adjacent nodes in sorted list of cluster's nodes
+ * @working_links: number of working links to node (both active and standby)
+ * @link_cnt: number of links to node
+ * @capabilities: bitmap, indicating peer node's functional capabilities
+ * @signature: node instance identifier
+ * @link_id: local and remote bearer ids of changing link, if any
+ * @peer_id: 128-bit ID of peer
+ * @peer_id_string: ID string of peer
+ * @publ_list: list of publications
+ * @conn_sks: list of struct tipc_sock_conn entries, added by
+ * tipc_node_add_conn() and removed by tipc_node_remove_conn()
+ * @timer: node's keepalive timer
+ * @keepalive_intv: keepalive interval in milliseconds
+ * @rcu: rcu struct for tipc_node
+ * @delete_at: indicates the time (in jiffies) for deleting a down node
+ * @peer_net: peer's net namespace, as resolved by
+ * tipc_node_assign_peer_net(); NULL if none was matched
+ * @peer_hash_mix: hash mix value recorded by tipc_node_assign_peer_net()
+ * when @peer_net was resolved
+ * @crypto_rx: RX crypto handler
+ */
+struct tipc_node {
+ u32 addr;
+ struct kref kref;
+ rwlock_t lock;
+ struct net *net;
+ struct hlist_node hash;
+ int active_links[2];
+ struct tipc_link_entry links[MAX_BEARERS];
+ struct tipc_bclink_entry bc_entry;
+ int action_flags;
+ struct list_head list;
+ int state;
+ bool preliminary;
+ bool failover_sent;
+ u16 sync_point;
+ int link_cnt;
+ u16 working_links;
+ u16 capabilities;
+ u32 signature;
+ u32 link_id;
+ u8 peer_id[16];
+ char peer_id_string[NODE_ID_STR_LEN];
+ struct list_head publ_list;
+ struct list_head conn_sks;
+ unsigned long keepalive_intv;
+ struct timer_list timer;
+ struct rcu_head rcu;
+ unsigned long delete_at;
+ struct net *peer_net;
+ u32 peer_hash_mix;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct tipc_crypto *crypto_rx;
+#endif
+};
+
+/* Node FSM states and events:
+ * The first enum lists the values stored in tipc_node::state; the second
+ * lists the event codes passed to tipc_node_fsm_evt().
+ */
+enum {
+ SELF_DOWN_PEER_DOWN = 0xdd,
+ SELF_UP_PEER_UP = 0xaa,
+ SELF_DOWN_PEER_LEAVING = 0xd1, /* state assigned to a node in tipc_node_create() */
+ SELF_UP_PEER_COMING = 0xac,
+ SELF_COMING_PEER_UP = 0xca,
+ SELF_LEAVING_PEER_DOWN = 0x1d,
+ NODE_FAILINGOVER = 0xf0,
+ NODE_SYNCHING = 0xcc
+};
+
+enum {
+ SELF_ESTABL_CONTACT_EVT = 0xece,
+ SELF_LOST_CONTACT_EVT = 0x1ce,
+ PEER_ESTABL_CONTACT_EVT = 0x9ece,
+ PEER_LOST_CONTACT_EVT = 0x91ce,
+ NODE_FAILOVER_BEGIN_EVT = 0xfbe,
+ NODE_FAILOVER_END_EVT = 0xfee,
+ NODE_SYNCH_BEGIN_EVT = 0xcbe,
+ NODE_SYNCH_END_EVT = 0xcee
+};
+
+static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr **maddr);
+static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
+ bool delete);
+static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
+static void tipc_node_delete(struct tipc_node *node);
+static void tipc_node_timeout(struct timer_list *t);
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
+static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
+static bool node_is_up(struct tipc_node *n);
+static void tipc_node_delete_from_list(struct tipc_node *node);
+
+struct tipc_sock_conn { /* one element of tipc_node::conn_sks */
+ u32 port; /* key compared by tipc_node_remove_conn() */
+ u32 peer_port;
+ u32 peer_node; /* the dnode argument of tipc_node_add_conn() */
+ struct list_head list;
+};
+
+static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
+{ /* Return the link in active slot (sel & 1) of @n, or NULL if that slot is empty */
+ int bearer_id = n->active_links[sel & 1];
+
+ if (unlikely(bearer_id == INVALID_BEARER_ID))
+ return NULL;
+
+ return n->links[bearer_id].link;
+}
-#define NODE_HTABLE_SIZE 512
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected)
+{ /* Return the MTU to use towards node @addr via active slot (sel & 1) */
+ struct tipc_node *n;
+ int bearer_id;
+ unsigned int mtu = MAX_MSG_SIZE; /* also the result when the node is unknown or has no active link */
+
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return mtu;
+
+ /* Allow MAX_MSG_SIZE when building a connection oriented message
+ * and a peer net namespace has been resolved for the node
+ * (n->peer_net is set)
+ */
+ if (n->peer_net && connected) {
+ tipc_node_put(n);
+ return mtu;
+ }
-static void node_lost_contact(struct tipc_node *n_ptr);
-static void node_established_contact(struct tipc_node *n_ptr);
+ bearer_id = n->active_links[sel & 1];
+ if (likely(bearer_id != INVALID_BEARER_ID))
+ mtu = n->links[bearer_id].mtu;
+ tipc_node_put(n); /* drop the reference taken by tipc_node_find() */
+ return mtu;
+}
+
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id)
+{ /* Copy the TIPC_NODEID_LEN-byte id of node @addr into @id.
+ * Returns false only when @addr is neither the own address nor a
+ * known node.
+ */
+ u8 *own_id = tipc_own_id(net);
+ struct tipc_node *n;
-static DEFINE_SPINLOCK(node_create_lock);
+ if (!own_id) /* NOTE(review): returns true without writing @id - confirm intended */
+ return true;
-static struct hlist_head node_htable[NODE_HTABLE_SIZE];
-LIST_HEAD(tipc_node_list);
-static u32 tipc_num_nodes;
+ if (addr == tipc_own_addr(net)) {
+ memcpy(id, own_id, TIPC_NODEID_LEN);
+ return true;
+ }
+ n = tipc_node_find(net, addr);
+ if (!n)
+ return false;
-static atomic_t tipc_num_links = ATOMIC_INIT(0);
+ memcpy(id, &n->peer_id, TIPC_NODEID_LEN);
+ tipc_node_put(n); /* drop the reference taken by tipc_node_find() */
+ return true;
+}
-/*
- * A trivial power-of-two bitmask technique is used for speed, since this
- * operation is done for every incoming TIPC packet. The number of hash table
- * entries has been chosen so that no hash chain exceeds 8 nodes and will
- * usually be much smaller (typically only a single node).
+u16 tipc_node_get_capabilities(struct net *net, u32 addr)
+{ /* Return the capability bitmap stored for node @addr */
+ struct tipc_node *n;
+ u16 caps;
+
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return TIPC_NODE_CAPABILITIES; /* fallback for an unknown node */
+ caps = n->capabilities;
+ tipc_node_put(n);
+ return caps;
+}
+
+u32 tipc_node_get_addr(struct tipc_node *node)
+{ /* Return @node's address, or 0 when @node is NULL */
+ return (node) ? node->addr : 0;
+}
+
+char *tipc_node_get_id_str(struct tipc_node *node)
+{ /* Return the peer id string; @node is dereferenced without a NULL check */
+ return node->peer_id_string;
+}
+
+#ifdef CONFIG_TIPC_CRYPTO
+/**
+ * tipc_node_crypto_rx - Retrieve crypto RX handle from node
+ * @__n: target tipc_node
+ * Note: node ref counter must be held first!
+ * Return: the node's crypto_rx pointer, or NULL when @__n is NULL
*/
-static unsigned int tipc_hashfn(u32 addr)
+struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n)
+{
+ return (__n) ? __n->crypto_rx : NULL;
+}
+
+struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
+{ /* @pos must be the 'list' member of a struct tipc_node */
+ return container_of(pos, struct tipc_node, list)->crypto_rx;
+}
+
+struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr)
+{ /* Return the crypto RX handle of node @addr, or NULL if unknown.
+ * The reference taken by tipc_node_find() is not dropped here;
+ * presumably the caller must tipc_node_put() the node - TODO confirm.
+ */
+ struct tipc_node *n;
+
+ n = tipc_node_find(net, addr);
+ return (n) ? n->crypto_rx : NULL;
+}
+#endif
+
+static void tipc_node_free(struct rcu_head *rp)
+{ /* RCU callback queued by tipc_node_kref_release(): final free of the node */
+ struct tipc_node *n = container_of(rp, struct tipc_node, rcu);
+
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&n->crypto_rx);
+#endif
+ kfree(n);
+}
+
+static void tipc_node_kref_release(struct kref *kref)
+{ /* kref release function used by tipc_node_put() */
+ struct tipc_node *n = container_of(kref, struct tipc_node, kref);
+
+ kfree(n->bc_entry.link);
+ call_rcu(&n->rcu, tipc_node_free); /* the node itself is freed in tipc_node_free() */
+}
+
+void tipc_node_put(struct tipc_node *node)
+{ /* Drop one reference; tipc_node_kref_release() runs when the count reaches zero */
+ kref_put(&node->kref, tipc_node_kref_release);
+}
+
+void tipc_node_get(struct tipc_node *node)
{
- return addr & (NODE_HTABLE_SIZE - 1);
+ kref_get(&node->kref); /* take one additional reference on @node */
}
/*
* tipc_node_find - locate specified node object, if it exists
*/
-struct tipc_node *tipc_node_find(u32 addr)
+static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
{
+ struct tipc_net *tn = tipc_net(net);
struct tipc_node *node;
+ unsigned int thash = tipc_hashfn(addr);
- if (unlikely(!in_own_cluster_exact(addr)))
- return NULL;
-
- hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) {
- if (node->addr == addr)
- return node;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
+ if (node->addr != addr || node->preliminary) /* preliminary nodes are never returned */
+ continue;
+ if (!kref_get_unless_zero(&node->kref)) /* no reference could be taken: report not found */
+ node = NULL;
+ break;
}
- return NULL;
+ rcu_read_unlock();
+ return node; /* when non-NULL a reference is held; release with tipc_node_put() */
}
-/**
- * tipc_node_create - create neighboring node
- *
- * Currently, this routine is called by neighbor discovery code, which holds
- * net_lock for reading only. We must take node_create_lock to ensure a node
- * isn't created twice if two different bearers discover the node at the same
- * time. (It would be preferable to switch to holding net_lock in write mode,
- * but this is a non-trivial change.)
+/* tipc_node_find_by_id - locate specified node object by its 128-bit id
+ * Note: this function is called only when a discovery request failed
+ * to find the node by its 32-bit id, and is not time critical
+ * Walks the whole node list. Unlike tipc_node_find() there is no check of
+ * the 'preliminary' flag. Returns the node with a reference held, or NULL.
*/
-struct tipc_node *tipc_node_create(u32 addr)
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
{
- struct tipc_node *n_ptr, *temp_node;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+ bool found = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ read_lock_bh(&n->lock);
+ if (!memcmp(id, n->peer_id, 16) &&
+ kref_get_unless_zero(&n->kref)) /* a match only counts if a reference could be taken */
+ found = true;
+ read_unlock_bh(&n->lock);
+ if (found)
+ break;
+ }
+ rcu_read_unlock();
+ return found ? n : NULL;
+}
- spin_lock_bh(&node_create_lock);
+static void tipc_node_read_lock(struct tipc_node *n)
+ __acquires(n->lock)
+{ /* wrapper around read_lock_bh(&n->lock) */
+ read_lock_bh(&n->lock);
+}
- n_ptr = tipc_node_find(addr);
- if (n_ptr) {
- spin_unlock_bh(&node_create_lock);
- return n_ptr;
+static void tipc_node_read_unlock(struct tipc_node *n)
+ __releases(n->lock)
+{ /* wrapper around read_unlock_bh(&n->lock) */
+ read_unlock_bh(&n->lock);
+}
+
+static void tipc_node_write_lock(struct tipc_node *n)
+ __acquires(n->lock)
+{ /* wrapper around write_lock_bh(&n->lock) */
+ write_lock_bh(&n->lock);
+}
+
+static void tipc_node_write_unlock_fast(struct tipc_node *n)
+ __releases(n->lock)
+{ /* Release the write lock without looking at n->action_flags
+ * (contrast with tipc_node_write_unlock())
+ */
+ write_unlock_bh(&n->lock);
+}
+
+static void tipc_node_write_unlock(struct tipc_node *n)
+ __releases(n->lock)
+{ /* Release the write lock and then carry out the notifications that
+ * were requested through n->action_flags while the lock was held.
+ */
+ struct tipc_socket_addr sk;
+ struct net *net = n->net;
+ u32 flags = n->action_flags; /* snapshot taken while the lock is still held */
+ struct list_head *publ_list;
+ struct tipc_uaddr ua;
+ u32 bearer_id, node;
+
+ if (likely(!flags)) {
+ write_unlock_bh(&n->lock);
+ return;
}
- n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
- if (!n_ptr) {
- spin_unlock_bh(&node_create_lock);
- pr_warn("Node creation failed, no memory\n");
- return NULL;
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ TIPC_LINK_STATE, n->addr, n->addr);
+ sk.ref = n->link_id;
+ sk.node = tipc_own_addr(net);
+ node = n->addr;
+ bearer_id = n->link_id & 0xffff; /* low 16 bits of link_id */
+ publ_list = &n->publ_list;
+
+ n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
+ TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP);
+
+ write_unlock_bh(&n->lock); /* everything below runs without the node lock */
+
+ if (flags & TIPC_NOTIFY_NODE_DOWN)
+ tipc_publ_notify(net, publ_list, node, n->capabilities);
+
+ if (flags & TIPC_NOTIFY_NODE_UP)
+ tipc_named_node_up(net, node, n->capabilities);
+
+ if (flags & TIPC_NOTIFY_LINK_UP) {
+ tipc_mon_peer_up(net, node, bearer_id);
+ tipc_nametbl_publish(net, &ua, &sk, sk.ref);
+ }
+ if (flags & TIPC_NOTIFY_LINK_DOWN) {
+ tipc_mon_peer_down(net, node, bearer_id);
+ tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
+}
+
+static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes)
+{ /* Search the net namespaces for one whose TIPC net id, node id and
+ * hash mix all match the peer; record it in n->peer_net. Does
+ * nothing when n->peer_net is already set.
+ */
+ int net_id = tipc_netid(n->net);
+ struct tipc_net *tn_peer;
+ struct net *tmp;
+ u32 hash_chk;
+
+ if (n->peer_net)
+ return;
+
+ for_each_net_rcu(tmp) {
+ tn_peer = tipc_net(tmp);
+ if (!tn_peer)
+ continue;
+ /* Integrity checking whether node exists in namespace or not */
+ if (tn_peer->net_id != net_id)
+ continue;
+ if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN))
+ continue;
+ hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random);
+ if (hash_mixes ^ hash_chk) /* non-zero XOR: the two hash values differ */
+ continue;
+ n->peer_net = tmp;
+ n->peer_hash_mix = hash_mixes;
+ break;
+ }
+}
+
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ u16 capabilities, u32 hash_mixes,
+ bool preliminary)
+{ /* Look the peer up by @addr, then by @peer_id; update it if found,
+ * otherwise allocate and register a new node. Runs under
+ * tn->node_list_lock. Returns the node, or NULL if an allocation,
+ * crypto start or broadcast link creation failed.
+ */
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
+ struct tipc_node *n, *temp_node;
+ unsigned long intv;
+ int bearer_id;
+ int i;
+
+ spin_lock_bh(&tn->node_list_lock);
+ n = tipc_node_find(net, addr) ?:
+ tipc_node_find_by_id(net, peer_id);
+ if (n) {
+ if (!n->preliminary)
+ goto update;
+ if (preliminary)
+ goto exit;
+ /* A preliminary node becomes "real" now, refresh its data */
+ tipc_node_write_lock(n);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link refresh failed, no memory\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ n = NULL;
+ goto exit;
+ }
+ n->preliminary = false;
+ n->addr = addr;
+ hlist_del_rcu(&n->hash); /* re-hash under the new address */
+ hlist_add_head_rcu(&n->hash,
+ &tn->node_htable[tipc_hashfn(addr)]);
+ list_del_rcu(&n->list); /* re-insert to keep node_list sorted by address */
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n->list, &temp_node->list);
+ tipc_node_write_unlock_fast(n);
+
+update:
+ if (n->peer_hash_mix ^ hash_mixes)
+ tipc_node_assign_peer_net(n, hash_mixes);
+ if (n->capabilities == capabilities)
+ goto exit;
+ /* Same node may come back with new capabilities */
+ tipc_node_write_lock(n);
+ n->capabilities = capabilities;
+ for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+ l = n->links[bearer_id].link;
+ if (l)
+ tipc_link_update_caps(l, capabilities);
+ }
+ tipc_node_write_unlock_fast(n);
- n_ptr->addr = addr;
- spin_lock_init(&n_ptr->lock);
- INIT_HLIST_NODE(&n_ptr->hash);
- INIT_LIST_HEAD(&n_ptr->list);
- INIT_LIST_HEAD(&n_ptr->nsub);
+ /* Calculate cluster capabilities */
+ tn->capabilities = TIPC_NODE_CAPABILITIES;
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ tn->capabilities &= temp_node->capabilities;
+ }
- hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
+ tipc_bcast_toggle_rcast(net,
+ (tn->capabilities & TIPC_BCAST_RCAST));
- list_for_each_entry(temp_node, &tipc_node_list, list) {
- if (n_ptr->addr < temp_node->addr)
+ goto exit;
+ }
+ n = kzalloc(sizeof(*n), GFP_ATOMIC);
+ if (!n) {
+ pr_warn("Node creation failed, no memory\n");
+ goto exit;
+ }
+ tipc_nodeid2string(n->peer_id_string, peer_id);
+#ifdef CONFIG_TIPC_CRYPTO
+ if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) {
+ pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string);
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
+#endif
+ n->addr = addr;
+ n->preliminary = preliminary;
+ memcpy(&n->peer_id, peer_id, 16);
+ n->net = net;
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ /* Assign kernel local namespace if exists */
+ tipc_node_assign_peer_net(n, hash_mixes);
+ n->capabilities = capabilities;
+ kref_init(&n->kref);
+ rwlock_init(&n->lock);
+ INIT_HLIST_NODE(&n->hash);
+ INIT_LIST_HEAD(&n->list);
+ INIT_LIST_HEAD(&n->publ_list);
+ INIT_LIST_HEAD(&n->conn_sks);
+ skb_queue_head_init(&n->bc_entry.namedq);
+ skb_queue_head_init(&n->bc_entry.inputq1);
+ __skb_queue_head_init(&n->bc_entry.arrvq);
+ skb_queue_head_init(&n->bc_entry.inputq2);
+ for (i = 0; i < MAX_BEARERS; i++)
+ spin_lock_init(&n->links[i].lock);
+ n->state = SELF_DOWN_PEER_LEAVING;
+ n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
+ n->signature = INVALID_NODE_SIG;
+ n->active_links[0] = INVALID_BEARER_ID;
+ n->active_links[1] = INVALID_BEARER_ID;
+ if (!preliminary &&
+ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ tipc_node_put(n); /* last reference: frees the node via tipc_node_kref_release() */
+ n = NULL;
+ goto exit;
+ }
+ tipc_node_get(n);
+ timer_setup(&n->timer, tipc_node_timeout, 0);
+ /* Start a slow timer anyway, crypto needs it */
+ n->keepalive_intv = 10000;
+ intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+ if (!mod_timer(&n->timer, intv))
+ tipc_node_get(n); /* this reference is dropped again in tipc_node_timeout() on cleanup */
+ hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
break;
}
- list_add_tail(&n_ptr->list, &temp_node->list);
- n_ptr->block_setup = WAIT_PEER_DOWN;
- n_ptr->signature = INVALID_NODE_SIG;
+ list_add_tail_rcu(&n->list, &temp_node->list);
+ /* Calculate cluster capabilities */
+ tn->capabilities = TIPC_NODE_CAPABILITIES;
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ tn->capabilities &= temp_node->capabilities;
+ }
+ tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
+ trace_tipc_node_create(n, true, " ");
+exit:
+ spin_unlock_bh(&tn->node_list_lock);
+ return n;
+}
+
+static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
+{ /* Lower n->keepalive_intv to suit link @l and set @l's abort limit */
+ unsigned long tol = tipc_link_tolerance(l);
+ unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; /* a quarter of the tolerance, capped at 500 */
+
+ /* Link with lowest tolerance determines timer interval */
+ if (intv < n->keepalive_intv)
+ n->keepalive_intv = intv;
+
+ /* Ensure link's abort limit corresponds to current tolerance */
+ tipc_link_set_abort_limit(l, tol / n->keepalive_intv); /* NOTE(review): assumes keepalive_intv is non-zero here - confirm */
+}
+
+static void tipc_node_delete_from_list(struct tipc_node *node)
+{ /* Unlink @node from the node list and hash chain and drop one reference */
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_key_flush(node->crypto_rx);
+#endif
+ list_del_rcu(&node->list);
+ hlist_del_rcu(&node->hash);
+ tipc_node_put(node);
+}
+
+static void tipc_node_delete(struct tipc_node *node)
+{ /* Unlink @node, stop its timer and drop a second reference */
+ trace_tipc_node_delete(node, true, " ");
+ tipc_node_delete_from_list(node);
+
+ timer_delete_sync(&node->timer);
+ tipc_node_put(node); /* presumably the reference held for the timer - TODO confirm */
+}
+
+void tipc_node_stop(struct net *net)
+{ /* Delete every node of @net while holding tn->node_list_lock */
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *node, *t_node;
+
+ spin_lock_bh(&tn->node_list_lock);
+ list_for_each_entry_safe(node, t_node, &tn->node_list, list) /* _safe: tipc_node_delete() unlinks the entry */
+ tipc_node_delete(node);
+ spin_unlock_bh(&tn->node_list_lock);
+}
+
+void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr)
+{ /* Append @subscr to the publ_list of node @addr. No-op for the own
+ * node; logs a warning when the node is unknown.
+ */
+ struct tipc_node *n;
+
+ if (in_own_node(net, addr))
+ return;
+
+ n = tipc_node_find(net, addr);
+ if (!n) {
+ pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr);
+ return;
+ }
+ tipc_node_write_lock(n);
+ list_add_tail(subscr, &n->publ_list);
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n); /* drop the reference taken by tipc_node_find() */
+}
+
+void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr)
+{ /* Unlink @subscr under the write lock of node @addr. No-op for the
+ * own node; logs a warning when the node is unknown.
+ */
+ struct tipc_node *n;
+
+ if (in_own_node(net, addr))
+ return;
+
+ n = tipc_node_find(net, addr);
+ if (!n) {
+ pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr);
+ return;
+ }
+ tipc_node_write_lock(n);
+ list_del_init(subscr);
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n); /* drop the reference taken by tipc_node_find() */
+}
+
+int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
+{ /* Record a connection (@port <-> @peer_port) on node @dnode's
+ * conn_sks list. Returns 0 on success or for the own node,
+ * -EHOSTUNREACH when the node is unknown or allocation fails.
+ */
+ struct tipc_node *node;
+ struct tipc_sock_conn *conn;
+ int err = 0;
+
+ if (in_own_node(net, dnode))
+ return 0;
+
+ node = tipc_node_find(net, dnode);
+ if (!node) {
+ pr_warn("Connecting sock to node 0x%x failed\n", dnode);
+ return -EHOSTUNREACH;
+ }
+ conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
+ if (!conn) {
+ err = -EHOSTUNREACH; /* NOTE(review): allocation failure is reported as host unreachable - confirm intended */
+ goto exit;
+ }
+ conn->peer_node = dnode;
+ conn->port = port;
+ conn->peer_port = peer_port;
+
+ tipc_node_write_lock(node);
+ list_add_tail(&conn->list, &node->conn_sks);
+ tipc_node_write_unlock(node);
+exit:
+ tipc_node_put(node);
+ return err;
+}
+
+void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
+{ /* Free every conn_sks entry of node @dnode whose port equals @port */
+ struct tipc_node *node;
+ struct tipc_sock_conn *conn, *safe;
+
+ if (in_own_node(net, dnode))
+ return;
+
+ node = tipc_node_find(net, dnode);
+ if (!node)
+ return;
+
+ tipc_node_write_lock(node);
+ list_for_each_entry_safe(conn, safe, &node->conn_sks, list) {
+ if (port != conn->port)
+ continue;
+ list_del(&conn->list);
+ kfree(conn);
+ }
+ tipc_node_write_unlock(node);
+ tipc_node_put(node);
+}
+
+static void tipc_node_clear_links(struct tipc_node *node)
+{ /* Free every link of @node and decrement link_cnt for each one */
+ int i;
+
+ for (i = 0; i < MAX_BEARERS; i++) {
+ struct tipc_link_entry *le = &node->links[i];
+
+ if (le->link) {
+ kfree(le->link);
+ le->link = NULL;
+ node->link_cnt--;
+ }
+ }
+}
+
+/* tipc_node_cleanup - delete a node that is not up once its delete_at
+ * time has passed (delete_at is set NODE_CLEANUP_AFTER ms ahead)
+ * Returns true if the node was removed from the node list and hash table.
+ */
+static bool tipc_node_cleanup(struct tipc_node *peer)
+{
+ struct tipc_node *temp_node;
+ struct tipc_net *tn = tipc_net(peer->net);
+ bool deleted = false;
+
+ /* If lock held by tipc_node_stop() the node will be deleted anyway */
+ if (!spin_trylock_bh(&tn->node_list_lock))
+ return false;
+
+ tipc_node_write_lock(peer);
+
+ if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
+ tipc_node_clear_links(peer);
+ tipc_node_delete_from_list(peer);
+ deleted = true;
+ }
+ tipc_node_write_unlock(peer);
- tipc_num_nodes++;
+ if (!deleted) {
+ spin_unlock_bh(&tn->node_list_lock);
+ return deleted;
+ }
- spin_unlock_bh(&node_create_lock);
- return n_ptr;
+ /* Calculate cluster capabilities */
+ tn->capabilities = TIPC_NODE_CAPABILITIES;
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ tn->capabilities &= temp_node->capabilities;
+ }
+ tipc_bcast_toggle_rcast(peer->net,
+ (tn->capabilities & TIPC_BCAST_RCAST));
+ spin_unlock_bh(&tn->node_list_lock);
+ return deleted;
}
-void tipc_node_delete(struct tipc_node *n_ptr)
+/* tipc_node_timeout - handle expiration of node timer
+ * Cleans up a node that is down and due for deletion; otherwise runs the
+ * per-link timeout handling and re-arms the timer with keepalive_intv.
+ */
+static void tipc_node_timeout(struct timer_list *t)
{
- list_del(&n_ptr->list);
- hlist_del(&n_ptr->hash);
- kfree(n_ptr);
+ struct tipc_node *n = timer_container_of(n, t, timer);
+ struct tipc_link_entry *le;
+ struct sk_buff_head xmitq;
+ int remains = n->link_cnt; /* number of existing links still to visit */
+ int bearer_id;
+ int rc = 0;
+
+ trace_tipc_node_timeout(n, false, " ");
+ if (!node_is_up(n) && tipc_node_cleanup(n)) {
+ /* Drop the reference held on behalf of the timer */
+ tipc_node_put(n);
+ return;
+ }
+
+#ifdef CONFIG_TIPC_CRYPTO
+ /* Take any crypto key related actions first */
+ tipc_crypto_timeout(n->crypto_rx);
+#endif
+ __skb_queue_head_init(&xmitq);
+
+ /* Reset the node interval to a large value (10 seconds); it is then
+ * recalculated from the lowest link tolerance
+ */
+ tipc_node_read_lock(n);
+ n->keepalive_intv = 10000; /* NOTE(review): written while holding only the read lock */
+ tipc_node_read_unlock(n);
+ for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
+ tipc_node_read_lock(n);
+ le = &n->links[bearer_id];
+ if (le->link) {
+ spin_lock_bh(&le->lock);
+ /* Link tolerance may change asynchronously: */
+ tipc_node_calculate_timer(n, le->link);
+ rc = tipc_link_timeout(le->link, &xmitq);
+ spin_unlock_bh(&le->lock);
+ remains--;
+ }
+ tipc_node_read_unlock(n);
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n);
+ if (rc & TIPC_LINK_DOWN_EVT) /* NOTE(review): rc keeps its value from an earlier iteration when this slot has no link */
+ tipc_node_link_down(n, bearer_id, false);
+ }
+ mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv));
+}
+
+/**
+ * __tipc_node_link_up - handle addition of link
+ * @n: target tipc_node
+ * @bearer_id: id of the bearer
+ * @xmitq: queue for messages to be xmited on
+ * Node lock must be held by caller
+ * Link becomes active (alone or shared) or standby, depending on its priority.
+ * Does nothing if there is no link on @bearer_id, the link is already up,
+ * or it is still not up after LINK_ESTABLISH_EVT.
+ */
+static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
+ struct sk_buff_head *xmitq)
+{
+ int *slot0 = &n->active_links[0];
+ int *slot1 = &n->active_links[1];
+ struct tipc_link *ol = node_active_link(n, 0); /* current active link in slot 0, if any */
+ struct tipc_link *nl = n->links[bearer_id].link; /* the link being brought up */
+
+ if (!nl || tipc_link_is_up(nl))
+ return;
+
+ tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT);
+ if (!tipc_link_is_up(nl))
+ return;
+
+ n->working_links++;
+ n->action_flags |= TIPC_NOTIFY_LINK_UP; /* acted on in tipc_node_write_unlock() */
+ n->link_id = tipc_link_id(nl);
+
+ /* Leave room for tunnel header when returning 'mtu' to users: */
+ n->links[bearer_id].mtu = tipc_link_mss(nl);
+
+ tipc_bearer_add_dest(n->net, bearer_id, n->addr);
+ tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
+
+ pr_debug("Established link <%s> on network plane %c\n",
+ tipc_link_name(nl), tipc_link_plane(nl));
+ trace_tipc_node_link_up(n, true, " ");
+
+ /* Ensure that a STATE message goes first */
+ tipc_link_build_state_msg(nl, xmitq);
+
+ /* First link? => give it both slots */
+ if (!ol) {
+ *slot0 = bearer_id;
+ *slot1 = bearer_id;
+ tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
+ n->action_flags |= TIPC_NOTIFY_NODE_UP;
+ tipc_link_set_active(nl, true);
+ tipc_bcast_add_peer(n->net, nl, xmitq);
+ return;
+ }
+
+ /* Second link => redistribute slots */
+ if (tipc_link_prio(nl) > tipc_link_prio(ol)) {
+ pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol));
+ *slot0 = bearer_id;
+ *slot1 = bearer_id;
+ tipc_link_set_active(nl, true);
+ tipc_link_set_active(ol, false);
+ } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) {
+ tipc_link_set_active(nl, true);
+ *slot1 = bearer_id; /* equal priority: the new link takes slot 1 only */
+ } else {
+ pr_debug("New link <%s> is standby\n", tipc_link_name(nl));
+ }
- tipc_num_nodes--;
+ /* Prepare synchronization with first link */
+ tipc_link_tnl_prepare(ol, nl, SYNCH_MSG, xmitq);
}
/**
* tipc_node_link_up - handle addition of link
+ * @n: target tipc_node
+ * @bearer_id: id of the bearer
+ * @xmitq: queue for messages to be xmited on
*
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
-void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link **active = &n_ptr->active_links[0];
+ struct tipc_media_addr *maddr;
- n_ptr->working_links++;
+ tipc_node_write_lock(n);
+ __tipc_node_link_up(n, bearer_id, xmitq);
+ maddr = &n->links[bearer_id].maddr;
+ tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n); /* sent while the node write lock is still held */
+ tipc_node_write_unlock(n); /* also runs any notifications requested via action_flags */
+}
- pr_info("Established link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+/**
+ * tipc_node_link_failover() - start failover in case "half-failover"
+ *
+ * This function is only called in a very special situation where link
+ * failover can be already started on peer node but not on this node.
+ * This can happen when e.g.::
+ *
+ * 1. Both links <1A-2A>, <1B-2B> down
+ * 2. Link endpoint 2A up, but 1A still down (e.g. due to network
+ * disturbance, wrong session, etc.)
+ * 3. Link <1B-2B> up
+ * 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
+ * 5. Node 2 starts failover onto link <1B-2B>
+ *
+ * ==> Node 1 does never start link/node failover!
+ *
+ * @n: tipc node structure
+ * @l: link peer endpoint failingover (- can be NULL)
+ * @tnl: tunnel link
+ * @xmitq: queue for messages to be xmited on tnl link later
+ */
+static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
+ struct tipc_link *tnl,
+ struct sk_buff_head *xmitq)
+{
+ /* Avoid a "self-failover" that can never end */
+ if (!tipc_link_is_up(tnl))
+ return;
- if (!active[0]) {
- active[0] = active[1] = l_ptr;
- node_established_contact(n_ptr);
+ /* Don't rush, the failed link may be in the process of resetting */
+ if (l && !tipc_link_is_reset(l))
return;
+
+ tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+
+ n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
+ tipc_link_failover_prepare(l, tnl, xmitq);
+
+ if (l)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
+}
+
+/**
+ * __tipc_node_link_down - handle loss of link
+ * @n: target tipc_node
+ * @bearer_id: id of the bearer; on the failover path it is overwritten
+ * with the bearer id of the link chosen as tunnel
+ * @xmitq: queue for messages to be xmited on
+ * @maddr: output media address of the bearer
+ */
+static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr **maddr)
+{
+ struct tipc_link_entry *le = &n->links[*bearer_id];
+ int *slot0 = &n->active_links[0];
+ int *slot1 = &n->active_links[1];
+ int i, highest = 0, prio;
+ struct tipc_link *l, *_l, *tnl;
+
+ l = n->links[*bearer_id].link;
+ if (!l || tipc_link_is_reset(l))
+ return;
+
+ n->working_links--;
+ n->action_flags |= TIPC_NOTIFY_LINK_DOWN; /* acted on in tipc_node_write_unlock() */
+ n->link_id = tipc_link_id(l);
+
+ tipc_bearer_remove_dest(n->net, *bearer_id, n->addr);
+
+ pr_debug("Lost link <%s> on network plane %c\n",
+ tipc_link_name(l), tipc_link_plane(l));
+
+ /* Select new active link if any available */
+ *slot0 = INVALID_BEARER_ID;
+ *slot1 = INVALID_BEARER_ID;
+ for (i = 0; i < MAX_BEARERS; i++) {
+ _l = n->links[i].link;
+ if (!_l || !tipc_link_is_up(_l))
+ continue;
+ if (_l == l)
+ continue;
+ prio = tipc_link_prio(_l);
+ if (prio < highest)
+ continue;
+ if (prio > highest) {
+ highest = prio;
+ *slot0 = i;
+ *slot1 = i;
+ continue;
+ }
+ *slot1 = i; /* same priority as the current best: it takes slot 1 */
}
- if (l_ptr->priority < active[0]->priority) {
- pr_info("New link <%s> becomes standby\n", l_ptr->name);
+
+ if (!node_is_up(n)) { /* no other working link was found: contact is lost */
+ if (tipc_link_peer_is_down(l))
+ tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+ tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
+ trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down!");
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
+ tipc_link_reset(l);
+ tipc_link_build_reset_msg(l, xmitq);
+ *maddr = &n->links[*bearer_id].maddr;
+ node_lost_contact(n, &le->inputq);
+ tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
return;
}
- tipc_link_send_duplicate(active[0], l_ptr);
- if (l_ptr->priority == active[0]->priority) {
- active[0] = l_ptr;
+ tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
+
+ /* There is still a working link => initiate failover */
+ *bearer_id = n->active_links[0];
+ tnl = n->links[*bearer_id].link;
+ tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+ n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
+ tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
+ trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down -> failover!");
+ tipc_link_reset(l);
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
+ *maddr = &n->links[*bearer_id].maddr; /* media address of the tunnel link's bearer */
+}
+
+static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
+{ /* Take the link on @bearer_id down and, if @delete is set, free it.
+ * Messages queued meanwhile are sent after the node lock is released.
+ */
+ struct tipc_link_entry *le = &n->links[bearer_id];
+ struct tipc_media_addr *maddr = NULL;
+ struct tipc_link *l = le->link;
+ int old_bearer_id = bearer_id; /* __tipc_node_link_down() may overwrite bearer_id */
+ struct sk_buff_head xmitq;
+
+ if (!l)
return;
+
+ __skb_queue_head_init(&xmitq);
+
+ tipc_node_write_lock(n);
+ if (!tipc_link_is_establishing(l)) {
+ __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
+ } else {
+ /* Defuse pending tipc_node_link_up() */
+ tipc_link_reset(l);
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
+ }
+ if (delete) {
+ kfree(l);
+ le->link = NULL;
+ n->link_cnt--;
}
- pr_info("Old link <%s> becomes standby\n", active[0]->name);
- if (active[1] != active[0])
- pr_info("Old link <%s> becomes standby\n", active[1]->name);
- active[0] = active[1] = l_ptr;
+ trace_tipc_node_link_down(n, true, "node link down or deleted!");
+ tipc_node_write_unlock(n);
+ if (delete)
+ tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n);
+ tipc_sk_rcv(n->net, &le->inputq);
}
-/**
- * node_select_active_links - select active link
+static bool node_is_up(struct tipc_node *n)
+{ /* A node counts as up when active slot 0 holds a valid bearer id */
+ return n->active_links[0] != INVALID_BEARER_ID;
+}
+
+/* tipc_node_is_up - check whether the node with address @addr is up
+ * @net: the applicable net namespace
+ * @addr: node address to look up
+ *
+ * Return: true if @addr is this node's own address or the node found for
+ * @addr passes node_is_up(); false if no node is found or it is not up.
+ */
+bool tipc_node_is_up(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+ bool retval = false;
+
+ if (in_own_node(net, addr))
+ return true;
+
+ n = tipc_node_find(net, addr);
+ if (!n)
+ return false;
+ retval = node_is_up(n);
+ /* Pair the successful tipc_node_find() with a put before returning */
+ tipc_node_put(n);
+ return retval;
+}
+
+/* tipc_node_suggest_addr - derive an address no known node is using
+ * @net: the applicable net namespace
+ * @addr: starting address
+ *
+ * XORs @addr with the per-namespace random value, then increments the result
+ * until tipc_node_find() no longer finds a node with that address.
+ */
+static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+
+ addr ^= tipc_net(net)->random;
+ while ((n = tipc_node_find(net, addr))) {
+ tipc_node_put(n);
+ addr++;
+ }
+ return addr;
+}
+
+/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ * Returns suggested address if any, otherwise 0
+ * @net: the applicable net namespace
+ * @id: node identity of the peer (compared over NODE_ID_LEN bytes)
+ * @addr: address the peer wants to use
*/
-static void node_select_active_links(struct tipc_node *n_ptr)
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
{
- struct tipc_link **active = &n_ptr->active_links[0];
- u32 i;
- u32 highest_prio = 0;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+ bool preliminary;
+ u32 sugg_addr;
+
+ /* Suggest new address if some other peer is using this one */
+ n = tipc_node_find(net, addr);
+ if (n) {
+ /* Same identity already owns this address: nothing to suggest */
+ if (!memcmp(n->peer_id, id, NODE_ID_LEN))
+ addr = 0;
+ tipc_node_put(n);
+ if (!addr)
+ return 0;
+ return tipc_node_suggest_addr(net, addr);
+ }
+
+ /* Suggest previously used address if peer is known */
+ n = tipc_node_find_by_id(net, id);
+ if (n) {
+ /* Copy the fields out before dropping the node */
+ sugg_addr = n->addr;
+ preliminary = n->preliminary;
+ tipc_node_put(n);
+ if (!preliminary)
+ return sugg_addr;
+ }
+
+ /* Even this node may be in conflict */
+ if (tn->trial_addr == addr)
+ return tipc_node_suggest_addr(net, addr);
- active[0] = active[1] = NULL;
+ return 0;
+}
+/* tipc_node_check_dest - evaluate a peer seen on bearer @b and decide how to
+ * react to it
+ * @net: the applicable net namespace
+ * @addr: peer node address
+ * @peer_id: peer node identity
+ * @b: bearer the peer was seen on; b->identity indexes n->links[]
+ * @capabilities: peer capabilities, passed on to tipc_node_create()
+ * @signature: peer signature, compared with the one stored in the node
+ * @hash_mixes: peer hash mix, passed on to tipc_node_create()
+ * @maddr: media address the peer was seen at
+ * @respond: output, set to true when the caller should respond to the peer
+ * @dupl_addr: output, set to true when the request is being ignored because
+ *             a link is up but the media address does not match
+ *
+ * Both outputs are cleared first. If tipc_node_create() returns NULL the
+ * function returns with both outputs false. A new link is created only when
+ * the address is accepted and no link exists yet on this bearer.
+ */
+void tipc_node_check_dest(struct net *net, u32 addr,
+ u8 *peer_id, struct tipc_bearer *b,
+ u16 capabilities, u32 signature, u32 hash_mixes,
+ struct tipc_media_addr *maddr,
+ bool *respond, bool *dupl_addr)
+{
+ struct tipc_node *n;
+ struct tipc_link *l;
+ struct tipc_link_entry *le;
+ bool addr_match = false;
+ bool sign_match = false;
+ bool link_up = false;
+ bool link_is_reset = false;
+ bool accept_addr = false;
+ bool reset = false;
+ char *if_name;
+ unsigned long intv;
+ u16 session;
+
+ *dupl_addr = false;
+ *respond = false;
+
+ n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes,
+ false);
+ if (!n)
+ return;
+
+ tipc_node_write_lock(n);
+
+ le = &n->links[b->identity];
+
+ /* Prepare to validate requesting node's signature and media address */
+ l = le->link;
+ link_up = l && tipc_link_is_up(l);
+ link_is_reset = l && tipc_link_is_reset(l);
+ addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
+ sign_match = (signature == n->signature);
+
+ /* These three flags give us eight permutations: */
+
+ if (sign_match && addr_match && link_up) {
+ /* All is fine. Ignore requests. */
+ /* Peer node is not a container/local namespace */
+ if (!n->peer_hash_mix)
+ n->peer_hash_mix = hash_mixes;
+ } else if (sign_match && addr_match && !link_up) {
+ /* Respond. The link will come up in due time */
+ *respond = true;
+ } else if (sign_match && !addr_match && link_up) {
+ /* Peer has changed i/f address without rebooting.
+ * If so, the link will reset soon, and the next
+ * discovery will be accepted. So we can ignore it.
+ * It may also be a cloned or malicious peer having
+ * chosen the same node address and signature as an
+ * existing one.
+ * Ignore requests until the link goes down, if ever.
+ */
+ *dupl_addr = true;
+ } else if (sign_match && !addr_match && !link_up) {
+ /* Peer link has changed i/f address without rebooting.
+ * It may also be a cloned or malicious peer; we can't
+ * distinguish between the two.
+ * The signature is correct, so we must accept.
+ */
+ accept_addr = true;
+ *respond = true;
+ reset = true;
+ } else if (!sign_match && addr_match && link_up) {
+ /* Peer node rebooted. Two possibilities:
+ * - Delayed re-discovery; this link endpoint has already
+ * reset and re-established contact with the peer, before
+ * receiving a discovery message from that node.
+ * (The peer happened to receive one from this node first).
+ * - The peer came back so fast that our side has not
+ * discovered it yet. Probing from this side will soon
+ * reset the link, since there can be no working link
+ * endpoint at the peer end, and the link will re-establish.
+ * Accept the signature, since it comes from a known peer.
+ */
+ n->signature = signature;
+ } else if (!sign_match && addr_match && !link_up) {
+ /* The peer node has rebooted.
+ * Accept signature, since it is a known peer.
+ */
+ n->signature = signature;
+ *respond = true;
+ } else if (!sign_match && !addr_match && link_up) {
+ /* Peer rebooted with new address, or a new/duplicate peer.
+ * Ignore until the link goes down, if ever.
+ */
+ *dupl_addr = true;
+ } else if (!sign_match && !addr_match && !link_up) {
+ /* Peer rebooted with new address, or it is a new peer.
+ * Accept signature and address.
+ */
+ n->signature = signature;
+ accept_addr = true;
+ *respond = true;
+ reset = true;
+ }
+
+ if (!accept_addr)
+ goto exit;
+
+ /* Now create new link if not already existing */
+ if (!l) {
+ /* No further link is created once the node already has two */
+ if (n->link_cnt == 2)
+ goto exit;
+
+ /* NOTE(review): assumes b->name always contains ':'; if
+ * strchr() returned NULL this would compute an invalid
+ * pointer - confirm bearer names are validated elsewhere
+ */
+ if_name = strchr(b->name, ':') + 1;
+ get_random_bytes(&session, sizeof(u16));
+ if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
+ b->net_plane, b->mtu, b->priority,
+ b->min_win, b->max_win, session,
+ tipc_own_addr(net), addr, peer_id,
+ n->capabilities,
+ tipc_bc_sndlink(n->net), n->bc_entry.link,
+ &le->inputq,
+ &n->bc_entry.namedq, &l)) {
+ *respond = false;
+ goto exit;
+ }
+ trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link created!");
+ tipc_link_reset(l);
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
+ if (n->state == NODE_FAILINGOVER)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ link_is_reset = tipc_link_is_reset(l);
+ le->link = l;
+ n->link_cnt++;
+ tipc_node_calculate_timer(n, l);
+ if (n->link_cnt == 1) {
+ intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+ /* mod_timer() returns 0 when the timer was not pending;
+ * presumably the extra reference then belongs to the
+ * newly armed timer - TODO confirm against timer handler
+ */
+ if (!mod_timer(&n->timer, intv))
+ tipc_node_get(n);
+ }
+ }
+ memcpy(&le->maddr, maddr, sizeof(*maddr));
+exit:
+ tipc_node_write_unlock(n);
+ /* Done after unlock: tipc_node_link_down() takes the write lock itself */
+ if (reset && !link_is_reset)
+ tipc_node_link_down(n, b->identity, false);
+ tipc_node_put(n);
+}
+
+/* tipc_node_delete_links - take down and delete the link on @bearer_id for
+ * every node in the namespace's node list
+ * @net: the applicable net namespace
+ * @bearer_id: bearer whose links are to be deleted
+ *
+ * The node list is walked under rcu_read_lock().
+ */
+void tipc_node_delete_links(struct net *net, int bearer_id)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_node *n;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_link_down(n, bearer_id, true);
+ }
+ rcu_read_unlock();
+}
+
+/* tipc_node_reset_links - take down, without deleting, the link on every
+ * bearer index of node @n; a warning is logged first
+ */
+static void tipc_node_reset_links(struct tipc_node *n)
+{
+ int i;
+
+ pr_warn("Resetting all links to %x\n", n->addr);
+
+ trace_tipc_node_reset_links(n, true, " ");
for (i = 0; i < MAX_BEARERS; i++) {
- struct tipc_link *l_ptr = n_ptr->links[i];
+ tipc_node_link_down(n, i, false);
+ }
+}
- if (!l_ptr || !tipc_link_is_up(l_ptr) ||
- (l_ptr->priority < highest_prio))
- continue;
+/* tipc_node_fsm_evt - node finite state machine
+ * Determines when contact is allowed with peer node
+ * @n: node whose state is to be updated
+ * @evt: event to apply to the current n->state
+ *
+ * In each state an event either selects a new state, is accepted without a
+ * state change, or is rejected as illegal. An illegal event is logged and
+ * traced and leaves n->state untouched.
+ */
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
+{
+ int state = n->state;
- if (l_ptr->priority > highest_prio) {
- highest_prio = l_ptr->priority;
- active[0] = active[1] = l_ptr;
- } else {
- active[1] = l_ptr;
+ switch (state) {
+ case SELF_DOWN_PEER_DOWN:
+ switch (evt) {
+ case SELF_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_COMING;
+ break;
+ case PEER_ESTABL_CONTACT_EVT:
+ state = SELF_COMING_PEER_UP;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_UP_PEER_UP:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ state = NODE_SYNCHING;
+ break;
+ case NODE_FAILOVER_BEGIN_EVT:
+ state = NODE_FAILINGOVER;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case NODE_SYNCH_END_EVT:
+ case NODE_FAILOVER_END_EVT:
+ break;
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_DOWN_PEER_LEAVING:
+ switch (evt) {
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case SELF_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_UP_PEER_COMING:
+ switch (evt) {
+ case PEER_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ case NODE_SYNCH_END_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
}
+ break;
+ case SELF_COMING_PEER_UP:
+ switch (evt) {
+ case SELF_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_LEAVING_PEER_DOWN:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case NODE_FAILINGOVER:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case NODE_FAILOVER_END_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case NODE_FAILOVER_BEGIN_EVT:
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case NODE_SYNCHING:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case NODE_SYNCH_END_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case NODE_FAILOVER_BEGIN_EVT:
+ state = NODE_FAILINGOVER;
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ default:
+ /* Unknown state: logged, then the unchanged value is written back */
+ pr_err("Unknown node fsm state %x\n", state);
+ break;
+ }
+ trace_tipc_node_fsm(n->peer_id, n->state, state, evt);
+ n->state = state;
+ return;
+
+illegal_evt:
+ /* n->state is deliberately not written on this path */
+ pr_err("Illegal node fsm evt %x in state %x\n", evt, state);
+ trace_tipc_node_fsm(n->peer_id, n->state, state, evt);
+}
+
+/* node_lost_contact - clean up node state after contact with the peer is lost
+ * @n: node that lost contact
+ * @inputq: queue receiving one TIPC_ERR_NO_NODE message per tracked connection
+ *
+ * Sets the node's delete_at deadline, removes the peer from broadcast state,
+ * purges the name distribution queue, ends failover on every existing link,
+ * flags TIPC_NOTIFY_NODE_DOWN and frees all entries of n->conn_sks.
+ */
+static void node_lost_contact(struct tipc_node *n,
+ struct sk_buff_head *inputq)
+{
+ struct tipc_sock_conn *conn, *safe;
+ struct tipc_link *l;
+ struct list_head *conns = &n->conn_sks;
+ struct sk_buff *skb;
+ uint i;
+
+ pr_debug("Lost contact with %x\n", n->addr);
+ n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
+ trace_tipc_node_lost_contact(n, true, " ");
+
+ /* Clean up broadcast state */
+ tipc_bcast_remove_peer(n->net, n->bc_entry.link);
+ skb_queue_purge(&n->bc_entry.namedq);
+
+ /* Abort any ongoing link failover */
+ for (i = 0; i < MAX_BEARERS; i++) {
+ l = n->links[i].link;
+ if (l)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
+ }
+
+ /* Notify publications from this node */
+ n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ /* Notify sockets connected to node */
+ list_for_each_entry_safe(conn, safe, conns, list) {
+ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+ SHORT_H_SIZE, 0, tipc_own_addr(n->net),
+ conn->peer_node, conn->port,
+ conn->peer_port, TIPC_ERR_NO_NODE);
+ /* If message creation fails the entry is still removed and freed */
+ if (likely(skb))
+ skb_queue_tail(inputq, skb);
+ list_del(&conn->list);
+ kfree(conn);
}
}
/**
- * tipc_node_link_down - handle loss of link
+ * tipc_node_get_linkname - get the name of a link
+ *
+ * @net: the applicable net namespace
+ * @bearer_id: id of the bearer
+ * @addr: peer node address
+ * @linkname: link name output buffer
+ * @len: size of @linkname output buffer
+ *
+ * Return: 0 on success, -EINVAL if no node is found for @addr, if @bearer_id
+ * is not below MAX_BEARERS, or if the node has no link on that bearer
*/
-void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
+ char *linkname, size_t len)
+{
+ struct tipc_link *link;
+ int err = -EINVAL;
+ struct tipc_node *node = tipc_node_find(net, addr);
+
+ if (!node)
+ return err;
+
+ /* Validate before indexing node->links[] */
+ if (bearer_id >= MAX_BEARERS)
+ goto exit;
+
+ tipc_node_read_lock(node);
+ link = node->links[bearer_id].link;
+ if (link) {
+ strscpy(linkname, tipc_link_name(link), len);
+ err = 0;
+ }
+ tipc_node_read_unlock(node);
+exit:
+ tipc_node_put(node);
+ return err;
+}
+
+/* Caller should hold node lock for the passed node
+ *
+ * Adds one TIPC_NL_NODE_GET message for @node to msg->skb: a nested
+ * TIPC_NLA_NODE attribute holding the node address and, if node_is_up(),
+ * the TIPC_NLA_NODE_UP flag.
+ * Returns 0 on success, -EMSGSIZE if any part does not fit; the partially
+ * built nest and message are cancelled in that case.
+ */
+static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
{
- struct tipc_link **active;
+ void *hdr;
+ struct nlattr *attrs;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ NLM_F_MULTI, TIPC_NL_NODE_GET);
+ if (!hdr)
+ return -EMSGSIZE;
- n_ptr->working_links--;
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE);
+ if (!attrs)
+ goto msg_full;
- if (!tipc_link_is_active(l_ptr)) {
- pr_info("Lost standby link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
+ goto attr_msg_full;
+ if (node_is_up(node))
+ if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
+ goto attr_msg_full;
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+/* tipc_lxc_xmit - hand a buffer chain directly to a peer net namespace
+ * @peer_net: namespace the chain is delivered into
+ * @list: buffer chain; the user field of its first buffer selects the path
+ *
+ * Chains whose user/type is not handled here are left on @list untouched, so
+ * the caller can tell from the list being non-empty that nothing was done.
+ * NOTE(review): skb_peek() result is used without a NULL check - assumes
+ * @list is never empty on entry; confirm against callers.
+ */
+static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
+{
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ struct sk_buff_head inputq;
+
+ switch (msg_user(hdr)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ if (msg_connected(hdr) || msg_named(hdr) ||
+ msg_direct(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ /* (Re)initialise the list lock before socket delivery */
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ }
+ if (msg_mcast(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ /* Drop whatever multicast delivery left behind */
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ return;
+ }
return;
+ case MSG_FRAGMENTER:
+ /* Only delivered if the fragments could be reassembled */
+ if (tipc_msg_assemble(list)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ }
+ return;
+ case GROUP_PROTOCOL:
+ case CONN_MANAGER:
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ case LINK_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case TUNNEL_PROTOCOL:
+ case BCAST_PROTOCOL:
+ return;
+ default:
+ return;
+ }
+}
+
+/**
+ * tipc_node_xmit() - general link level function for message sending
+ * @net: the applicable net namespace
+ * @list: chain of buffers containing message
+ * @dnode: address of destination node
+ * @selector: a number used for deterministic link selection
+ * Consumes the buffer chain.
+ * Return: 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUFS
+ */
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
+ u32 dnode, int selector)
+{
+ struct tipc_link_entry *le = NULL;
+ struct tipc_node *n;
+ struct sk_buff_head xmitq;
+ bool node_up = false;
+ struct net *peer_net;
+ int bearer_id;
+ int rc;
+
+ /* Destination is this node: deliver straight to local sockets */
+ if (in_own_node(net, dnode)) {
+ tipc_loopback_trace(net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(net, list);
+ return 0;
+ }
+
+ n = tipc_node_find(net, dnode);
+ if (unlikely(!n)) {
+ __skb_queue_purge(list);
+ return -EHOSTUNREACH;
+ }
+
+ rcu_read_lock();
+ tipc_node_read_lock(n);
+ node_up = node_is_up(n);
+ peer_net = n->peer_net;
+ tipc_node_read_unlock(n);
+ if (node_up && peer_net && check_net(peer_net)) {
+ /* xmit inner linux container */
+ tipc_lxc_xmit(peer_net, list);
+ /* A non-empty list means it was not handled there; fall
+ * through to normal link transmission below
+ */
+ if (likely(skb_queue_empty(list))) {
+ rcu_read_unlock();
+ tipc_node_put(n);
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+
+ tipc_node_read_lock(n);
+ /* Low bit of selector picks one of the two active_links[] slots */
+ bearer_id = n->active_links[selector & 1];
+ if (unlikely(bearer_id == INVALID_BEARER_ID)) {
+ tipc_node_read_unlock(n);
+ tipc_node_put(n);
+ __skb_queue_purge(list);
+ return -EHOSTUNREACH;
}
- pr_info("Lost link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
-
- active = &n_ptr->active_links[0];
- if (active[0] == l_ptr)
- active[0] = active[1];
- if (active[1] == l_ptr)
- active[1] = active[0];
- if (active[0] == l_ptr)
- node_select_active_links(n_ptr);
- if (tipc_node_is_up(n_ptr))
- tipc_link_changeover(l_ptr);
+
+ __skb_queue_head_init(&xmitq);
+ le = &n->links[bearer_id];
+ spin_lock_bh(&le->lock);
+ rc = tipc_link_xmit(le->link, list, &xmitq);
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(n);
+
+ /* -ENOBUFS from the link takes the link down instead of sending */
+ if (unlikely(rc == -ENOBUFS))
+ tipc_node_link_down(n, bearer_id, false);
else
- node_lost_contact(n_ptr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
+
+ tipc_node_put(n);
+
+ return rc;
}
-int tipc_node_active_links(struct tipc_node *n_ptr)
+/* tipc_node_xmit_skb(): send single buffer to destination
+ * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE
+ * messages, which will not be rejected
+ * The only exception is datagram messages rerouted after secondary
+ * lookup, which are rare and safe to dispose of anyway.
+ *
+ * The result of tipc_node_xmit() is discarded; this function always
+ * returns 0.
+ */
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
+ u32 selector)
{
- return n_ptr->active_links[0] != NULL;
+ struct sk_buff_head head;
+
+ /* Wrap the single buffer in a one-element chain on the stack */
+ __skb_queue_head_init(&head);
+ __skb_queue_tail(&head, skb);
+ tipc_node_xmit(net, &head, dnode, selector);
+ return 0;
}
-int tipc_node_redundant_links(struct tipc_node *n_ptr)
+/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
+ * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
+ *
+ * Drains @xmitq; each buffer is sent to the destination node found in its
+ * own header, using its origin port as link selector. Always returns 0.
+ */
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
{
- return n_ptr->working_links > 1;
+ struct sk_buff *skb;
+ u32 selector, dnode;
+
+ while ((skb = __skb_dequeue(xmitq))) {
+ selector = msg_origport(buf_msg(skb));
+ dnode = msg_destnode(buf_msg(skb));
+ tipc_node_xmit_skb(net, skb, dnode, selector);
+ }
+ return 0;
}
-int tipc_node_is_up(struct tipc_node *n_ptr)
+/* tipc_node_broadcast - send one buffer to all peer nodes
+ * @net: the applicable net namespace
+ * @skb: buffer to send; ownership passes to this function on both paths
+ * @rc_dests: non-zero forces the replicast path
+ *
+ * Uses true broadcast when @rc_dests is zero and the broadcast mode is not
+ * BCLINK_MODE_RCAST; otherwise sends a copy to every node that is up.
+ */
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests)
{
- return tipc_node_active_links(n_ptr);
+ struct sk_buff_head xmitq;
+ struct sk_buff *txskb;
+ struct tipc_node *n;
+ u16 dummy;
+ u32 dst;
+
+ /* Use broadcast if all nodes support it */
+ if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) {
+ __skb_queue_head_init(&xmitq);
+ __skb_queue_tail(&xmitq, skb);
+ tipc_bcast_xmit(net, &xmitq, &dummy);
+ return;
+ }
+
+ /* Otherwise use legacy replicast method */
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, tipc_nodes(net), list) {
+ dst = n->addr;
+ if (in_own_node(net, dst))
+ continue;
+ if (!node_is_up(n))
+ continue;
+ txskb = pskb_copy(skb, GFP_ATOMIC);
+ /* Copy failure stops the walk; remaining nodes get nothing */
+ if (!txskb)
+ break;
+ msg_set_destnode(buf_msg(txskb), dst);
+ tipc_node_xmit_skb(net, txskb, dst, 0);
+ }
+ rcu_read_unlock();
+ /* Only copies were sent above, so the original is released here */
+ kfree_skb(skb);
}
-void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+/* tipc_node_mcast_rcv - move everything queued on the node's broadcast
+ * inputq1 to the tail of arrvq, then pass arrvq and inputq2 to
+ * tipc_sk_mcast_rcv()
+ */
+static void tipc_node_mcast_rcv(struct tipc_node *n)
{
- n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
- atomic_inc(&tipc_num_links);
- n_ptr->link_cnt++;
+ struct tipc_bclink_entry *be = &n->bc_entry;
+
+ /* 'arrvq' is under inputq2's lock protection */
+ spin_lock_bh(&be->inputq2.lock);
+ spin_lock_bh(&be->inputq1.lock);
+ skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
+ spin_unlock_bh(&be->inputq1.lock);
+ spin_unlock_bh(&be->inputq2.lock);
+ tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2);
}
-void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+/* tipc_node_bc_sync_rcv - feed a received header to the broadcast link's
+ * synchronisation handling and act on the result
+ * @n: node the message came from
+ * @hdr: header of the received message
+ * @bearer_id: bearer the message arrived on
+ * @xmitq: queue collecting messages to be transmitted by the caller
+ *
+ * A TIPC_LINK_DOWN_EVT result resets all links to the node. A
+ * TIPC_LINK_SND_STATE result queues a STATE message on the unicast link of
+ * @bearer_id, unless @hdr is a probe.
+ */
+static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
+ int bearer_id, struct sk_buff_head *xmitq)
{
- n_ptr->links[l_ptr->b_ptr->identity] = NULL;
- atomic_dec(&tipc_num_links);
- n_ptr->link_cnt--;
+ struct tipc_link *ucl;
+ int rc;
+
+ rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq);
+
+ if (rc & TIPC_LINK_DOWN_EVT) {
+ tipc_node_reset_links(n);
+ return;
+ }
+
+ if (!(rc & TIPC_LINK_SND_STATE))
+ return;
+
+ /* If probe message, a STATE response will be sent anyway */
+ if (msg_probe(hdr))
+ return;
+
+ /* Produce a STATE message carrying broadcast NACK */
+ tipc_node_read_lock(n);
+ ucl = n->links[bearer_id].link;
+ if (ucl)
+ tipc_link_build_state_msg(ucl, xmitq);
+ tipc_node_read_unlock(n);
}
-static void node_established_contact(struct tipc_node *n_ptr)
+/**
+ * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
+ * @net: the applicable net namespace
+ * @skb: TIPC packet
+ * @bearer_id: id of bearer message arrived on
+ *
+ * Invoked with no locks held.
+ * If no matching node is found the packet is freed and nothing else is done.
+ */
+static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id)
{
- tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
- n_ptr->bclink.oos_state = 0;
- n_ptr->bclink.acked = tipc_bclink_get_last_sent();
- tipc_bclink_add_node(n_ptr->addr);
+ int rc;
+ struct sk_buff_head xmitq;
+ struct tipc_bclink_entry *be;
+ struct tipc_link_entry *le;
+ struct tipc_msg *hdr = buf_msg(skb);
+ int usr = msg_user(hdr);
+ u32 dnode = msg_destnode(hdr);
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ /* If NACK for other node, let rcv link for that node peek into it */
+ if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net)))
+ n = tipc_node_find(net, dnode);
+ else
+ n = tipc_node_find(net, msg_prevnode(hdr));
+ if (!n) {
+ kfree_skb(skb);
+ return;
+ }
+ be = &n->bc_entry;
+ le = &n->links[bearer_id];
+
+ rc = tipc_bcast_rcv(net, be->link, skb);
+
+ /* Broadcast ACKs are sent on a unicast link */
+ if (rc & TIPC_LINK_SND_STATE) {
+ tipc_node_read_lock(n);
+ /* NOTE(review): le->link is not checked here; presumably the
+ * callee tolerates a NULL link - confirm
+ */
+ tipc_link_build_state_msg(le->link, &xmitq);
+ tipc_node_read_unlock(n);
+ }
+
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
+
+ if (!skb_queue_empty(&be->inputq1))
+ tipc_node_mcast_rcv(n);
+
+ /* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */
+ if (!skb_queue_empty(&n->bc_entry.namedq))
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
+
+ /* If reassembly or retransmission failure => reset all links to peer */
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_node_reset_links(n);
+
+ tipc_node_put(n);
}
-static void node_name_purge_complete(unsigned long node_addr)
+/**
+ * tipc_node_check_state - check and if necessary update node state
+ * @n: target tipc_node
+ * @skb: TIPC packet
+ * @bearer_id: identity of bearer delivering the packet
+ * @xmitq: queue for messages to be transmitted
+ * Return: true if state and msg are ok, otherwise false
+ */
+static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
+ int bearer_id, struct sk_buff_head *xmitq)
{
- struct tipc_node *n_ptr;
+ struct tipc_msg *hdr = buf_msg(skb);
+ int usr = msg_user(hdr);
+ int mtyp = msg_type(hdr);
+ u16 oseqno = msg_seqno(hdr);
+ u16 exp_pkts = msg_msgcnt(hdr);
+ u16 rcv_nxt, syncpt, dlv_nxt, inputq_len;
+ /* Snapshot taken on entry: tests on 'state' below do not see changes
+  * made by tipc_node_fsm_evt() during this call; tests on n->state do
+  */
+ int state = n->state;
+ struct tipc_link *l, *tnl, *pl = NULL;
+ struct tipc_media_addr *maddr;
+ int pb_id;
+
+ if (trace_tipc_node_check_state_enabled()) {
+ trace_tipc_skb_dump(skb, false, "skb for node state check");
+ trace_tipc_node_check_state(n, true, " ");
+ }
+ l = n->links[bearer_id].link;
+ if (!l)
+ return false;
+ rcv_nxt = tipc_link_rcv_nxt(l);
+
+
+ if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL)))
+ return true;
+
+ /* Find parallel link, if any */
+ for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) {
+ if ((pb_id != bearer_id) && n->links[pb_id].link) {
+ pl = n->links[pb_id].link;
+ break;
+ }
+ }
+
+ if (!tipc_link_validate_msg(l, hdr)) {
+ trace_tipc_skb_dump(skb, false, "PROTO invalid (2)!");
+ trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (2)!");
+ return false;
+ }
+
+ /* Check and update node accessibility if applicable */
+ if (state == SELF_UP_PEER_COMING) {
+ if (!tipc_link_is_up(l))
+ return true;
+ if (!msg_peer_link_is_up(hdr))
+ return true;
+ tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT);
+ }
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(node_addr);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- n_ptr->block_setup &= ~WAIT_NAMES_GONE;
- tipc_node_unlock(n_ptr);
+ if (state == SELF_DOWN_PEER_LEAVING) {
+ if (msg_peer_node_is_up(hdr))
+ return false;
+ tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+ return true;
}
- read_unlock_bh(&tipc_net_lock);
+
+ if (state == SELF_LEAVING_PEER_DOWN)
+ return false;
+
+ /* Ignore duplicate packets */
+ if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt))
+ return true;
+
+ /* Initiate or update failover mode if applicable */
+ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
+ syncpt = oseqno + exp_pkts - 1;
+ if (pl && !tipc_link_is_reset(pl)) {
+ __tipc_node_link_down(n, &pb_id, xmitq, &maddr);
+ trace_tipc_node_link_down(n, true,
+ "node link down <- failover!");
+ tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
+ tipc_link_inputq(l));
+ }
+
+ /* If parallel link was already down, and this happened before
+ * the tunnel link came up, node failover was never started.
+ * Ensure that a FAILOVER_MSG is sent to get peer out of
+ * NODE_FAILINGOVER state, also this node must accept
+ * TUNNEL_MSGs from peer.
+ */
+ if (n->state != NODE_FAILINGOVER)
+ tipc_node_link_failover(n, pl, l, xmitq);
+
+ /* If pkts arrive out of order, use lowest calculated syncpt */
+ if (less(syncpt, n->sync_point))
+ n->sync_point = syncpt;
+ }
+
+ /* Open parallel link when tunnel link reaches synch point */
+ if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) {
+ if (!more(rcv_nxt, n->sync_point))
+ return true;
+ tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT);
+ if (pl)
+ tipc_link_fsm_evt(pl, LINK_FAILOVER_END_EVT);
+ return true;
+ }
+
+ /* No syncing needed if only one link */
+ if (!pl || !tipc_link_is_up(pl))
+ return true;
+
+ /* Initiate synch mode if applicable */
+ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
+ if (n->capabilities & TIPC_TUNNEL_ENHANCED)
+ syncpt = msg_syncpt(hdr);
+ else
+ syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1;
+ if (!tipc_link_is_up(l))
+ __tipc_node_link_up(n, bearer_id, xmitq);
+ if (n->state == SELF_UP_PEER_UP) {
+ n->sync_point = syncpt;
+ tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT);
+ }
+ }
+
+ /* Open tunnel link when parallel link reaches synch point */
+ if (n->state == NODE_SYNCHING) {
+ /* Arrange for tnl to be the synching link and pl the other one */
+ if (tipc_link_is_synching(l)) {
+ tnl = l;
+ } else {
+ tnl = pl;
+ pl = l;
+ }
+ inputq_len = skb_queue_len(tipc_link_inputq(pl));
+ dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len;
+ if (more(dlv_nxt, n->sync_point)) {
+ tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+ return true;
+ }
+ if (l == pl)
+ return true;
+ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG))
+ return true;
+ if (usr == LINK_PROTOCOL)
+ return true;
+ return false;
+ }
+ return true;
}
-static void node_lost_contact(struct tipc_node *n_ptr)
+/**
+ * tipc_rcv - process TIPC packets/messages arriving from off-node
+ * @net: the applicable net namespace
+ * @skb: TIPC packet
+ * @b: pointer to bearer message arrived on
+ *
+ * Invoked with no locks held. Bearer pointer must point to a valid bearer
+ * structure (i.e. cannot be NULL), but bearer can be inactive.
+ */
+void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
{
- char addr_string[16];
- u32 i;
+ struct sk_buff_head xmitq;
+ struct tipc_link_entry *le;
+ struct tipc_msg *hdr;
+ struct tipc_node *n;
+ int bearer_id = b->identity;
+ u32 self = tipc_own_addr(net);
+ int usr, rc = 0;
+ u16 bc_ack;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct tipc_ehdr *ehdr;
+
+ /* Check if message must be decrypted first */
+ if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb))
+ goto rcv;
+
+ ehdr = (struct tipc_ehdr *)skb->data;
+ if (likely(ehdr->user != LINK_CONFIG)) {
+ n = tipc_node_find(net, ntohl(ehdr->addr));
+ if (unlikely(!n))
+ goto discard;
+ } else {
+ n = tipc_node_find_by_id(net, ehdr->id);
+ }
+ skb_dst_force(skb);
+ /* NOTE(review): the node found above is not put in this function;
+  * presumably tipc_crypto_rcv() takes over that reference - confirm
+  */
+ tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
+ if (!skb)
+ return;
- pr_info("Lost contact with %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+rcv:
+#endif
+ /* Ensure message is well-formed before touching the header */
+ if (unlikely(!tipc_msg_validate(&skb)))
+ goto discard;
+ __skb_queue_head_init(&xmitq);
+ hdr = buf_msg(skb);
+ usr = msg_user(hdr);
+ bc_ack = msg_bcast_ack(hdr);
+
+ /* Handle arrival of discovery or broadcast packet */
+ if (unlikely(msg_non_seq(hdr))) {
+ if (unlikely(usr == LINK_CONFIG))
+ return tipc_disc_rcv(net, skb, b);
+ else
+ return tipc_node_bc_rcv(net, skb, bearer_id);
+ }
- /* Flush broadcast link info associated with lost node */
- if (n_ptr->bclink.recv_permitted) {
- while (n_ptr->bclink.deferred_head) {
- struct sk_buff *buf = n_ptr->bclink.deferred_head;
- n_ptr->bclink.deferred_head = buf->next;
- kfree_skb(buf);
+ /* Discard unicast link messages destined for another node */
+ if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self)))
+ goto discard;
+
+ /* Locate neighboring node that sent packet */
+ n = tipc_node_find(net, msg_prevnode(hdr));
+ if (unlikely(!n))
+ goto discard;
+ le = &n->links[bearer_id];
+
+ /* Ensure broadcast reception is in synch with peer's send state */
+ if (unlikely(usr == LINK_PROTOCOL)) {
+ if (unlikely(skb_linearize(skb))) {
+ tipc_node_put(n);
+ goto discard;
}
- n_ptr->bclink.deferred_size = 0;
+ hdr = buf_msg(skb);
+ tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
+ } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) {
+ tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
+ }
- if (n_ptr->bclink.defragm) {
- kfree_skb(n_ptr->bclink.defragm);
- n_ptr->bclink.defragm = NULL;
+ /* Receive packet directly if conditions permit */
+ tipc_node_read_lock(n);
+ if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) {
+ spin_lock_bh(&le->lock);
+ if (le->link) {
+ rc = tipc_link_rcv(le->link, skb, &xmitq);
+ /* skb was handed to the link; NULL marks it as taken */
+ skb = NULL;
+ }
+ spin_unlock_bh(&le->lock);
+ }
+ tipc_node_read_unlock(n);
+
+ /* Check/update node state before receiving */
+ if (unlikely(skb)) {
+ if (unlikely(skb_linearize(skb)))
+ goto out_node_put;
+ tipc_node_write_lock(n);
+ if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
+ if (le->link) {
+ rc = tipc_link_rcv(le->link, skb, &xmitq);
+ skb = NULL;
+ }
}
+ tipc_node_write_unlock(n);
+ }
+
+ if (unlikely(rc & TIPC_LINK_UP_EVT))
+ tipc_node_link_up(n, bearer_id, &xmitq);
+
+ if (unlikely(rc & TIPC_LINK_DOWN_EVT))
+ tipc_node_link_down(n, bearer_id, false);
+
+ if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
+
+ if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1)))
+ tipc_node_mcast_rcv(n);
- tipc_bclink_remove_node(n_ptr->addr);
- tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
+ if (!skb_queue_empty(&le->inputq))
+ tipc_sk_rcv(net, &le->inputq);
- n_ptr->bclink.recv_permitted = false;
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
+
+out_node_put:
+ tipc_node_put(n);
+discard:
+ /* Reached on the normal path too; skb is NULL there once a link took it */
+ kfree_skb(skb);
+}
+
+/* tipc_node_apply_property - push a changed bearer property to every node's
+ * link on that bearer
+ * @net: the applicable net namespace
+ * @b: bearer whose tolerance or mtu is applied
+ * @prop: TIPC_NLA_PROP_TOL or TIPC_NLA_PROP_MTU; other values only refresh
+ *        the cached link entry mtu
+ *
+ * For each node, the change is made under the node write lock and anything
+ * queued in xmitq is transmitted after the lock is released.
+ */
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
+ int prop)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int bearer_id = b->identity;
+ struct sk_buff_head xmitq;
+ struct tipc_link_entry *e;
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_write_lock(n);
+ e = &n->links[bearer_id];
+ if (e->link) {
+ if (prop == TIPC_NLA_PROP_TOL)
+ tipc_link_set_tolerance(e->link, b->tolerance,
+ &xmitq);
+ else if (prop == TIPC_NLA_PROP_MTU)
+ tipc_link_set_mtu(e->link, b->mtu);
+
+ /* Update MTU for node link entry */
+ e->mtu = tipc_link_mss(e->link);
+ }
+
+ tipc_node_write_unlock(n);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL);
}
- /* Abort link changeover */
- for (i = 0; i < MAX_BEARERS; i++) {
- struct tipc_link *l_ptr = n_ptr->links[i];
- if (!l_ptr)
+ rcu_read_unlock();
+}
+
+/**
+ * tipc_nl_peer_rm - netlink handler removing a peer node that is down
+ * @skb: request message; its socket selects the net namespace
+ * @info: generic netlink info carrying the nested TIPC_NLA_NET attribute
+ *
+ * The peer is named either by TIPC_NLA_NET_ADDR or by the
+ * TIPC_NLA_NET_NODEID/TIPC_NLA_NET_NODEID_W1 pair. A request carrying
+ * neither is rejected, since no address could be derived from it.
+ *
+ * Return: 0 on success, -EINVAL on a malformed request, -ENOTSUPP when
+ * in_own_node() matches the address, -ENXIO when tipc_node_find() finds no
+ * such peer, -EBUSY when the peer is neither SELF_DOWN_PEER_DOWN nor
+ * SELF_DOWN_PEER_LEAVING.
+ */
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+	struct tipc_node *peer, *temp_node;
+	u8 node_id[NODE_ID_LEN];
+	u64 *w0 = (u64 *)&node_id[0];
+	u64 *w1 = (u64 *)&node_id[8];
+	u32 addr;
+	int err;
+
+	/* The peer is described inside the nested TIPC_NLA_NET attribute */
+	if (!info->attrs[TIPC_NLA_NET])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX,
+					  info->attrs[TIPC_NLA_NET],
+					  tipc_nl_net_policy, info->extack);
+	if (err)
+		return err;
+
+	/* Without either identifier 'addr' would be read uninitialized */
+	if (!attrs[TIPC_NLA_NET_ADDR] && !attrs[TIPC_NLA_NET_NODEID])
+		return -EINVAL;
+
+	/* attrs[TIPC_NLA_NET_NODEID] and attrs[TIPC_NLA_NET_ADDR] are meant
+	 * to be mutually exclusive; if both are present the node identity
+	 * overrides the address
+	 */
+	if (attrs[TIPC_NLA_NET_ADDR]) {
+		addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+		if (!addr)
+			return -EINVAL;
+	}
+
+	if (attrs[TIPC_NLA_NET_NODEID]) {
+		if (!attrs[TIPC_NLA_NET_NODEID_W1])
+			return -EINVAL;
+		*w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+		*w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+		addr = hash128to32(node_id);
+	}
+
+	if (in_own_node(net, addr))
+		return -ENOTSUPP;
+
+	spin_lock_bh(&tn->node_list_lock);
+	peer = tipc_node_find(net, addr);
+	if (!peer) {
+		spin_unlock_bh(&tn->node_list_lock);
+		return -ENXIO;
+	}
+
+	tipc_node_write_lock(peer);
+	if (peer->state != SELF_DOWN_PEER_DOWN &&
+	    peer->state != SELF_DOWN_PEER_LEAVING) {
+		tipc_node_write_unlock(peer);
+		err = -EBUSY;
+		goto err_out;
+	}
+
+	tipc_node_clear_links(peer);
+	tipc_node_write_unlock(peer);
+	tipc_node_delete(peer);
+
+	/* Calculate cluster capabilities */
+	tn->capabilities = TIPC_NODE_CAPABILITIES;
+	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+		tn->capabilities &= temp_node->capabilities;
+	}
+	tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
+	err = 0;
+err_out:
+	/* Both the success and the -EBUSY path drop 'peer' here */
+	tipc_node_put(peer);
+	spin_unlock_bh(&tn->node_list_lock);
+
+	return err;
+}
+
+int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int err;
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ int done = cb->args[0];
+ int last_addr = cb->args[1];
+ struct tipc_node *node;
+ struct tipc_nl_msg msg;
+
+ if (done)
+ return 0;
+
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ if (last_addr) {
+ node = tipc_node_find(net, last_addr);
+ if (!node) {
+ rcu_read_unlock();
+ /* We never set seq or call nl_dump_check_consistent(),
+ * which means that setting prev_seq here will cause the
+ * consistency check to fail in the netlink callback
+ * handler, resulting in the NLMSG_DONE message having
+ * the NLM_F_DUMP_INTR flag set if the node state
+ * changed while we released the lock.
+ */
+ cb->prev_seq = 1;
+ return -EPIPE;
+ }
+ tipc_node_put(node);
+ }
+
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ if (node->preliminary)
continue;
- l_ptr->reset_checkpoint = l_ptr->next_in_no;
- l_ptr->exp_msg_count = 0;
- tipc_link_reset_fragments(l_ptr);
+ if (last_addr) {
+ if (node->addr == last_addr)
+ last_addr = 0;
+ else
+ continue;
+ }
+
+ tipc_node_read_lock(node);
+ err = __tipc_nl_add_node(&msg, node);
+ if (err) {
+ last_addr = node->addr;
+ tipc_node_read_unlock(node);
+ goto out;
+ }
+
+ tipc_node_read_unlock(node);
}
+ done = 1;
+out:
+ cb->args[0] = done;
+ cb->args[1] = last_addr;
+ rcu_read_unlock();
+
+ return skb->len;
+}
- /* Notify subscribers */
- tipc_nodesub_notify(n_ptr);
+/* tipc_node_find_by_name - locate owner node of link by link's name
+ * @net: the applicable net namespace
+ * @link_name: pointer to link name string
+ * @bearer_id: pointer to index in 'node->links' array where the link was found;
+ * reset to 0 on entry and left at 0 when nothing matches.
+ *
+ * Walks the node list under rcu_read_lock(), holding each node's read lock
+ * while its links are compared. No reference is taken on the returned node.
+ *
+ * Returns pointer to node owning the link, or NULL if no matching link is
+ * found.
+ */
+static struct tipc_node *tipc_node_find_by_name(struct net *net,
+ const char *link_name,
+ unsigned int *bearer_id)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l;
+ struct tipc_node *n;
+ struct tipc_node *found_node = NULL;
+ int i;
+
+ *bearer_id = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_read_lock(n);
+ for (i = 0; i < MAX_BEARERS; i++) {
+ l = n->links[i].link;
+ if (l && !strcmp(tipc_link_name(l), link_name)) {
+ *bearer_id = i;
+ found_node = n;
+ break;
+ }
+ }
+ tipc_node_read_unlock(n);
+ if (found_node)
+ break;
+ }
+ rcu_read_unlock();
- /* Prevent re-contact with node until cleanup is done */
- n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
- tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
+ return found_node;
}
-struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
+int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
{
- u32 domain;
- struct sk_buff *buf;
- struct tipc_node *n_ptr;
- struct tipc_node_info node_info;
- u32 payload_size;
+ int err;
+ int res = 0;
+ int bearer_id;
+ char *name;
+ struct tipc_link *link;
+ struct tipc_node *node;
+ struct sk_buff_head xmitq;
+ struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+
+ __skb_queue_head_init(&xmitq);
+
+ if (!info->attrs[TIPC_NLA_LINK])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX,
+ info->attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+ name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
- domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (!tipc_addr_domain_valid(domain))
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (network address)");
+ if (strcmp(name, tipc_bclink_name) == 0)
+ return tipc_nl_bc_link_set(net, attrs);
- read_lock_bh(&tipc_net_lock);
- if (!tipc_num_nodes) {
- read_unlock_bh(&tipc_net_lock);
- return tipc_cfg_reply_none();
+ node = tipc_node_find_by_name(net, name, &bearer_id);
+ if (!node)
+ return -EINVAL;
+
+ tipc_node_read_lock(node);
+
+ link = node->links[bearer_id].link;
+ if (!link) {
+ res = -EINVAL;
+ goto out;
}
- /* For now, get space for all other nodes */
- payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes;
- if (payload_size > 32768u) {
- read_unlock_bh(&tipc_net_lock);
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (too many nodes)");
+ if (attrs[TIPC_NLA_LINK_PROP]) {
+ struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
+
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props);
+ if (err) {
+ res = err;
+ goto out;
+ }
+
+ if (props[TIPC_NLA_PROP_TOL]) {
+ u32 tol;
+
+ tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+ tipc_link_set_tolerance(link, tol, &xmitq);
+ }
+ if (props[TIPC_NLA_PROP_PRIO]) {
+ u32 prio;
+
+ prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
+ tipc_link_set_prio(link, prio, &xmitq);
+ }
+ if (props[TIPC_NLA_PROP_WIN]) {
+ u32 max_win;
+
+ max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ tipc_link_set_queue_limits(link,
+ tipc_link_min_win(link),
+ max_win);
+ }
}
- buf = tipc_cfg_reply_alloc(payload_size);
- if (!buf) {
- read_unlock_bh(&tipc_net_lock);
- return NULL;
+
+out:
+ tipc_node_read_unlock(node);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr,
+ NULL);
+ return res;
+}
+
+/**
+ * tipc_nl_node_get_link - netlink handler returning one link's attributes
+ * @skb: request message (not used; the namespace comes from @info)
+ * @info: generic netlink info carrying the nested TIPC_NLA_LINK attribute
+ *
+ * A name equal to tipc_bclink_name selects the broadcast link; any other
+ * name is looked up among the node links.
+ *
+ * Return: result of genlmsg_reply() on success, -EINVAL for a missing or
+ * unknown link name, -ENOMEM if the reply cannot be allocated, or the error
+ * from attribute parsing or message filling.
+ */
+int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
+	struct tipc_nl_msg msg;
+	char *name;
+	int err;
+
+	msg.portid = info->snd_portid;
+	msg.seq = info->snd_seq;
+
+	if (!info->attrs[TIPC_NLA_LINK])
+		return -EINVAL;
+
+	err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX,
+					  info->attrs[TIPC_NLA_LINK],
+					  tipc_nl_link_policy, info->extack);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_LINK_NAME])
+		return -EINVAL;
+
+	name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+
+	msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg.skb)
+		return -ENOMEM;
+
+	if (strcmp(name, tipc_bclink_name) == 0) {
+		err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl);
+		if (err)
+			goto err_free;
+	} else {
+		/* Same type as the 'unsigned int *' that
+		 * tipc_node_find_by_name() writes through
+		 */
+		unsigned int bearer_id;
+		struct tipc_node *node;
+		struct tipc_link *link;
+
+		node = tipc_node_find_by_name(net, name, &bearer_id);
+		if (!node) {
+			err = -EINVAL;
+			goto err_free;
+		}
+
+		tipc_node_read_lock(node);
+		/* Re-read the link pointer now that the node lock is held */
+		link = node->links[bearer_id].link;
+		if (!link) {
+			tipc_node_read_unlock(node);
+			err = -EINVAL;
+			goto err_free;
+		}
+
+		err = __tipc_nl_add_link(net, &msg, link, 0);
+		tipc_node_read_unlock(node);
+		if (err)
+			goto err_free;
+	}
+
+	return genlmsg_reply(msg.skb, info);
+
+err_free:
+	nlmsg_free(msg.skb);
+	return err;
+}
+
+int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+ char *link_name;
+ unsigned int bearer_id;
+ struct tipc_link *link;
+ struct tipc_node *node;
+ struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_link_entry *le;
+
+ if (!info->attrs[TIPC_NLA_LINK])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX,
+ info->attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy, info->extack);
+ if (err)
+ return err;
+
+ if (!attrs[TIPC_NLA_LINK_NAME])
+ return -EINVAL;
+
+ link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
+
+ err = -EINVAL;
+ if (!strcmp(link_name, tipc_bclink_name)) {
+ err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net));
+ if (err)
+ return err;
+ return 0;
+ } else if (strstr(link_name, tipc_bclink_name)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ tipc_node_read_lock(node);
+ link = node->bc_entry.link;
+ if (link && !strcmp(link_name, tipc_link_name(link))) {
+ err = tipc_bclink_reset_stats(net, link);
+ tipc_node_read_unlock(node);
+ break;
+ }
+ tipc_node_read_unlock(node);
+ }
+ rcu_read_unlock();
+ return err;
+ }
+
+ node = tipc_node_find_by_name(net, link_name, &bearer_id);
+ if (!node)
+ return -EINVAL;
+
+ le = &node->links[bearer_id];
+ tipc_node_read_lock(node);
+ spin_lock_bh(&le->lock);
+ link = node->links[bearer_id].link;
+ if (!link) {
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(node);
+ return -EINVAL;
}
+ tipc_link_reset_stats(link);
+ spin_unlock_bh(&le->lock);
+ tipc_node_read_unlock(node);
+ return 0;
+}
+
+/* Caller should hold node lock */
+static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_node *node, u32 *prev_link,
+ bool bc_link)
+{
+ u32 i;
+ int err;
- /* Add TLVs for all nodes in scope */
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
- if (!tipc_in_scope(domain, n_ptr->addr))
+ for (i = *prev_link; i < MAX_BEARERS; i++) {
+ *prev_link = i;
+
+ if (!node->links[i].link)
continue;
- node_info.addr = htonl(n_ptr->addr);
- node_info.up = htonl(tipc_node_is_up(n_ptr));
- tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO,
- &node_info, sizeof(node_info));
+
+ err = __tipc_nl_add_link(net, msg,
+ node->links[i].link, NLM_F_MULTI);
+ if (err)
+ return err;
}
- read_unlock_bh(&tipc_net_lock);
- return buf;
+ if (bc_link) {
+ *prev_link = i;
+ err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link);
+ if (err)
+ return err;
+ }
+
+ *prev_link = 0;
+
+ return 0;
}
-struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
+int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
- u32 domain;
- struct sk_buff *buf;
- struct tipc_node *n_ptr;
- struct tipc_link_info link_info;
- u32 payload_size;
+ struct net *net = sock_net(skb->sk);
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
+ struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_node *node;
+ struct tipc_nl_msg msg;
+ u32 prev_node = cb->args[0];
+ u32 prev_link = cb->args[1];
+ int done = cb->args[2];
+ bool bc_link = cb->args[3];
+ int err;
+
+ if (done)
+ return 0;
+
+ if (!prev_node) {
+ /* Check if broadcast-receiver links dumping is needed */
+ if (attrs && attrs[TIPC_NLA_LINK]) {
+ err = nla_parse_nested_deprecated(link,
+ TIPC_NLA_LINK_MAX,
+ attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy,
+ NULL);
+ if (unlikely(err))
+ return err;
+ if (unlikely(!link[TIPC_NLA_LINK_BROADCAST]))
+ return -EINVAL;
+ bc_link = true;
+ }
+ }
+
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ if (prev_node) {
+ node = tipc_node_find(net, prev_node);
+ if (!node) {
+ /* We never set seq or call nl_dump_check_consistent()
+ * this means that setting prev_seq here will cause the
+ * consistence check to fail in the netlink callback
+ * handler. Resulting in the last NLMSG_DONE message
+ * having the NLM_F_DUMP_INTR flag set.
+ */
+ cb->prev_seq = 1;
+ goto out;
+ }
+ tipc_node_put(node);
+
+ list_for_each_entry_continue_rcu(node, &tn->node_list,
+ list) {
+ tipc_node_read_lock(node);
+ err = __tipc_nl_add_node_links(net, &msg, node,
+ &prev_link, bc_link);
+ tipc_node_read_unlock(node);
+ if (err)
+ goto out;
+
+ prev_node = node->addr;
+ }
+ } else {
+ err = tipc_nl_add_bc_link(net, &msg, tn->bcl);
+ if (err)
+ goto out;
+
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ tipc_node_read_lock(node);
+ err = __tipc_nl_add_node_links(net, &msg, node,
+ &prev_link, bc_link);
+ tipc_node_read_unlock(node);
+ if (err)
+ goto out;
+
+ prev_node = node->addr;
+ }
+ }
+ done = 1;
+out:
+ rcu_read_unlock();
+
+ cb->args[0] = prev_node;
+ cb->args[1] = prev_link;
+ cb->args[2] = done;
+ cb->args[3] = bc_link;
+
+ return skb->len;
+}
+
+/* tipc_nl_node_set_monitor - netlink handler updating monitor settings
+ * @skb: request message; its socket selects the net namespace
+ * @info: generic netlink info carrying the nested TIPC_NLA_MON attribute
+ *
+ * Only TIPC_NLA_MON_ACTIVATION_THRESHOLD is acted upon; a request without it
+ * succeeds without changing anything.
+ *
+ * Returns 0 on success, -EINVAL if TIPC_NLA_MON is absent, otherwise the
+ * error from parsing or from tipc_nl_monitor_set_threshold().
+ */
+int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
+	struct nlattr *threshold;
+	int rc;
+
+	if (!info->attrs[TIPC_NLA_MON])
+		return -EINVAL;
+
+	rc = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX,
+					 info->attrs[TIPC_NLA_MON],
+					 tipc_nl_monitor_policy,
+					 info->extack);
+	if (rc)
+		return rc;
+
+	threshold = mon[TIPC_NLA_MON_ACTIVATION_THRESHOLD];
+	if (!threshold)
+		return 0;
+
+	return tipc_nl_monitor_set_threshold(net, nla_get_u32(threshold));
+}
+
+static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg)
+{
+ struct nlattr *attrs;
+ void *hdr;
+ u32 val;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ 0, TIPC_NL_MON_GET);
+ if (!hdr)
+ return -EMSGSIZE;
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+ attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON);
+ if (!attrs)
+ goto msg_full;
- domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (!tipc_addr_domain_valid(domain))
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (network address)");
+ val = tipc_nl_monitor_get_threshold(net);
- if (!tipc_own_addr)
- return tipc_cfg_reply_none();
+ if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val))
+ goto attr_msg_full;
- read_lock_bh(&tipc_net_lock);
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
- /* Get space for all unicast links + broadcast link */
- payload_size = TLV_SPACE(sizeof(link_info)) *
- (atomic_read(&tipc_num_links) + 1);
- if (payload_size > 32768u) {
- read_unlock_bh(&tipc_net_lock);
- return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
- " (too many links)");
+ return 0;
+
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_nl_msg msg;
+ int err;
+
+ msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg.skb)
+ return -ENOMEM;
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
+ err = __tipc_nl_add_monitor_prop(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
}
- buf = tipc_cfg_reply_alloc(payload_size);
- if (!buf) {
- read_unlock_bh(&tipc_net_lock);
- return NULL;
+
+ return genlmsg_reply(msg.skb, info);
+}
+
+/* tipc_nl_node_dump_monitor - netlink dump callback emitting monitor entries
+ * @skb: dump buffer being filled
+ * @cb: dump state; args[0] holds the bearer id to resume from
+ *
+ * Adds one entry per bearer id under rtnl_lock() until
+ * __tipc_nl_add_monitor() fails or MAX_BEARERS is reached, and stores the
+ * stop position back into args[0].
+ *
+ * Returns skb->len, or 0 once args[0] has reached MAX_BEARERS.
+ */
+int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	u32 prev_bearer = cb->args[0];
+	struct tipc_nl_msg msg;
+	int bearer_id;
+
+	if (prev_bearer == MAX_BEARERS)
+		return 0;
+
+	msg.seq = cb->nlh->nlmsg_seq;
+	msg.portid = NETLINK_CB(cb->skb).portid;
+	msg.skb = skb;
+
+	rtnl_lock();
+	bearer_id = prev_bearer;
+	/* Stop at the first bearer that could not be added */
+	while (bearer_id < MAX_BEARERS &&
+	       !__tipc_nl_add_monitor(net, &msg, bearer_id))
+		bearer_id++;
+	rtnl_unlock();
+	cb->args[0] = bearer_id;
+
+	return skb->len;
+}
+
+int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ u32 prev_node = cb->args[1];
+ u32 bearer_id = cb->args[2];
+ int done = cb->args[0];
+ struct tipc_nl_msg msg;
+ int err;
+
+ if (!prev_node) {
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
+ struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
+
+ if (!attrs[TIPC_NLA_MON])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX,
+ attrs[TIPC_NLA_MON],
+ tipc_nl_monitor_policy,
+ NULL);
+ if (err)
+ return err;
+
+ if (!mon[TIPC_NLA_MON_REF])
+ return -EINVAL;
+
+ bearer_id = nla_get_u32(mon[TIPC_NLA_MON_REF]);
+
+ if (bearer_id >= MAX_BEARERS)
+ return -EINVAL;
}
- /* Add TLV for broadcast link */
- link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr));
- link_info.up = htonl(1);
- strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
- tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
+ if (done)
+ return 0;
+
+ msg.skb = skb;
+ msg.portid = NETLINK_CB(cb->skb).portid;
+ msg.seq = cb->nlh->nlmsg_seq;
+
+ rtnl_lock();
+ err = tipc_nl_add_monitor_peer(net, &msg, bearer_id, &prev_node);
+ if (!err)
+ done = 1;
- /* Add TLVs for any other links in scope */
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
- u32 i;
+ rtnl_unlock();
+ cb->args[0] = done;
+ cb->args[1] = prev_node;
+ cb->args[2] = bearer_id;
+
+ return skb->len;
+}
- if (!tipc_in_scope(domain, n_ptr->addr))
+#ifdef CONFIG_TIPC_CRYPTO
+/* tipc_nl_retrieve_key - fetch and size-check the user key attribute
+ * @attrs: parsed TIPC_NLA_NODE_* attributes
+ * @pkey: set to the key inside the attribute payload on success
+ *
+ * Returns 0 on success, -ENODATA if TIPC_NLA_NODE_KEY is absent, -EINVAL if
+ * the payload is shorter than the key header, the key length exceeds
+ * TIPC_AEAD_KEYLEN_MAX, or the payload is shorter than tipc_aead_key_size().
+ */
+static int tipc_nl_retrieve_key(struct nlattr **attrs,
+				struct tipc_aead_key **pkey)
+{
+	struct nlattr *nla = attrs[TIPC_NLA_NODE_KEY];
+	struct tipc_aead_key *ukey;
+
+	if (!nla)
+		return -ENODATA;
+
+	/* The fixed header must be present before keylen can be read */
+	if (nla_len(nla) < sizeof(*ukey))
+		return -EINVAL;
+
+	ukey = nla_data(nla);
+	if (ukey->keylen > TIPC_AEAD_KEYLEN_MAX)
+		return -EINVAL;
+	if (nla_len(nla) < tipc_aead_key_size(ukey))
+		return -EINVAL;
+
+	*pkey = ukey;
+	return 0;
+}
+
+/* tipc_nl_retrieve_nodeid - fetch the node identity attribute
+ * @attrs: parsed TIPC_NLA_NODE_* attributes
+ * @node_id: set to the attribute payload on success
+ *
+ * Returns 0 on success, -ENODATA if TIPC_NLA_NODE_ID is absent, -EINVAL if
+ * its payload is shorter than TIPC_NODEID_LEN.
+ */
+static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
+{
+	struct nlattr *nla = attrs[TIPC_NLA_NODE_ID];
+
+	if (!nla)
+		return -ENODATA;
+
+	if (nla_len(nla) >= TIPC_NODEID_LEN) {
+		*node_id = nla_data(nla);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* tipc_nl_retrieve_rekeying - fetch the rekeying attribute value
+ * @attrs: parsed TIPC_NLA_NODE_* attributes
+ * @intv: set to the u32 carried by TIPC_NLA_NODE_REKEYING on success
+ *
+ * Returns 0 on success, -ENODATA if the attribute is absent.
+ */
+static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv)
+{
+	struct nlattr *nla = attrs[TIPC_NLA_NODE_REKEYING];
+
+	if (nla) {
+		*intv = nla_get_u32(nla);
+		return 0;
+	}
+
+	return -ENODATA;
+}
+
+static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx;
+ struct tipc_node *n = NULL;
+ struct tipc_aead_key *ukey;
+ bool rekeying = true, master_key = false;
+ u8 *id, *own_id, mode;
+ u32 intv = 0;
+ int rc = 0;
+
+ if (!info->attrs[TIPC_NLA_NODE])
+ return -EINVAL;
+
+ rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX,
+ info->attrs[TIPC_NLA_NODE],
+ tipc_nl_node_policy, info->extack);
+ if (rc)
+ return rc;
+
+ own_id = tipc_own_id(net);
+ if (!own_id) {
+ GENL_SET_ERR_MSG(info, "not found own node identity (set id?)");
+ return -EPERM;
+ }
+
+ rc = tipc_nl_retrieve_rekeying(attrs, &intv);
+ if (rc == -ENODATA)
+ rekeying = false;
+
+ rc = tipc_nl_retrieve_key(attrs, &ukey);
+ if (rc == -ENODATA && rekeying)
+ goto rekeying;
+ else if (rc)
+ return rc;
+
+ rc = tipc_aead_key_validate(ukey, info);
+ if (rc)
+ return rc;
+
+ rc = tipc_nl_retrieve_nodeid(attrs, &id);
+ switch (rc) {
+ case -ENODATA:
+ mode = CLUSTER_KEY;
+ master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]);
+ break;
+ case 0:
+ mode = PER_NODE_KEY;
+ if (memcmp(id, own_id, NODE_ID_LEN)) {
+ n = tipc_node_find_by_id(net, id) ?:
+ tipc_node_create(net, 0, id, 0xffffu, 0, true);
+ if (unlikely(!n))
+ return -ENOMEM;
+ c = n->crypto_rx;
+ }
+ break;
+ default:
+ return rc;
+ }
+
+ /* Initiate the TX/RX key */
+ rc = tipc_crypto_key_init(c, ukey, mode, master_key);
+ if (n)
+ tipc_node_put(n);
+
+ if (unlikely(rc < 0)) {
+ GENL_SET_ERR_MSG(info, "unable to initiate or attach new key");
+ return rc;
+ } else if (c == tx) {
+ /* Distribute TX key but not master one */
+ if (!master_key && tipc_crypto_key_distr(tx, rc, NULL))
+ GENL_SET_ERR_MSG(info, "failed to replicate new key");
+rekeying:
+ /* Schedule TX rekeying if needed */
+ tipc_crypto_rekeying_sched(tx, rekeying, intv);
+ }
+
+ return 0;
+}
+
+/* tipc_nl_node_set_key - run __tipc_nl_node_set_key() under rtnl_lock()
+ * @skb: request message
+ * @info: generic netlink info of the request
+ *
+ * Returns whatever __tipc_nl_node_set_key() returns.
+ */
+int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc;
+
+	rtnl_lock();
+	rc = __tipc_nl_node_set_key(skb, info);
+	rtnl_unlock();
+	return rc;
+}
+
+/* __tipc_nl_node_flush_key - flush the TX keys and every node's RX keys
+ * @skb: request message; its socket selects the net namespace
+ * @info: generic netlink info of the request (not used)
+ *
+ * Always returns 0.
+ */
+static int __tipc_nl_node_flush_key(struct sk_buff *skb,
+				    struct genl_info *info)
+{
+	struct tipc_net *tn = tipc_net(sock_net(skb->sk));
+	struct tipc_node *node;
+
+	tipc_crypto_key_flush(tn->crypto_tx);
+
+	/* The node list is only traversed here, so RCU read side suffices */
+	rcu_read_lock();
+	list_for_each_entry_rcu(node, &tn->node_list, list) {
+		tipc_crypto_key_flush(node->crypto_rx);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/* tipc_nl_node_flush_key - run __tipc_nl_node_flush_key() under rtnl_lock()
+ * @skb: request message
+ * @info: generic netlink info of the request
+ *
+ * Returns whatever __tipc_nl_node_flush_key() returns.
+ */
+int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc;
+
+	rtnl_lock();
+	rc = __tipc_nl_node_flush_key(skb, info);
+	rtnl_unlock();
+	return rc;
+}
+#endif
+
+/**
+ * tipc_node_dump - dump TIPC node data
+ * @n: tipc node to be dumped; NULL yields a "node data: (null)" line
+ * @more: dump more?
+ * - false: dump only tipc node data (size limit NODE_LMIN)
+ * - true: dump node link data as well, i.e. link entries 0 and 1 and
+ *   the broadcast link (size limit NODE_LMAX)
+ * @buf: buffer receiving the dump as formatted text
+ *
+ * NOTE(review): tipc_link_dump() and tipc_list_dump() are handed 'buf + i'
+ * without the remaining size, so the limit above is only applied by the
+ * scnprintf()/tipc_media_addr_printf() calls in this function - confirm
+ * that the helpers bound their own output.
+ *
+ * Return: the accumulated count reported by the formatting calls
+ */
+int tipc_node_dump(struct tipc_node *n, bool more, char *buf)
+{
+ int i = 0;
+ size_t sz = (more) ? NODE_LMAX : NODE_LMIN;
+
+ if (!n) {
+ i += scnprintf(buf, sz, "node data: (null)\n");
+ return i;
+ }
+
+ i += scnprintf(buf, sz, "node data: %x", n->addr);
+ i += scnprintf(buf + i, sz - i, " %x", n->state);
+ i += scnprintf(buf + i, sz - i, " %d", n->active_links[0]);
+ i += scnprintf(buf + i, sz - i, " %d", n->active_links[1]);
+ i += scnprintf(buf + i, sz - i, " %x", n->action_flags);
+ i += scnprintf(buf + i, sz - i, " %u", n->failover_sent);
+ i += scnprintf(buf + i, sz - i, " %u", n->sync_point);
+ i += scnprintf(buf + i, sz - i, " %d", n->link_cnt);
+ i += scnprintf(buf + i, sz - i, " %u", n->working_links);
+ i += scnprintf(buf + i, sz - i, " %x", n->capabilities);
+ i += scnprintf(buf + i, sz - i, " %lu\n", n->keepalive_intv);
+
+ if (!more)
+ return i;
+
+ i += scnprintf(buf + i, sz - i, "link_entry[0]:\n");
+ i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[0].mtu);
+ i += scnprintf(buf + i, sz - i, " media: ");
+ i += tipc_media_addr_printf(buf + i, sz - i, &n->links[0].maddr);
+ i += scnprintf(buf + i, sz - i, "\n");
+ i += tipc_link_dump(n->links[0].link, TIPC_DUMP_NONE, buf + i);
+ i += scnprintf(buf + i, sz - i, " inputq: ");
+ i += tipc_list_dump(&n->links[0].inputq, false, buf + i);
+
+ i += scnprintf(buf + i, sz - i, "link_entry[1]:\n");
+ i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[1].mtu);
+ i += scnprintf(buf + i, sz - i, " media: ");
+ i += tipc_media_addr_printf(buf + i, sz - i, &n->links[1].maddr);
+ i += scnprintf(buf + i, sz - i, "\n");
+ i += tipc_link_dump(n->links[1].link, TIPC_DUMP_NONE, buf + i);
+ i += scnprintf(buf + i, sz - i, " inputq: ");
+ i += tipc_list_dump(&n->links[1].inputq, false, buf + i);
+
+ i += scnprintf(buf + i, sz - i, "bclink:\n ");
+ i += tipc_link_dump(n->bc_entry.link, TIPC_DUMP_NONE, buf + i);
+
+ return i;
+}
+
+void tipc_node_pre_cleanup_net(struct net *exit_net)
+{
+ struct tipc_node *n;
+ struct tipc_net *tn;
+ struct net *tmp;
+
+ rcu_read_lock();
+ for_each_net_rcu(tmp) {
+ if (tmp == exit_net)
continue;
- tipc_node_lock(n_ptr);
- for (i = 0; i < MAX_BEARERS; i++) {
- if (!n_ptr->links[i])
+ tn = tipc_net(tmp);
+ if (!tn)
+ continue;
+ spin_lock_bh(&tn->node_list_lock);
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ if (!n->peer_net)
+ continue;
+ if (n->peer_net != exit_net)
continue;
- link_info.dest = htonl(n_ptr->addr);
- link_info.up = htonl(tipc_link_is_up(n_ptr->links[i]));
- strcpy(link_info.str, n_ptr->links[i]->name);
- tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO,
- &link_info, sizeof(link_info));
+ tipc_node_write_lock(n);
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ tipc_node_write_unlock_fast(n);
+ break;
}
- tipc_node_unlock(n_ptr);
+ spin_unlock_bh(&tn->node_list_lock);
}
-
- read_unlock_bh(&tipc_net_lock);
- return buf;
+ rcu_read_unlock();
}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 3c189b35b102..154a5bbb0d29 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -1,8 +1,8 @@
/*
* net/tipc/node.h: Include file for TIPC node management routines
*
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2000-2006, 2014-2016, Ericsson AB
+ * Copyright (c) 2005, 2010-2014, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,95 +37,95 @@
#ifndef _TIPC_NODE_H
#define _TIPC_NODE_H
-#include "node_subscr.h"
#include "addr.h"
#include "net.h"
#include "bearer.h"
+#include "msg.h"
-/*
- * Out-of-range value for node signature
- */
-#define INVALID_NODE_SIG 0x10000
-
-/* Flags used to block (re)establishment of contact with a neighboring node */
-#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */
-#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */
-#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */
-
-/**
- * struct tipc_node - TIPC node structure
- * @addr: network address of node
- * @lock: spinlock governing access to structure
- * @hash: links to adjacent nodes in unsorted hash chain
- * @list: links to adjacent nodes in sorted list of cluster's nodes
- * @nsub: list of "node down" subscriptions monitoring node
- * @active_links: pointers to active links to node
- * @links: pointers to all links to node
- * @working_links: number of working links to node (both active and standby)
- * @block_setup: bit mask of conditions preventing link establishment to node
- * @link_cnt: number of links to node
- * @permit_changeover: non-zero if node has redundant links to this system
- * @signature: node instance identifier
- * @bclink: broadcast-related info
- * @acked: sequence # of last outbound b'cast message acknowledged by node
- * @last_in: sequence # of last in-sequence b'cast message received from node
- * @last_sent: sequence # of last b'cast message sent by node
- * @oos_state: state tracker for handling OOS b'cast messages
- * @deferred_size: number of OOS b'cast messages in deferred queue
- * @deferred_head: oldest OOS b'cast message received from node
- * @deferred_tail: newest OOS b'cast message received from node
- * @defragm: list of partially reassembled b'cast message fragments from node
- * @recv_permitted: true if node is allowed to receive b'cast messages
+/* Optional capabilities supported by this code version
*/
-struct tipc_node {
- u32 addr;
- spinlock_t lock;
- struct hlist_node hash;
- struct list_head list;
- struct list_head nsub;
- struct tipc_link *active_links[2];
- struct tipc_link *links[MAX_BEARERS];
- int link_cnt;
- int working_links;
- int block_setup;
- int permit_changeover;
- u32 signature;
- struct {
- u32 acked;
- u32 last_in;
- u32 last_sent;
- u32 oos_state;
- u32 deferred_size;
- struct sk_buff *deferred_head;
- struct sk_buff *deferred_tail;
- struct sk_buff *defragm;
- bool recv_permitted;
- } bclink;
+enum {
+ TIPC_SYN_BIT = (1),
+ TIPC_BCAST_SYNCH = (1 << 1),
+ TIPC_BCAST_STATE_NACK = (1 << 2),
+ TIPC_BLOCK_FLOWCTL = (1 << 3),
+ TIPC_BCAST_RCAST = (1 << 4),
+ TIPC_NODE_ID128 = (1 << 5),
+ TIPC_LINK_PROTO_SEQNO = (1 << 6),
+ TIPC_MCAST_RBCTL = (1 << 7),
+ TIPC_GAP_ACK_BLOCK = (1 << 8),
+ TIPC_TUNNEL_ENHANCED = (1 << 9),
+ TIPC_NAGLE = (1 << 10),
+ TIPC_NAMED_BCAST = (1 << 11)
};
-extern struct list_head tipc_node_list;
+#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
+ TIPC_BCAST_SYNCH | \
+ TIPC_BCAST_STATE_NACK | \
+ TIPC_BCAST_RCAST | \
+ TIPC_BLOCK_FLOWCTL | \
+ TIPC_NODE_ID128 | \
+ TIPC_LINK_PROTO_SEQNO | \
+ TIPC_MCAST_RBCTL | \
+ TIPC_GAP_ACK_BLOCK | \
+ TIPC_TUNNEL_ENHANCED | \
+ TIPC_NAGLE | \
+ TIPC_NAMED_BCAST)
-struct tipc_node *tipc_node_find(u32 addr);
-struct tipc_node *tipc_node_create(u32 addr);
-void tipc_node_delete(struct tipc_node *n_ptr);
-void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-int tipc_node_active_links(struct tipc_node *n_ptr);
-int tipc_node_redundant_links(struct tipc_node *n_ptr);
-int tipc_node_is_up(struct tipc_node *n_ptr);
-struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
-struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
+#define INVALID_BEARER_ID -1
-static inline void tipc_node_lock(struct tipc_node *n_ptr)
-{
- spin_lock_bh(&n_ptr->lock);
-}
-
-static inline void tipc_node_unlock(struct tipc_node *n_ptr)
-{
- spin_unlock_bh(&n_ptr->lock);
-}
+void tipc_node_stop(struct net *net);
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
+u32 tipc_node_get_addr(struct tipc_node *node);
+char *tipc_node_get_id_str(struct tipc_node *node);
+void tipc_node_put(struct tipc_node *node);
+void tipc_node_get(struct tipc_node *node);
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ u16 capabilities, u32 hash_mixes,
+ bool preliminary);
+#ifdef CONFIG_TIPC_CRYPTO
+struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n);
+struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos);
+struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr);
+#endif
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
+void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
+ struct tipc_bearer *bearer,
+ u16 capabilities, u32 signature, u32 hash_mixes,
+ struct tipc_media_addr *maddr,
+ bool *respond, bool *dupl_addr);
+void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop);
+int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
+ char *linkname, size_t len);
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
+ int selector);
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
+ u32 selector);
+void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
+void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests);
+int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
+void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
+bool tipc_node_is_up(struct net *net, u32 addr);
+u16 tipc_node_get_capabilities(struct net *net, u32 addr);
+int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
+ struct netlink_callback *cb);
+#ifdef CONFIG_TIPC_CRYPTO
+int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info);
+#endif
+void tipc_node_pre_cleanup_net(struct net *exit_net);
#endif
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
deleted file mode 100644
index 8a7384c04add..000000000000
--- a/net/tipc/node_subscr.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * net/tipc/node_subscr.c: TIPC "node down" subscription handling
- *
- * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2005, 2010-2011, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "node_subscr.h"
-#include "node.h"
-
-/**
- * tipc_nodesub_subscribe - create "node down" subscription for specified node
- */
-void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
- void *usr_handle, net_ev_handler handle_down)
-{
- if (in_own_node(addr)) {
- node_sub->node = NULL;
- return;
- }
-
- node_sub->node = tipc_node_find(addr);
- if (!node_sub->node) {
- pr_warn("Node subscription rejected, unknown node 0x%x\n",
- addr);
- return;
- }
- node_sub->handle_node_down = handle_down;
- node_sub->usr_handle = usr_handle;
-
- tipc_node_lock(node_sub->node);
- list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub);
- tipc_node_unlock(node_sub->node);
-}
-
-/**
- * tipc_nodesub_unsubscribe - cancel "node down" subscription (if any)
- */
-void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub)
-{
- if (!node_sub->node)
- return;
-
- tipc_node_lock(node_sub->node);
- list_del_init(&node_sub->nodesub_list);
- tipc_node_unlock(node_sub->node);
-}
-
-/**
- * tipc_nodesub_notify - notify subscribers that a node is unreachable
- *
- * Note: node is locked by caller
- */
-void tipc_nodesub_notify(struct tipc_node *node)
-{
- struct tipc_node_subscr *ns;
-
- list_for_each_entry(ns, &node->nsub, nodesub_list) {
- if (ns->handle_node_down) {
- tipc_k_signal((Handler)ns->handle_node_down,
- (unsigned long)ns->usr_handle);
- ns->handle_node_down = NULL;
- }
- }
-}
diff --git a/net/tipc/port.c b/net/tipc/port.c
deleted file mode 100644
index b3ed2fcab4fb..000000000000
--- a/net/tipc/port.c
+++ /dev/null
@@ -1,1066 +0,0 @@
-/*
- * net/tipc/port.c: TIPC port code
- *
- * Copyright (c) 1992-2007, Ericsson AB
- * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "config.h"
-#include "port.h"
-#include "name_table.h"
-
-/* Connection management: */
-#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */
-#define CONFIRMED 0
-#define PROBING 1
-
-#define MAX_REJECT_SIZE 1024
-
-DEFINE_SPINLOCK(tipc_port_list_lock);
-
-static LIST_HEAD(ports);
-static void port_handle_node_down(unsigned long ref);
-static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
-static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
-static void port_timeout(unsigned long ref);
-
-
-static u32 port_peernode(struct tipc_port *p_ptr)
-{
- return msg_destnode(&p_ptr->phdr);
-}
-
-static u32 port_peerport(struct tipc_port *p_ptr)
-{
- return msg_destport(&p_ptr->phdr);
-}
-
-/**
- * tipc_port_peer_msg - verify message was sent by connected port's peer
- *
- * Handles cases where the node's network address has changed from
- * the default of <0.0.0> to its configured setting.
- */
-int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg)
-{
- u32 peernode;
- u32 orignode;
-
- if (msg_origport(msg) != port_peerport(p_ptr))
- return 0;
-
- orignode = msg_orignode(msg);
- peernode = port_peernode(p_ptr);
- return (orignode == peernode) ||
- (!orignode && (peernode == tipc_own_addr)) ||
- (!peernode && (orignode == tipc_own_addr));
-}
-
-/**
- * tipc_multicast - send a multicast message to local and remote destinations
- */
-int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
- u32 num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
-{
- struct tipc_msg *hdr;
- struct sk_buff *buf;
- struct sk_buff *ibuf = NULL;
- struct tipc_port_list dports = {0, NULL, };
- struct tipc_port *oport = tipc_port_deref(ref);
- int ext_targets;
- int res;
-
- if (unlikely(!oport))
- return -EINVAL;
-
- /* Create multicast message */
- hdr = &oport->phdr;
- msg_set_type(hdr, TIPC_MCAST_MSG);
- msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
- msg_set_destport(hdr, 0);
- msg_set_destnode(hdr, 0);
- msg_set_nametype(hdr, seq->type);
- msg_set_namelower(hdr, seq->lower);
- msg_set_nameupper(hdr, seq->upper);
- msg_set_hdr_sz(hdr, MCAST_H_SIZE);
- res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
- &buf);
- if (unlikely(!buf))
- return res;
-
- /* Figure out where to send multicast message */
- ext_targets = tipc_nametbl_mc_translate(seq->type, seq->lower, seq->upper,
- TIPC_NODE_SCOPE, &dports);
-
- /* Send message to destinations (duplicate it only if necessary) */
- if (ext_targets) {
- if (dports.count != 0) {
- ibuf = skb_copy(buf, GFP_ATOMIC);
- if (ibuf == NULL) {
- tipc_port_list_free(&dports);
- kfree_skb(buf);
- return -ENOMEM;
- }
- }
- res = tipc_bclink_send_msg(buf);
- if ((res < 0) && (dports.count != 0))
- kfree_skb(ibuf);
- } else {
- ibuf = buf;
- }
-
- if (res >= 0) {
- if (ibuf)
- tipc_port_recv_mcast(ibuf, &dports);
- } else {
- tipc_port_list_free(&dports);
- }
- return res;
-}
-
-/**
- * tipc_port_recv_mcast - deliver multicast message to all destination ports
- *
- * If there is no port list, perform a lookup to create one
- */
-void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
-{
- struct tipc_msg *msg;
- struct tipc_port_list dports = {0, NULL, };
- struct tipc_port_list *item = dp;
- int cnt = 0;
-
- msg = buf_msg(buf);
-
- /* Create destination port list, if one wasn't supplied */
- if (dp == NULL) {
- tipc_nametbl_mc_translate(msg_nametype(msg),
- msg_namelower(msg),
- msg_nameupper(msg),
- TIPC_CLUSTER_SCOPE,
- &dports);
- item = dp = &dports;
- }
-
- /* Deliver a copy of message to each destination port */
- if (dp->count != 0) {
- msg_set_destnode(msg, tipc_own_addr);
- if (dp->count == 1) {
- msg_set_destport(msg, dp->ports[0]);
- tipc_port_recv_msg(buf);
- tipc_port_list_free(dp);
- return;
- }
- for (; cnt < dp->count; cnt++) {
- int index = cnt % PLSIZE;
- struct sk_buff *b = skb_clone(buf, GFP_ATOMIC);
-
- if (b == NULL) {
- pr_warn("Unable to deliver multicast message(s)\n");
- goto exit;
- }
- if ((index == 0) && (cnt != 0))
- item = item->next;
- msg_set_destport(buf_msg(b), item->ports[index]);
- tipc_port_recv_msg(b);
- }
- }
-exit:
- kfree_skb(buf);
- tipc_port_list_free(dp);
-}
-
-/**
- * tipc_createport - create a generic TIPC port
- *
- * Returns pointer to (locked) TIPC port, or NULL if unable to create it
- */
-struct tipc_port *tipc_createport(struct sock *sk,
- u32 (*dispatcher)(struct tipc_port *,
- struct sk_buff *),
- void (*wakeup)(struct tipc_port *),
- const u32 importance)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg;
- u32 ref;
-
- p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
- if (!p_ptr) {
- pr_warn("Port creation failed, no memory\n");
- return NULL;
- }
- ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
- if (!ref) {
- pr_warn("Port creation failed, ref. table exhausted\n");
- kfree(p_ptr);
- return NULL;
- }
-
- p_ptr->sk = sk;
- p_ptr->max_pkt = MAX_PKT_DEFAULT;
- p_ptr->ref = ref;
- INIT_LIST_HEAD(&p_ptr->wait_list);
- INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
- p_ptr->dispatcher = dispatcher;
- p_ptr->wakeup = wakeup;
- k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref);
- INIT_LIST_HEAD(&p_ptr->publications);
- INIT_LIST_HEAD(&p_ptr->port_list);
-
- /*
- * Must hold port list lock while initializing message header template
- * to ensure a change to node's own network address doesn't result
- * in template containing out-dated network address information
- */
- spin_lock_bh(&tipc_port_list_lock);
- msg = &p_ptr->phdr;
- tipc_msg_init(msg, importance, TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
- msg_set_origport(msg, ref);
- list_add_tail(&p_ptr->port_list, &ports);
- spin_unlock_bh(&tipc_port_list_lock);
- return p_ptr;
-}
-
-int tipc_deleteport(u32 ref)
-{
- struct tipc_port *p_ptr;
- struct sk_buff *buf = NULL;
-
- tipc_withdraw(ref, 0, NULL);
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
-
- tipc_ref_discard(ref);
- tipc_port_unlock(p_ptr);
-
- k_cancel_timer(&p_ptr->timer);
- if (p_ptr->connected) {
- buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
- tipc_nodesub_unsubscribe(&p_ptr->subscription);
- }
-
- spin_lock_bh(&tipc_port_list_lock);
- list_del(&p_ptr->port_list);
- list_del(&p_ptr->wait_list);
- spin_unlock_bh(&tipc_port_list_lock);
- k_term_timer(&p_ptr->timer);
- kfree(p_ptr);
- tipc_net_route_msg(buf);
- return 0;
-}
-
-static int port_unreliable(struct tipc_port *p_ptr)
-{
- return msg_src_droppable(&p_ptr->phdr);
-}
-
-int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *isunreliable = port_unreliable(p_ptr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0));
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-static int port_unreturnable(struct tipc_port *p_ptr)
-{
- return msg_dest_droppable(&p_ptr->phdr);
-}
-
-int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *isunrejectable = port_unreturnable(p_ptr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0));
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-/*
- * port_build_proto_msg(): create connection protocol message for port
- *
- * On entry the port must be locked and connected.
- */
-static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr,
- u32 type, u32 ack)
-{
- struct sk_buff *buf;
- struct tipc_msg *msg;
-
- buf = tipc_buf_acquire(INT_H_SIZE);
- if (buf) {
- msg = buf_msg(buf);
- tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE,
- port_peernode(p_ptr));
- msg_set_destport(msg, port_peerport(p_ptr));
- msg_set_origport(msg, p_ptr->ref);
- msg_set_msgcnt(msg, ack);
- }
- return buf;
-}
-
-int tipc_reject_msg(struct sk_buff *buf, u32 err)
-{
- struct tipc_msg *msg = buf_msg(buf);
- struct sk_buff *rbuf;
- struct tipc_msg *rmsg;
- int hdr_sz;
- u32 imp;
- u32 data_sz = msg_data_sz(msg);
- u32 src_node;
- u32 rmsg_sz;
-
- /* discard rejected message if it shouldn't be returned to sender */
- if (WARN(!msg_isdata(msg),
- "attempt to reject message with user=%u", msg_user(msg))) {
- dump_stack();
- goto exit;
- }
- if (msg_errcode(msg) || msg_dest_droppable(msg))
- goto exit;
-
- /*
- * construct returned message by copying rejected message header and
- * data (or subset), then updating header fields that need adjusting
- */
- hdr_sz = msg_hdr_sz(msg);
- rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE);
-
- rbuf = tipc_buf_acquire(rmsg_sz);
- if (rbuf == NULL)
- goto exit;
-
- rmsg = buf_msg(rbuf);
- skb_copy_to_linear_data(rbuf, msg, rmsg_sz);
-
- if (msg_connected(rmsg)) {
- imp = msg_importance(rmsg);
- if (imp < TIPC_CRITICAL_IMPORTANCE)
- msg_set_importance(rmsg, ++imp);
- }
- msg_set_non_seq(rmsg, 0);
- msg_set_size(rmsg, rmsg_sz);
- msg_set_errcode(rmsg, err);
- msg_set_prevnode(rmsg, tipc_own_addr);
- msg_swap_words(rmsg, 4, 5);
- if (!msg_short(rmsg))
- msg_swap_words(rmsg, 6, 7);
-
- /* send self-abort message when rejecting on a connected port */
- if (msg_connected(msg)) {
- struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
-
- if (p_ptr) {
- struct sk_buff *abuf = NULL;
-
- if (p_ptr->connected)
- abuf = port_build_self_abort_msg(p_ptr, err);
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(abuf);
- }
- }
-
- /* send returned message & dispose of rejected message */
- src_node = msg_prevnode(msg);
- if (in_own_node(src_node))
- tipc_port_recv_msg(rbuf);
- else
- tipc_link_send(rbuf, src_node, msg_link_selector(rmsg));
-exit:
- kfree_skb(buf);
- return data_sz;
-}
-
-int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, u32 num_sect,
- unsigned int total_len, int err)
-{
- struct sk_buff *buf;
- int res;
-
- res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE,
- &buf);
- if (!buf)
- return res;
-
- return tipc_reject_msg(buf, err);
-}
-
-static void port_timeout(unsigned long ref)
-{
- struct tipc_port *p_ptr = tipc_port_lock(ref);
- struct sk_buff *buf = NULL;
-
- if (!p_ptr)
- return;
-
- if (!p_ptr->connected) {
- tipc_port_unlock(p_ptr);
- return;
- }
-
- /* Last probe answered ? */
- if (p_ptr->probing_state == PROBING) {
- buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
- } else {
- buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0);
- p_ptr->probing_state = PROBING;
- k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
- }
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
-}
-
-
-static void port_handle_node_down(unsigned long ref)
-{
- struct tipc_port *p_ptr = tipc_port_lock(ref);
- struct sk_buff *buf = NULL;
-
- if (!p_ptr)
- return;
- buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE);
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
-}
-
-
-static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err)
-{
- struct sk_buff *buf = port_build_peer_abort_msg(p_ptr, err);
-
- if (buf) {
- struct tipc_msg *msg = buf_msg(buf);
- msg_swap_words(msg, 4, 5);
- msg_swap_words(msg, 6, 7);
- }
- return buf;
-}
-
-
-static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err)
-{
- struct sk_buff *buf;
- struct tipc_msg *msg;
- u32 imp;
-
- if (!p_ptr->connected)
- return NULL;
-
- buf = tipc_buf_acquire(BASIC_H_SIZE);
- if (buf) {
- msg = buf_msg(buf);
- memcpy(msg, &p_ptr->phdr, BASIC_H_SIZE);
- msg_set_hdr_sz(msg, BASIC_H_SIZE);
- msg_set_size(msg, BASIC_H_SIZE);
- imp = msg_importance(msg);
- if (imp < TIPC_CRITICAL_IMPORTANCE)
- msg_set_importance(msg, ++imp);
- msg_set_errcode(msg, err);
- }
- return buf;
-}
-
-void tipc_port_recv_proto_msg(struct sk_buff *buf)
-{
- struct tipc_msg *msg = buf_msg(buf);
- struct tipc_port *p_ptr;
- struct sk_buff *r_buf = NULL;
- u32 destport = msg_destport(msg);
- int wakeable;
-
- /* Validate connection */
- p_ptr = tipc_port_lock(destport);
- if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) {
- r_buf = tipc_buf_acquire(BASIC_H_SIZE);
- if (r_buf) {
- msg = buf_msg(r_buf);
- tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG,
- BASIC_H_SIZE, msg_orignode(msg));
- msg_set_errcode(msg, TIPC_ERR_NO_PORT);
- msg_set_origport(msg, destport);
- msg_set_destport(msg, msg_origport(msg));
- }
- if (p_ptr)
- tipc_port_unlock(p_ptr);
- goto exit;
- }
-
- /* Process protocol message sent by peer */
- switch (msg_type(msg)) {
- case CONN_ACK:
- wakeable = tipc_port_congested(p_ptr) && p_ptr->congested &&
- p_ptr->wakeup;
- p_ptr->acked += msg_msgcnt(msg);
- if (!tipc_port_congested(p_ptr)) {
- p_ptr->congested = 0;
- if (wakeable)
- p_ptr->wakeup(p_ptr);
- }
- break;
- case CONN_PROBE:
- r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0);
- break;
- default:
- /* CONN_PROBE_REPLY or unrecognized - no action required */
- break;
- }
- p_ptr->probing_state = CONFIRMED;
- tipc_port_unlock(p_ptr);
-exit:
- tipc_net_route_msg(r_buf);
- kfree_skb(buf);
-}
-
-static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id)
-{
- struct publication *publ;
- int ret;
-
- if (full_id)
- ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:",
- tipc_zone(tipc_own_addr),
- tipc_cluster(tipc_own_addr),
- tipc_node(tipc_own_addr), p_ptr->ref);
- else
- ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref);
-
- if (p_ptr->connected) {
- u32 dport = port_peerport(p_ptr);
- u32 destnode = port_peernode(p_ptr);
-
- ret += tipc_snprintf(buf + ret, len - ret,
- " connected to <%u.%u.%u:%u>",
- tipc_zone(destnode),
- tipc_cluster(destnode),
- tipc_node(destnode), dport);
- if (p_ptr->conn_type != 0)
- ret += tipc_snprintf(buf + ret, len - ret,
- " via {%u,%u}", p_ptr->conn_type,
- p_ptr->conn_instance);
- } else if (p_ptr->published) {
- ret += tipc_snprintf(buf + ret, len - ret, " bound to");
- list_for_each_entry(publ, &p_ptr->publications, pport_list) {
- if (publ->lower == publ->upper)
- ret += tipc_snprintf(buf + ret, len - ret,
- " {%u,%u}", publ->type,
- publ->lower);
- else
- ret += tipc_snprintf(buf + ret, len - ret,
- " {%u,%u,%u}", publ->type,
- publ->lower, publ->upper);
- }
- }
- ret += tipc_snprintf(buf + ret, len - ret, "\n");
- return ret;
-}
-
-struct sk_buff *tipc_port_get_ports(void)
-{
- struct sk_buff *buf;
- struct tlv_desc *rep_tlv;
- char *pb;
- int pb_len;
- struct tipc_port *p_ptr;
- int str_len = 0;
-
- buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
- if (!buf)
- return NULL;
- rep_tlv = (struct tlv_desc *)buf->data;
- pb = TLV_DATA(rep_tlv);
- pb_len = ULTRA_STRING_MAX_LEN;
-
- spin_lock_bh(&tipc_port_list_lock);
- list_for_each_entry(p_ptr, &ports, port_list) {
- spin_lock_bh(p_ptr->lock);
- str_len += port_print(p_ptr, pb, pb_len, 0);
- spin_unlock_bh(p_ptr->lock);
- }
- spin_unlock_bh(&tipc_port_list_lock);
- str_len += 1; /* for "\0" */
- skb_put(buf, TLV_SPACE(str_len));
- TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-
- return buf;
-}
-
-void tipc_port_reinit(void)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg;
-
- spin_lock_bh(&tipc_port_list_lock);
- list_for_each_entry(p_ptr, &ports, port_list) {
- msg = &p_ptr->phdr;
- msg_set_prevnode(msg, tipc_own_addr);
- msg_set_orignode(msg, tipc_own_addr);
- }
- spin_unlock_bh(&tipc_port_list_lock);
-}
-
-void tipc_acknowledge(u32 ref, u32 ack)
-{
- struct tipc_port *p_ptr;
- struct sk_buff *buf = NULL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return;
- if (p_ptr->connected) {
- p_ptr->conn_unacked -= ack;
- buf = port_build_proto_msg(p_ptr, CONN_ACK, ack);
- }
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
-}
-
-int tipc_portimportance(u32 ref, unsigned int *importance)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *importance = (unsigned int)msg_importance(&p_ptr->phdr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portimportance(u32 ref, unsigned int imp)
-{
- struct tipc_port *p_ptr;
-
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_importance(&p_ptr->phdr, (u32)imp);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-
-int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
-{
- struct tipc_port *p_ptr;
- struct publication *publ;
- u32 key;
- int res = -EINVAL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
-
- if (p_ptr->connected)
- goto exit;
- key = ref + p_ptr->pub_count + 1;
- if (key == ref) {
- res = -EADDRINUSE;
- goto exit;
- }
- publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
- scope, p_ptr->ref, key);
- if (publ) {
- list_add(&publ->pport_list, &p_ptr->publications);
- p_ptr->pub_count++;
- p_ptr->published = 1;
- res = 0;
- }
-exit:
- tipc_port_unlock(p_ptr);
- return res;
-}
-
-int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
-{
- struct tipc_port *p_ptr;
- struct publication *publ;
- struct publication *tpubl;
- int res = -EINVAL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- if (!seq) {
- list_for_each_entry_safe(publ, tpubl,
- &p_ptr->publications, pport_list) {
- tipc_nametbl_withdraw(publ->type, publ->lower,
- publ->ref, publ->key);
- }
- res = 0;
- } else {
- list_for_each_entry_safe(publ, tpubl,
- &p_ptr->publications, pport_list) {
- if (publ->scope != scope)
- continue;
- if (publ->type != seq->type)
- continue;
- if (publ->lower != seq->lower)
- continue;
- if (publ->upper != seq->upper)
- break;
- tipc_nametbl_withdraw(publ->type, publ->lower,
- publ->ref, publ->key);
- res = 0;
- break;
- }
- }
- if (list_empty(&p_ptr->publications))
- p_ptr->published = 0;
- tipc_port_unlock(p_ptr);
- return res;
-}
-
-int tipc_connect(u32 ref, struct tipc_portid const *peer)
-{
- struct tipc_port *p_ptr;
- int res;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- res = __tipc_connect(ref, p_ptr, peer);
- tipc_port_unlock(p_ptr);
- return res;
-}
-
-/*
- * __tipc_connect - connect to a remote peer
- *
- * Port must be locked.
- */
-int __tipc_connect(u32 ref, struct tipc_port *p_ptr,
- struct tipc_portid const *peer)
-{
- struct tipc_msg *msg;
- int res = -EINVAL;
-
- if (p_ptr->published || p_ptr->connected)
- goto exit;
- if (!peer->ref)
- goto exit;
-
- msg = &p_ptr->phdr;
- msg_set_destnode(msg, peer->node);
- msg_set_destport(msg, peer->ref);
- msg_set_type(msg, TIPC_CONN_MSG);
- msg_set_lookup_scope(msg, 0);
- msg_set_hdr_sz(msg, SHORT_H_SIZE);
-
- p_ptr->probing_interval = PROBING_INTERVAL;
- p_ptr->probing_state = CONFIRMED;
- p_ptr->connected = 1;
- k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
-
- tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
- (void *)(unsigned long)ref,
- (net_ev_handler)port_handle_node_down);
- res = 0;
-exit:
- p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
- return res;
-}
-
-/*
- * __tipc_disconnect - disconnect port from peer
- *
- * Port must be locked.
- */
-int __tipc_disconnect(struct tipc_port *tp_ptr)
-{
- int res;
-
- if (tp_ptr->connected) {
- tp_ptr->connected = 0;
- /* let timer expire on it's own to avoid deadlock! */
- tipc_nodesub_unsubscribe(&tp_ptr->subscription);
- res = 0;
- } else {
- res = -ENOTCONN;
- }
- return res;
-}
-
-/*
- * tipc_disconnect(): Disconnect port form peer.
- * This is a node local operation.
- */
-int tipc_disconnect(u32 ref)
-{
- struct tipc_port *p_ptr;
- int res;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- res = __tipc_disconnect(p_ptr);
- tipc_port_unlock(p_ptr);
- return res;
-}
-
-/*
- * tipc_shutdown(): Send a SHUTDOWN msg to peer and disconnect
- */
-int tipc_shutdown(u32 ref)
-{
- struct tipc_port *p_ptr;
- struct sk_buff *buf = NULL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
-
- buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN);
- tipc_port_unlock(p_ptr);
- tipc_net_route_msg(buf);
- return tipc_disconnect(ref);
-}
-
-/**
- * tipc_port_recv_msg - receive message from lower layer and deliver to port user
- */
-int tipc_port_recv_msg(struct sk_buff *buf)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg = buf_msg(buf);
- u32 destport = msg_destport(msg);
- u32 dsz = msg_data_sz(msg);
- u32 err;
-
- /* forward unresolved named message */
- if (unlikely(!destport)) {
- tipc_net_route_msg(buf);
- return dsz;
- }
-
- /* validate destination & pass to port, otherwise reject message */
- p_ptr = tipc_port_lock(destport);
- if (likely(p_ptr)) {
- err = p_ptr->dispatcher(p_ptr, buf);
- tipc_port_unlock(p_ptr);
- if (likely(!err))
- return dsz;
- } else {
- err = TIPC_ERR_NO_PORT;
- }
-
- return tipc_reject_msg(buf, err);
-}
-
-/*
- * tipc_port_recv_sections(): Concatenate and deliver sectioned
- * message for this node.
- */
-static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect,
- struct iovec const *msg_sect,
- unsigned int total_len)
-{
- struct sk_buff *buf;
- int res;
-
- res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len,
- MAX_MSG_SIZE, &buf);
- if (likely(buf))
- tipc_port_recv_msg(buf);
- return res;
-}
-
-/**
- * tipc_send - send message sections on connection
- */
-int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
-{
- struct tipc_port *p_ptr;
- u32 destnode;
- int res;
-
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || !p_ptr->connected)
- return -EINVAL;
-
- p_ptr->congested = 1;
- if (!tipc_port_congested(p_ptr)) {
- destnode = port_peernode(p_ptr);
- if (likely(!in_own_node(destnode)))
- res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
- total_len, destnode);
- else
- res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect,
- total_len);
-
- if (likely(res != -ELINKCONG)) {
- p_ptr->congested = 0;
- if (res > 0)
- p_ptr->sent++;
- return res;
- }
- }
- if (port_unreliable(p_ptr)) {
- p_ptr->congested = 0;
- return total_len;
- }
- return -ELINKCONG;
-}
-
-/**
- * tipc_send2name - send message sections to port name
- */
-int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg;
- u32 destnode = domain;
- u32 destport;
- int res;
-
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || p_ptr->connected)
- return -EINVAL;
-
- msg = &p_ptr->phdr;
- msg_set_type(msg, TIPC_NAMED_MSG);
- msg_set_hdr_sz(msg, NAMED_H_SIZE);
- msg_set_nametype(msg, name->type);
- msg_set_nameinst(msg, name->instance);
- msg_set_lookup_scope(msg, tipc_addr_scope(domain));
- destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
- msg_set_destnode(msg, destnode);
- msg_set_destport(msg, destport);
-
- if (likely(destport || destnode)) {
- if (likely(in_own_node(destnode)))
- res = tipc_port_recv_sections(p_ptr, num_sect,
- msg_sect, total_len);
- else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect,
- num_sect, total_len,
- destnode);
- else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect,
- num_sect, total_len,
- TIPC_ERR_NO_NODE);
- if (likely(res != -ELINKCONG)) {
- if (res > 0)
- p_ptr->sent++;
- return res;
- }
- if (port_unreliable(p_ptr)) {
- return total_len;
- }
- return -ELINKCONG;
- }
- return tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NAME);
-}
-
-/**
- * tipc_send2port - send message sections to port identity
- */
-int tipc_send2port(u32 ref, struct tipc_portid const *dest,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg;
- int res;
-
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || p_ptr->connected)
- return -EINVAL;
-
- msg = &p_ptr->phdr;
- msg_set_type(msg, TIPC_DIRECT_MSG);
- msg_set_lookup_scope(msg, 0);
- msg_set_destnode(msg, dest->node);
- msg_set_destport(msg, dest->ref);
- msg_set_hdr_sz(msg, BASIC_H_SIZE);
-
- if (in_own_node(dest->node))
- res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect,
- total_len);
- else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
- total_len, dest->node);
- else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect, num_sect,
- total_len, TIPC_ERR_NO_NODE);
- if (likely(res != -ELINKCONG)) {
- if (res > 0)
- p_ptr->sent++;
- return res;
- }
- if (port_unreliable(p_ptr)) {
- return total_len;
- }
- return -ELINKCONG;
-}
diff --git a/net/tipc/port.h b/net/tipc/port.h
deleted file mode 100644
index 5a7026b9c345..000000000000
--- a/net/tipc/port.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * net/tipc/port.h: Include file for TIPC port code
- *
- * Copyright (c) 1994-2007, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_PORT_H
-#define _TIPC_PORT_H
-
-#include "ref.h"
-#include "net.h"
-#include "msg.h"
-#include "node_subscr.h"
-
-#define TIPC_FLOW_CONTROL_WIN 512
-#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
- SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
-
-/**
- * struct tipc_port - TIPC port structure
- * @sk: pointer to socket handle
- * @lock: pointer to spinlock for controlling access to port
- * @connected: non-zero if port is currently connected to a peer port
- * @conn_type: TIPC type used when connection was established
- * @conn_instance: TIPC instance used when connection was established
- * @conn_unacked: number of unacknowledged messages received from peer port
- * @published: non-zero if port has one or more associated names
- * @congested: non-zero if cannot send because of link or port congestion
- * @max_pkt: maximum packet size "hint" used when building messages sent by port
- * @ref: unique reference to port in TIPC object registry
- * @phdr: preformatted message header used when sending messages
- * @port_list: adjacent ports in TIPC's global list of ports
- * @dispatcher: ptr to routine which handles received messages
- * @wakeup: ptr to routine to call when port is no longer congested
- * @wait_list: adjacent ports in list of ports waiting on link congestion
- * @waiting_pkts:
- * @sent: # of non-empty messages sent by port
- * @acked: # of non-empty message acknowledgements from connected port's peer
- * @publications: list of publications for port
- * @pub_count: total # of publications port has made during its lifetime
- * @probing_state:
- * @probing_interval:
- * @timer_ref:
- * @subscription: "node down" subscription used to terminate failed connections
- */
-struct tipc_port {
- struct sock *sk;
- spinlock_t *lock;
- int connected;
- u32 conn_type;
- u32 conn_instance;
- u32 conn_unacked;
- int published;
- u32 congested;
- u32 max_pkt;
- u32 ref;
- struct tipc_msg phdr;
- struct list_head port_list;
- u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
- void (*wakeup)(struct tipc_port *);
- struct list_head wait_list;
- u32 waiting_pkts;
- u32 sent;
- u32 acked;
- struct list_head publications;
- u32 pub_count;
- u32 probing_state;
- u32 probing_interval;
- struct timer_list timer;
- struct tipc_node_subscr subscription;
-};
-
-extern spinlock_t tipc_port_list_lock;
-struct tipc_port_list;
-
-/*
- * TIPC port manipulation routines
- */
-struct tipc_port *tipc_createport(struct sock *sk,
- u32 (*dispatcher)(struct tipc_port *,
- struct sk_buff *),
- void (*wakeup)(struct tipc_port *),
- const u32 importance);
-
-int tipc_reject_msg(struct sk_buff *buf, u32 err);
-
-void tipc_acknowledge(u32 port_ref, u32 ack);
-
-int tipc_deleteport(u32 portref);
-
-int tipc_portimportance(u32 portref, unsigned int *importance);
-int tipc_set_portimportance(u32 portref, unsigned int importance);
-
-int tipc_portunreliable(u32 portref, unsigned int *isunreliable);
-int tipc_set_portunreliable(u32 portref, unsigned int isunreliable);
-
-int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
-int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
-
-int tipc_publish(u32 portref, unsigned int scope,
- struct tipc_name_seq const *name_seq);
-int tipc_withdraw(u32 portref, unsigned int scope,
- struct tipc_name_seq const *name_seq);
-
-int tipc_connect(u32 portref, struct tipc_portid const *port);
-
-int tipc_disconnect(u32 portref);
-
-int tipc_shutdown(u32 ref);
-
-
-/*
- * The following routines require that the port be locked on entry
- */
-int __tipc_disconnect(struct tipc_port *tp_ptr);
-int __tipc_connect(u32 ref, struct tipc_port *p_ptr,
- struct tipc_portid const *peer);
-int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg);
-
-/*
- * TIPC messaging routines
- */
-int tipc_port_recv_msg(struct sk_buff *buf);
-int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len);
-
-int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len);
-
-int tipc_send2port(u32 portref, struct tipc_portid const *dest,
- unsigned int num_sect, struct iovec const *msg_sect,
- unsigned int total_len);
-
-int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
- unsigned int section_count, struct iovec const *msg,
- unsigned int total_len);
-
-int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, u32 num_sect,
- unsigned int total_len, int err);
-struct sk_buff *tipc_port_get_ports(void);
-void tipc_port_recv_proto_msg(struct sk_buff *buf);
-void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp);
-void tipc_port_reinit(void);
-
-/**
- * tipc_port_lock - lock port instance referred to and return its pointer
- */
-static inline struct tipc_port *tipc_port_lock(u32 ref)
-{
- return (struct tipc_port *)tipc_ref_lock(ref);
-}
-
-/**
- * tipc_port_unlock - unlock a port instance
- *
- * Can use pointer instead of tipc_ref_unlock() since port is already locked.
- */
-static inline void tipc_port_unlock(struct tipc_port *p_ptr)
-{
- spin_unlock_bh(p_ptr->lock);
-}
-
-static inline struct tipc_port *tipc_port_deref(u32 ref)
-{
- return (struct tipc_port *)tipc_ref_deref(ref);
-}
-
-static inline int tipc_port_congested(struct tipc_port *p_ptr)
-{
- return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
-}
-
-#endif
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
deleted file mode 100644
index 2a2a938dc22c..000000000000
--- a/net/tipc/ref.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * net/tipc/ref.c: TIPC object registry code
- *
- * Copyright (c) 1991-2006, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "ref.h"
-
-/**
- * struct reference - TIPC object reference entry
- * @object: pointer to object associated with reference entry
- * @lock: spinlock controlling access to object
- * @ref: reference value for object (combines instance & array index info)
- */
-struct reference {
- void *object;
- spinlock_t lock;
- u32 ref;
-};
-
-/**
- * struct tipc_ref_table - table of TIPC object reference entries
- * @entries: pointer to array of reference entries
- * @capacity: array index of first unusable entry
- * @init_point: array index of first uninitialized entry
- * @first_free: array index of first unused object reference entry
- * @last_free: array index of last unused object reference entry
- * @index_mask: bitmask for array index portion of reference values
- * @start_mask: initial value for instance value portion of reference values
- */
-struct ref_table {
- struct reference *entries;
- u32 capacity;
- u32 init_point;
- u32 first_free;
- u32 last_free;
- u32 index_mask;
- u32 start_mask;
-};
-
-/*
- * Object reference table consists of 2**N entries.
- *
- * State Object ptr Reference
- * ----- ---------- ---------
- * In use non-NULL XXXX|own index
- * (XXXX changes each time entry is acquired)
- * Free NULL YYYY|next free index
- * (YYYY is one more than last used XXXX)
- * Uninitialized NULL 0
- *
- * Entry 0 is not used; this allows index 0 to denote the end of the free list.
- *
- * Note that a reference value of 0 does not necessarily indicate that an
- * entry is uninitialized, since the last entry in the free list could also
- * have a reference value of 0 (although this is unlikely).
- */
-
-static struct ref_table tipc_ref_table;
-
-static DEFINE_RWLOCK(ref_table_lock);
-
-/**
- * tipc_ref_table_init - create reference table for objects
- */
-int tipc_ref_table_init(u32 requested_size, u32 start)
-{
- struct reference *table;
- u32 actual_size;
-
- /* account for unused entry, then round up size to a power of 2 */
-
- requested_size++;
- for (actual_size = 16; actual_size < requested_size; actual_size <<= 1)
- /* do nothing */ ;
-
- /* allocate table & mark all entries as uninitialized */
- table = vzalloc(actual_size * sizeof(struct reference));
- if (table == NULL)
- return -ENOMEM;
-
- tipc_ref_table.entries = table;
- tipc_ref_table.capacity = requested_size;
- tipc_ref_table.init_point = 1;
- tipc_ref_table.first_free = 0;
- tipc_ref_table.last_free = 0;
- tipc_ref_table.index_mask = actual_size - 1;
- tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
-
- return 0;
-}
-
-/**
- * tipc_ref_table_stop - destroy reference table for objects
- */
-void tipc_ref_table_stop(void)
-{
- if (!tipc_ref_table.entries)
- return;
-
- vfree(tipc_ref_table.entries);
- tipc_ref_table.entries = NULL;
-}
-
-/**
- * tipc_ref_acquire - create reference to an object
- *
- * Register an object pointer in reference table and lock the object.
- * Returns a unique reference value that is used from then on to retrieve the
- * object pointer, or to determine that the object has been deregistered.
- *
- * Note: The object is returned in the locked state so that the caller can
- * register a partially initialized object, without running the risk that
- * the object will be accessed before initialization is complete.
- */
-u32 tipc_ref_acquire(void *object, spinlock_t **lock)
-{
- u32 index;
- u32 index_mask;
- u32 next_plus_upper;
- u32 ref;
- struct reference *entry = NULL;
-
- if (!object) {
- pr_err("Attempt to acquire ref. to non-existent obj\n");
- return 0;
- }
- if (!tipc_ref_table.entries) {
- pr_err("Ref. table not found in acquisition attempt\n");
- return 0;
- }
-
- /* take a free entry, if available; otherwise initialize a new entry */
- write_lock_bh(&ref_table_lock);
- if (tipc_ref_table.first_free) {
- index = tipc_ref_table.first_free;
- entry = &(tipc_ref_table.entries[index]);
- index_mask = tipc_ref_table.index_mask;
- next_plus_upper = entry->ref;
- tipc_ref_table.first_free = next_plus_upper & index_mask;
- ref = (next_plus_upper & ~index_mask) + index;
- } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
- index = tipc_ref_table.init_point++;
- entry = &(tipc_ref_table.entries[index]);
- spin_lock_init(&entry->lock);
- ref = tipc_ref_table.start_mask + index;
- } else {
- ref = 0;
- }
- write_unlock_bh(&ref_table_lock);
-
- /*
- * Grab the lock so no one else can modify this entry
- * While we assign its ref value & object pointer
- */
- if (entry) {
- spin_lock_bh(&entry->lock);
- entry->ref = ref;
- entry->object = object;
- *lock = &entry->lock;
- /*
- * keep it locked, the caller is responsible
- * for unlocking this when they're done with it
- */
- }
-
- return ref;
-}
-
-/**
- * tipc_ref_discard - invalidate references to an object
- *
- * Disallow future references to an object and free up the entry for re-use.
- * Note: The entry's spin_lock may still be busy after discard
- */
-void tipc_ref_discard(u32 ref)
-{
- struct reference *entry;
- u32 index;
- u32 index_mask;
-
- if (!tipc_ref_table.entries) {
- pr_err("Ref. table not found during discard attempt\n");
- return;
- }
-
- index_mask = tipc_ref_table.index_mask;
- index = ref & index_mask;
- entry = &(tipc_ref_table.entries[index]);
-
- write_lock_bh(&ref_table_lock);
-
- if (!entry->object) {
- pr_err("Attempt to discard ref. to non-existent obj\n");
- goto exit;
- }
- if (entry->ref != ref) {
- pr_err("Attempt to discard non-existent reference\n");
- goto exit;
- }
-
- /*
- * mark entry as unused; increment instance part of entry's reference
- * to invalidate any subsequent references
- */
- entry->object = NULL;
- entry->ref = (ref & ~index_mask) + (index_mask + 1);
-
- /* append entry to free entry list */
- if (tipc_ref_table.first_free == 0)
- tipc_ref_table.first_free = index;
- else
- tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index;
- tipc_ref_table.last_free = index;
-
-exit:
- write_unlock_bh(&ref_table_lock);
-}
-
-/**
- * tipc_ref_lock - lock referenced object and return pointer to it
- */
-void *tipc_ref_lock(u32 ref)
-{
- if (likely(tipc_ref_table.entries)) {
- struct reference *entry;
-
- entry = &tipc_ref_table.entries[ref &
- tipc_ref_table.index_mask];
- if (likely(entry->ref != 0)) {
- spin_lock_bh(&entry->lock);
- if (likely((entry->ref == ref) && (entry->object)))
- return entry->object;
- spin_unlock_bh(&entry->lock);
- }
- }
- return NULL;
-}
-
-
-/**
- * tipc_ref_deref - return pointer referenced object (without locking it)
- */
-void *tipc_ref_deref(u32 ref)
-{
- if (likely(tipc_ref_table.entries)) {
- struct reference *entry;
-
- entry = &tipc_ref_table.entries[ref &
- tipc_ref_table.index_mask];
- if (likely(entry->ref == ref))
- return entry->object;
- }
- return NULL;
-}
diff --git a/net/tipc/server.c b/net/tipc/server.c
deleted file mode 100644
index 19da5abe0fa6..000000000000
--- a/net/tipc/server.c
+++ /dev/null
@@ -1,596 +0,0 @@
-/*
- * net/tipc/server.c: TIPC server infrastructure
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "server.h"
-#include "core.h"
-#include <net/sock.h>
-
-/* Number of messages to send before rescheduling */
-#define MAX_SEND_MSG_COUNT 25
-#define MAX_RECV_MSG_COUNT 25
-#define CF_CONNECTED 1
-
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
-
-/**
- * struct tipc_conn - TIPC connection structure
- * @kref: reference counter to connection object
- * @conid: connection identifier
- * @sock: socket handler associated with connection
- * @flags: indicates connection state
- * @server: pointer to connected server
- * @rwork: receive work item
- * @usr_data: user-specified field
- * @rx_action: what to do when connection socket is active
- * @outqueue: pointer to first outbound message in queue
- * @outqueue_lock: controll access to the outqueue
- * @outqueue: list of connection objects for its server
- * @swork: send work item
- */
-struct tipc_conn {
- struct kref kref;
- int conid;
- struct socket *sock;
- unsigned long flags;
- struct tipc_server *server;
- struct work_struct rwork;
- int (*rx_action) (struct tipc_conn *con);
- void *usr_data;
- struct list_head outqueue;
- spinlock_t outqueue_lock;
- struct work_struct swork;
-};
-
-/* An entry waiting to be sent */
-struct outqueue_entry {
- struct list_head list;
- struct kvec iov;
- struct sockaddr_tipc dest;
-};
-
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static void tipc_conn_kref_release(struct kref *kref)
-{
- struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
-
- if (con->sock) {
- tipc_sock_release_local(con->sock);
- con->sock = NULL;
- }
-
- tipc_clean_outqueues(con);
-
- if (con->conid)
- s->tipc_conn_shutdown(con->conid, con->usr_data);
-
- kfree(con);
-}
-
-static void conn_put(struct tipc_conn *con)
-{
- kref_put(&con->kref, tipc_conn_kref_release);
-}
-
-static void conn_get(struct tipc_conn *con)
-{
- kref_get(&con->kref);
-}
-
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- spin_lock_bh(&s->idr_lock);
- con = idr_find(&s->conn_idr, conid);
- if (con)
- conn_get(con);
- spin_unlock_bh(&s->idr_lock);
- return con;
-}
-
-static void sock_data_ready(struct sock *sk, int unused)
-{
- struct tipc_conn *con;
-
- read_lock(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->rcv_wq, &con->rwork))
- conn_put(con);
- }
- read_unlock(&sk->sk_callback_lock);
-}
-
-static void sock_write_space(struct sock *sk)
-{
- struct tipc_conn *con;
-
- read_lock(&sk->sk_callback_lock);
- con = sock2con(sk);
- if (con && test_bit(CF_CONNECTED, &con->flags)) {
- conn_get(con);
- if (!queue_work(con->server->send_wq, &con->swork))
- conn_put(con);
- }
- read_unlock(&sk->sk_callback_lock);
-}
-
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
- struct sock *sk = sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
-
- sk->sk_data_ready = sock_data_ready;
- sk->sk_write_space = sock_write_space;
- sk->sk_user_data = con;
-
- con->sock = sock;
-
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_unregister_callbacks(struct tipc_conn *con)
-{
- struct sock *sk = con->sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_user_data = NULL;
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
-
- if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
-
- tipc_unregister_callbacks(con);
-
- /* We shouldn't flush pending works as we may be in the
- * thread. In fact the races with pending rx/tx work structs
- * are harmless for us here as we have already deleted this
- * connection from server connection list and set
- * sk->sk_user_data to 0 before releasing connection object.
- */
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
-
- conn_put(con);
- }
-}
-
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int ret;
-
- con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
- if (!con)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&con->kref);
- INIT_LIST_HEAD(&con->outqueue);
- spin_lock_init(&con->outqueue_lock);
- INIT_WORK(&con->swork, tipc_send_work);
- INIT_WORK(&con->rwork, tipc_recv_work);
-
- spin_lock_bh(&s->idr_lock);
- ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
- if (ret < 0) {
- kfree(con);
- spin_unlock_bh(&s->idr_lock);
- return ERR_PTR(-ENOMEM);
- }
- con->conid = ret;
- s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
-
- set_bit(CF_CONNECTED, &con->flags);
- con->server = s;
-
- return con;
-}
-
-static int tipc_receive_from_sock(struct tipc_conn *con)
-{
- struct msghdr msg = {};
- struct tipc_server *s = con->server;
- struct sockaddr_tipc addr;
- struct kvec iov;
- void *buf;
- int ret;
-
- buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
- if (!buf) {
- ret = -ENOMEM;
- goto out_close;
- }
-
- iov.iov_base = buf;
- iov.iov_len = s->max_rcvbuf_size;
- msg.msg_name = &addr;
- ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
- MSG_DONTWAIT);
- if (ret <= 0) {
- kmem_cache_free(s->rcvbuf_cache, buf);
- goto out_close;
- }
-
- s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret);
-
- kmem_cache_free(s->rcvbuf_cache, buf);
-
- return 0;
-
-out_close:
- if (ret != -EWOULDBLOCK)
- tipc_close_conn(con);
- else if (ret == 0)
- /* Don't return success if we really got EOF */
- ret = -EAGAIN;
-
- return ret;
-}
-
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = con->sock;
- struct socket *newsock;
- struct tipc_conn *newcon;
- int ret;
-
- ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- return ret;
-
- newcon = tipc_alloc_conn(con->server);
- if (IS_ERR(newcon)) {
- ret = PTR_ERR(newcon);
- sock_release(newsock);
- return ret;
- }
-
- newcon->rx_action = tipc_receive_from_sock;
- tipc_register_callbacks(newsock, newcon);
-
- /* Notify that new connection is incoming */
- newcon->usr_data = s->tipc_conn_new(newcon->conid);
-
- /* Wake up receive process in case of 'SYN+' message */
- newsock->sk->sk_data_ready(newsock->sk, 0);
- return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
- struct tipc_server *s = con->server;
- struct socket *sock = NULL;
- int ret;
-
- ret = tipc_sock_create_local(s->type, &sock);
- if (ret < 0)
- return NULL;
- ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&s->imp, sizeof(s->imp));
- if (ret < 0)
- goto create_err;
- ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
- if (ret < 0)
- goto create_err;
-
- switch (s->type) {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- con->rx_action = tipc_accept_from_sock;
-
- ret = kernel_listen(sock, 0);
- if (ret < 0)
- goto create_err;
- break;
- case SOCK_DGRAM:
- case SOCK_RDM:
- con->rx_action = tipc_receive_from_sock;
- break;
- default:
- pr_err("Unknown socket type %d\n", s->type);
- goto create_err;
- }
- return sock;
-
-create_err:
- sock_release(sock);
- con->sock = NULL;
- return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
- struct socket *sock;
- struct tipc_conn *con;
-
- con = tipc_alloc_conn(s);
- if (IS_ERR(con))
- return PTR_ERR(con);
-
- sock = tipc_create_listen_sock(con);
- if (!sock)
- return -EINVAL;
-
- tipc_register_callbacks(sock, con);
- return 0;
-}
-
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
- struct outqueue_entry *entry;
- void *buf;
-
- entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
- if (!entry)
- return NULL;
-
- buf = kmalloc(len, GFP_ATOMIC);
- if (!buf) {
- kfree(entry);
- return NULL;
- }
-
- memcpy(buf, data, len);
- entry->iov.iov_base = buf;
- entry->iov.iov_len = len;
-
- return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
- kfree(e->iov.iov_base);
- kfree(e);
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
- struct outqueue_entry *e, *safe;
-
- spin_lock_bh(&con->outqueue_lock);
- list_for_each_entry_safe(e, safe, &con->outqueue, list) {
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-}
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len)
-{
- struct outqueue_entry *e;
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (!con)
- return -EINVAL;
-
- e = tipc_alloc_entry(data, len);
- if (!e) {
- conn_put(con);
- return -ENOMEM;
- }
-
- if (addr)
- memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
- spin_lock_bh(&con->outqueue_lock);
- list_add_tail(&e->list, &con->outqueue);
- spin_unlock_bh(&con->outqueue_lock);
-
- if (test_bit(CF_CONNECTED, &con->flags))
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
-
- return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
- struct tipc_conn *con;
-
- con = tipc_conn_lookup(s, conid);
- if (con) {
- tipc_close_conn(con);
- conn_put(con);
- }
-}
-
-static void tipc_send_to_sock(struct tipc_conn *con)
-{
- int count = 0;
- struct tipc_server *s = con->server;
- struct outqueue_entry *e;
- struct msghdr msg;
- int ret;
-
- spin_lock_bh(&con->outqueue_lock);
- while (1) {
- e = list_entry(con->outqueue.next, struct outqueue_entry,
- list);
- if ((struct list_head *) e == &con->outqueue)
- break;
- spin_unlock_bh(&con->outqueue_lock);
-
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
-
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
- }
-
- /* Don't starve users filling buffers */
- if (++count >= MAX_SEND_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
-
- spin_lock_bh(&con->outqueue_lock);
- list_del(&e->list);
- tipc_free_entry(e);
- }
- spin_unlock_bh(&con->outqueue_lock);
-out:
- return;
-
-send_err:
- tipc_close_conn(con);
-}
-
-static void tipc_recv_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
- int count = 0;
-
- while (test_bit(CF_CONNECTED, &con->flags)) {
- if (con->rx_action(con))
- break;
-
- /* Don't flood Rx machine */
- if (++count >= MAX_RECV_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- }
- conn_put(con);
-}
-
-static void tipc_send_work(struct work_struct *work)
-{
- struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
-
- if (test_bit(CF_CONNECTED, &con->flags))
- tipc_send_to_sock(con);
-
- conn_put(con);
-}
-
-static void tipc_work_stop(struct tipc_server *s)
-{
- destroy_workqueue(s->rcv_wq);
- destroy_workqueue(s->send_wq);
-}
-
-static int tipc_work_start(struct tipc_server *s)
-{
- s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1);
- if (!s->rcv_wq) {
- pr_err("can't start tipc receive workqueue\n");
- return -ENOMEM;
- }
-
- s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1);
- if (!s->send_wq) {
- pr_err("can't start tipc send workqueue\n");
- destroy_workqueue(s->rcv_wq);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int tipc_server_start(struct tipc_server *s)
-{
- int ret;
-
- spin_lock_init(&s->idr_lock);
- idr_init(&s->conn_idr);
- s->idr_in_use = 0;
-
- s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!s->rcvbuf_cache)
- return -ENOMEM;
-
- ret = tipc_work_start(s);
- if (ret < 0) {
- kmem_cache_destroy(s->rcvbuf_cache);
- return ret;
- }
- s->enabled = 1;
-
- return tipc_open_listening_sock(s);
-}
-
-void tipc_server_stop(struct tipc_server *s)
-{
- struct tipc_conn *con;
- int total = 0;
- int id;
-
- if (!s->enabled)
- return;
-
- s->enabled = 0;
- spin_lock_bh(&s->idr_lock);
- for (id = 0; total < s->idr_in_use; id++) {
- con = idr_find(&s->conn_idr, id);
- if (con) {
- total++;
- spin_unlock_bh(&s->idr_lock);
- tipc_close_conn(con);
- spin_lock_bh(&s->idr_lock);
- }
- }
- spin_unlock_bh(&s->idr_lock);
-
- tipc_work_stop(s);
- kmem_cache_destroy(s->rcvbuf_cache);
- idr_destroy(&s->conn_idr);
-}
diff --git a/net/tipc/server.h b/net/tipc/server.h
deleted file mode 100644
index 98b23f20bc0f..000000000000
--- a/net/tipc/server.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * net/tipc/server.h: Include file for TIPC server code
- *
- * Copyright (c) 2012-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_SERVER_H
-#define _TIPC_SERVER_H
-
-#include "core.h"
-
-#define TIPC_SERVER_NAME_LEN 32
-
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_shutdown: callback will be called when connection is shut down
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- * @enabled: identify whether server is launched or not
- */
-struct tipc_server {
- struct idr conn_idr;
- spinlock_t idr_lock;
- int idr_in_use;
- struct kmem_cache *rcvbuf_cache;
- struct workqueue_struct *rcv_wq;
- struct workqueue_struct *send_wq;
- int max_rcvbuf_size;
- void *(*tipc_conn_new) (int conid);
- void (*tipc_conn_shutdown) (int conid, void *usr_data);
- void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr,
- void *usr_data, void *buf, size_t len);
- struct sockaddr_tipc *saddr;
- const char name[TIPC_SERVER_NAME_LEN];
- int imp;
- int type;
- int enabled;
-};
-
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
- struct sockaddr_tipc *addr, void *data, size_t len);
-
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
-
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
-#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ce8249c76827..817b07d95a91 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,8 +1,9 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012 Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2019, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,115 +35,414 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "core.h"
-#include "port.h"
-
-#include <linux/export.h>
-#include <net/sock.h>
+#include <linux/rhashtable.h>
+#include <linux/sched/signal.h>
+#include <trace/events/sock.h>
-#define SS_LISTENING -1 /* socket is listening */
-#define SS_READY -2 /* socket is connectionless */
+#include "core.h"
+#include "name_table.h"
+#include "node.h"
+#include "link.h"
+#include "name_distr.h"
+#include "socket.h"
+#include "bcast.h"
+#include "netlink.h"
+#include "group.h"
+#include "trace.h"
+
+#define NAGLE_START_INIT 4
+#define NAGLE_START_MAX 1024
+#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
+#define TIPC_MAX_PORT 0xffffffff
+#define TIPC_MIN_PORT 1
+#define TIPC_ACK_RATE 4 /* ACK at 1/4 of rcv window size */
+
+/* TIPC socket states. Each one is an alias for a TCP state constant; the
+ * value is what tipc_set_sk_state() stores in sk->sk_state.
+ */
+enum {
+ TIPC_LISTEN = TCP_LISTEN,
+ TIPC_ESTABLISHED = TCP_ESTABLISHED,
+ TIPC_OPEN = TCP_CLOSE,
+ TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
+ TIPC_CONNECTING = TCP_SYN_SENT,
+};
-#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+struct sockaddr_pair {
+ struct sockaddr_tipc sock;
+ struct sockaddr_tipc member;
+};
+/**
+ * struct tipc_sock - TIPC socket structure
+ * @sk: socket - interacts with 'port' and with user via the socket API
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @maxnagle: maximum size of msg which can be subject to nagle
+ * @portid: unique port identity in TIPC socket hash table
+ * @phdr: preformatted message header used when sending messages
+ * @cong_links: list of congested links
+ * @publications: list of publications for port
+ * @pub_count: total # of publications port has made during its lifetime
+ * @conn_timeout: the time we can wait for an unresponded setup request
+ * @probe_unacked: probe has not received ack yet
+ * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
+ * @cong_link_cnt: number of congested links
+ * @snt_unacked: # messages sent by socket, and not yet acked by peer
+ * @snd_win: send window size
+ * @peer_caps: peer capabilities mask
+ * @rcv_unacked: # messages read by user, but not yet acked back to peer
+ * @rcv_win: receive window size
+ * @peer: 'connected' peer for dgram/rdm
+ * @node: hash table node
+ * @mc_method: cookie for use between socket and broadcast layer
+ * @rcu: rcu struct for tipc_sock
+ * @group: TIPC communications group
+ * @oneway: message count in one direction (FIXME)
+ * @nagle_start: current nagle value
+ * @snd_backlog: send backlog count
+ * @msg_acc: messages accepted; used in managing backlog and nagle
+ * @pkt_cnt: TIPC socket packet count
+ * @expect_ack: whether this TIPC socket is expecting an ack
+ * @nodelay: setsockopt() TIPC_NODELAY setting
+ * @group_is_open: TIPC socket group is fully open (FIXME)
+ * @published: true if port has one or more associated names
+ * @conn_addrtype: address type used when establishing connection
+ */
struct tipc_sock {
struct sock sk;
- struct tipc_port *p;
- struct tipc_portid peer_name;
- unsigned int conn_timeout;
+ u32 max_pkt;
+ u32 maxnagle;
+ u32 portid;
+ struct tipc_msg phdr;
+ struct list_head cong_links;
+ struct list_head publications;
+ u32 pub_count;
+ atomic_t dupl_rcvcnt;
+ u16 conn_timeout;
+ bool probe_unacked;
+ u16 cong_link_cnt;
+ u16 snt_unacked;
+ u16 snd_win;
+ u16 peer_caps;
+ u16 rcv_unacked;
+ u16 rcv_win;
+ struct sockaddr_tipc peer;
+ struct rhash_head node;
+ struct tipc_mc_method mc_method;
+ struct rcu_head rcu;
+ struct tipc_group *group;
+ u32 oneway;
+ u32 nagle_start;
+ u16 snd_backlog;
+ u16 msg_acc;
+ u16 pkt_cnt;
+ bool expect_ack;
+ bool nodelay;
+ bool group_is_open;
+ bool published;
+ u8 conn_addrtype;
};
-#define tipc_sk(sk) ((struct tipc_sock *)(sk))
-#define tipc_sk_port(sk) (tipc_sk(sk)->p)
-
-#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \
- (sock->state == SS_DISCONNECTING))
-
-static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
-static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
-static void wakeupdispatch(struct tipc_port *tport);
-static void tipc_data_ready(struct sock *sk, int len);
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
-static int release(struct socket *sock);
-static int accept(struct socket *sock, struct socket *new_sock, int flags);
+static void tipc_sock_destruct(struct sock *sk);
+static int tipc_release(struct socket *sock);
+static void tipc_sk_timeout(struct timer_list *t);
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
+static int tipc_sk_leave(struct tipc_sock *tsk);
+static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
+static int tipc_sk_insert(struct tipc_sock *tsk);
+static void tipc_sk_remove(struct tipc_sock *tsk);
+static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
-
static struct proto tipc_proto;
-static struct proto tipc_proto_kern;
+static const struct rhashtable_params tsk_rht_params;
+
+/* tsk_own_node - read the 'previous node' field of the socket's
+ * preformatted header, which this file uses as the socket's own node
+ */
+static u32 tsk_own_node(struct tipc_sock *tsk)
+{
+	struct tipc_msg *hdr = &tsk->phdr;
+
+	return msg_prevnode(hdr);
+}
-static int sockets_enabled;
+/* tsk_peer_node - read the destination node field of the socket's
+ * preformatted header
+ */
+static u32 tsk_peer_node(struct tipc_sock *tsk)
+{
+	struct tipc_msg *hdr = &tsk->phdr;
+
+	return msg_destnode(hdr);
+}
-/*
- * Revised TIPC socket locking policy:
- *
- * Most socket operations take the standard socket lock when they start
- * and hold it until they finish (or until they need to sleep). Acquiring
- * this lock grants the owner exclusive access to the fields of the socket
- * data structures, with the exception of the backlog queue. A few socket
- * operations can be done without taking the socket lock because they only
- * read socket information that never changes during the life of the socket.
- *
- * Socket operations may acquire the lock for the associated TIPC port if they
- * need to perform an operation on the port. If any routine needs to acquire
- * both the socket lock and the port lock it must take the socket lock first
- * to avoid the risk of deadlock.
- *
- * The dispatcher handling incoming messages cannot grab the socket lock in
- * the standard fashion, since invoked it runs at the BH level and cannot block.
- * Instead, it checks to see if the socket lock is currently owned by someone,
- * and either handles the message itself or adds it to the socket's backlog
- * queue; in the latter case the queued message is processed once the process
- * owning the socket lock releases it.
- *
- * NOTE: Releasing the socket lock while an operation is sleeping overcomes
- * the problem of a blocked socket operation preventing any other operations
- * from occurring. However, applications must be careful if they have
- * multiple threads trying to send (or receive) on the same socket, as these
- * operations might interfere with each other. For example, doing a connect
- * and a receive at the same time might allow the receive to consume the
- * ACK message meant for the connect. While additional work could be done
- * to try and overcome this, it doesn't seem to be worthwhile at the present.
- *
- * NOTE: Releasing the socket lock while an operation is sleeping also ensures
- * that another operation that must be performed in a non-blocking manner is
- * not delayed for very long because the lock has already been taken.
- *
- * NOTE: This code assumes that certain fields of a port/socket pair are
- * constant over its lifetime; such fields can be examined without taking
- * the socket lock and/or port lock, and do not need to be re-read even
- * after resuming processing after waiting. These fields include:
- * - socket type
- * - pointer to socket sk structure (aka tipc_sock structure)
- * - pointer to port structure
- * - port reference
+/* tsk_peer_port - read the destination port field of the socket's
+ * preformatted header
+ */
+static u32 tsk_peer_port(struct tipc_sock *tsk)
+{
+	struct tipc_msg *hdr = &tsk->phdr;
+
+	return msg_destport(hdr);
+}
+
+/* tsk_unreliable - true if the 'source droppable' bit is set in the
+ * socket's preformatted header
+ */
+static bool tsk_unreliable(struct tipc_sock *tsk)
+{
+	struct tipc_msg *hdr = &tsk->phdr;
+
+	return !!msg_src_droppable(hdr);
+}
+
+/* tsk_set_unreliable - set (1) or clear (0) the 'source droppable' bit in
+ * the socket's preformatted header
+ */
+static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
+{
+	int droppable = 0;
+
+	if (unreliable)
+		droppable = 1;
+	msg_set_src_droppable(&tsk->phdr, droppable);
+}
+
+/* tsk_unreturnable - true if the 'destination droppable' bit is set in
+ * the socket's preformatted header
+ */
+static bool tsk_unreturnable(struct tipc_sock *tsk)
+{
+	struct tipc_msg *hdr = &tsk->phdr;
+
+	return !!msg_dest_droppable(hdr);
+}
+
+/* tsk_set_unreturnable - set (1) or clear (0) the 'destination droppable'
+ * bit in the socket's preformatted header
+ */
+static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
+{
+	int droppable = 0;
+
+	if (unreturnable)
+		droppable = 1;
+	msg_set_dest_droppable(&tsk->phdr, droppable);
+}
+
+/* tsk_importance - read the importance level stored in the socket's
+ * preformatted header
+ */
+static int tsk_importance(struct tipc_sock *tsk)
+{
+	struct tipc_msg *hdr = &tsk->phdr;
+
+	return msg_importance(hdr);
+}
+
+/* tipc_sk - map a generic 'struct sock' to the TIPC socket embedding it */
+static struct tipc_sock *tipc_sk(const struct sock *sk)
+{
+	struct tipc_sock *tsk = container_of(sk, struct tipc_sock, sk);
+
+	return tsk;
+}
+
+/* tsk_set_importance - set the importance level in the socket's
+ * preformatted message header
+ * @sk: socket
+ * @imp: requested importance; must lie in [0, TIPC_CRITICAL_IMPORTANCE]
+ *
+ * Return: 0 on success, -EINVAL if @imp is out of range
+ */
+int tsk_set_importance(struct sock *sk, int imp)
+{
+	/* A negative value would slip past an upper-bound-only check and
+	 * then turn into a huge number in the (u32) cast below, so it has
+	 * to be rejected explicitly.
+	 */
+	if (imp < 0 || imp > TIPC_CRITICAL_IMPORTANCE)
+		return -EINVAL;
+	msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+	return 0;
+}
+
+/* tsk_conn_cong - true once the amount sent but not yet acked has grown
+ * beyond the send window
+ */
+static bool tsk_conn_cong(struct tipc_sock *tsk)
+{
+	return tsk->snd_win < tsk->snt_unacked;
+}
+
+/* tsk_blocks - number of flow control blocks charged for @len bytes:
+ * the count of whole FLOWCTL_BLK_SZ units in @len, plus one
+ */
+static u16 tsk_blocks(int len)
+{
+	return 1 + (len / FLOWCTL_BLK_SZ);
+}
+
+/* tsk_adv_blocks(): translate a buffer size in bytes to the number of
+ * blocks that can be advertised to the peer.
+ * The division by 4 accounts for the ratio truesize(len)/len, which can
+ * be trusted to be always < 4 for len >= FLOWCTL_BLK_SZ.
+ */
+static u16 tsk_adv_blocks(int len)
+{
+	return (len / FLOWCTL_BLK_SZ) / 4;
+}
+
+/* tsk_inc(): amount by which a sent/received counter grows for one message
+ * - With block based flow control the message is charged in blocks,
+ *   using the same formula as tsk_blocks()
+ * - If the peer does not support block based flow control we fall back
+ *   to message based counting, i.e. every message counts as one
+ */
+static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
+{
+	if (unlikely(!(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)))
+		return 1;
+	return tsk_blocks(msglen);
+}
+
+/* tsk_set_nagle - recompute 'maxnagle', which switches nagle on or off
+ *
+ * maxnagle stays 0 (nagle off) unless this is a SOCK_STREAM socket,
+ * TIPC_NODELAY is not set and the peer advertises TIPC_NAGLE.
+ */
+static void tsk_set_nagle(struct tipc_sock *tsk)
+{
+	tsk->maxnagle = 0;
+
+	if (tsk->sk.sk_type != SOCK_STREAM || tsk->nodelay ||
+	    !(tsk->peer_caps & TIPC_NAGLE))
+		return;
+
+	/* Limit node local buffer size to avoid receive queue overflow */
+	tsk->maxnagle = (tsk->max_pkt == MAX_MSG_SIZE) ? 1500 : tsk->max_pkt;
+}
/**
- * advance_rx_queue - discard first buffer in socket receive queue
+ * tsk_advance_rx_queue - discard first buffer in socket receive queue
+ * @sk: network socket
*
* Caller must hold socket lock
*/
-static void advance_rx_queue(struct sock *sk)
+static void tsk_advance_rx_queue(struct sock *sk)
{
+ trace_tipc_sk_advance_rx(sk, NULL, TIPC_DUMP_SK_RCVQ, " ");
kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}
+/* tipc_sk_respond() : turn a received message around and send it back
+ * to its sender, carrying error code @err
+ *
+ * Nothing is sent if tipc_msg_reverse() reports failure. The header is
+ * looked up only after that call because it is handed &skb and may
+ * therefore substitute the buffer.
+ */
+static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
+{
+	struct net *net = sock_net(sk);
+	struct tipc_msg *hdr;
+
+	if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
+		return;
+
+	trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE, "@sk_respond!");
+	hdr = buf_msg(skb);
+	tipc_node_xmit_skb(net, skb, msg_destnode(hdr), msg_origport(hdr));
+}
+
/**
- * reject_rx_queue - reject all buffers in socket receive queue
+ * tsk_rej_rx_queue - reject all buffers in socket receive queue
+ * @sk: network socket
+ * @error: response error code
*
* Caller must hold socket lock
*/
-static void reject_rx_queue(struct sock *sk)
+static void tsk_rej_rx_queue(struct sock *sk, int error)
{
- struct sk_buff *buf;
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
+ tipc_sk_respond(sk, skb, error);
+}
+
+/* tipc_sk_connected - true if the socket state, sampled once with
+ * READ_ONCE(), is TIPC_ESTABLISHED
+ */
+static bool tipc_sk_connected(const struct sock *sk)
+{
+	int state = READ_ONCE(sk->sk_state);
+
+	return state == TIPC_ESTABLISHED;
+}
+
+/* tipc_sk_type_connectionless - check if the socket type is connectionless
+ * @sk: socket
+ *
+ * Returns true for SOCK_RDM and SOCK_DGRAM sockets, false otherwise
+ */
+static bool tipc_sk_type_connectionless(struct sock *sk)
+{
+	switch (sk->sk_type) {
+	case SOCK_RDM:
+	case SOCK_DGRAM:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* tsk_peer_msg - verify if message was sent by connected port's peer
+ *
+ * Handles cases where the node's network address has changed from
+ * the default of <0.0.0> to its configured setting.
+ */
+static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
+{
+ struct sock *sk = &tsk->sk;
+ u32 self = tipc_own_addr(sock_net(sk));
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 orig_node, peer_node;
+
+ if (unlikely(!tipc_sk_connected(sk)))
+ return false;
+
+ if (unlikely(msg_origport(msg) != peer_port))
+ return false;
- while ((buf = __skb_dequeue(&sk->sk_receive_queue)))
- tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
+ orig_node = msg_orignode(msg);
+ peer_node = tsk_peer_node(tsk);
+
+ if (likely(orig_node == peer_node))
+ return true;
+
+ if (!orig_node && peer_node == self)
+ return true;
+
+ if (!peer_node && orig_node == self)
+ return true;
+
+ return false;
}
+/* tipc_set_sk_state - move the socket to a new state, if the transition
+ * is a legal one
+ * @sk: socket
+ * @state: requested new state (one of the TIPC_* socket states)
+ *
+ * Legal transitions:
+ *   any state                 -> TIPC_OPEN
+ *   TIPC_OPEN                 -> TIPC_LISTEN, TIPC_CONNECTING
+ *   TIPC_OPEN/CONNECTING      -> TIPC_ESTABLISHED
+ *   TIPC_CONNECTING/ESTABLISHED -> TIPC_DISCONNECTING
+ *
+ * Caller must hold socket lock
+ *
+ * Returns 0 on success, -EINVAL (state left untouched) otherwise
+ */
+static int tipc_set_sk_state(struct sock *sk, int state)
+{
+	int cur = sk->sk_state;
+	bool allowed;
+
+	switch (state) {
+	case TIPC_OPEN:
+		allowed = true;
+		break;
+	case TIPC_LISTEN:
+	case TIPC_CONNECTING:
+		allowed = (cur == TIPC_OPEN);
+		break;
+	case TIPC_ESTABLISHED:
+		allowed = (cur == TIPC_CONNECTING || cur == TIPC_OPEN);
+		break;
+	case TIPC_DISCONNECTING:
+		allowed = (cur == TIPC_CONNECTING || cur == TIPC_ESTABLISHED);
+		break;
+	default:
+		allowed = false;
+		break;
+	}
+
+	if (!allowed)
+		return -EINVAL;
+
+	sk->sk_state = state;
+	return 0;
+}
+
+/* tipc_sk_sock_err - decide whether a waiting socket operation must give up
+ * @sock: socket being waited on
+ * @timeout: remaining wait time
+ *
+ * Checked in this order: a pending socket error; for stream/seqpacket
+ * sockets a disconnecting (-EPIPE) or not connected (-ENOTCONN) state;
+ * an exhausted timeout (-EAGAIN); a pending signal.
+ *
+ * Returns 0 if waiting may continue, a negative errno otherwise
+ */
+static int tipc_sk_sock_err(struct socket *sock, long *timeout)
+{
+	struct sock *sk = sock->sk;
+	int err = sock_error(sk);
+	bool conn_oriented;
+
+	if (err)
+		return err;
+
+	conn_oriented = sock->type == SOCK_STREAM ||
+			sock->type == SOCK_SEQPACKET;
+	if (conn_oriented && sk->sk_state == TIPC_DISCONNECTING)
+		return -EPIPE;
+	if (conn_oriented && !tipc_sk_connected(sk))
+		return -ENOTCONN;
+
+	if (!*timeout)
+		return -EAGAIN;
+
+	return signal_pending(current) ? sock_intr_errno(*timeout) : 0;
+}
+
+/* tipc_wait_for_cond - sleep on a socket until a condition becomes true
+ * @sock_: socket to wait on
+ * @timeo_: pointer to the remaining timeout; updated after every sleep
+ * @condition_: expression to wait for; it is re-evaluated on every pass,
+ *              so it should be free of side effects
+ *
+ * Each pass first asks tipc_sk_sock_err() whether waiting is still
+ * allowed and stops with its error code if not. Otherwise the task is
+ * added to the socket's wait queue, the socket lock is released for the
+ * duration of the sleep and taken again before the condition is
+ * re-checked. Because of that release/lock pair the macro expects to be
+ * entered with the socket lock held.
+ *
+ * Evaluates to 0 if the condition became true, or to the non-zero result
+ * of tipc_sk_sock_err() otherwise.
+ */
+#define tipc_wait_for_cond(sock_, timeo_, condition_) \
+({ \
+ DEFINE_WAIT_FUNC(wait_, woken_wake_function); \
+ struct sock *sk_; \
+ int rc_; \
+ \
+ while ((rc_ = !(condition_))) { \
+ /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \
+ smp_rmb(); \
+ sk_ = (sock_)->sk; \
+ rc_ = tipc_sk_sock_err((sock_), timeo_); \
+ if (rc_) \
+ break; \
+ add_wait_queue(sk_sleep(sk_), &wait_); \
+ release_sock(sk_); \
+ *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
+ sched_annotate_sleep(); \
+ lock_sock(sk_); \
+ remove_wait_queue(sk_sleep(sk_), &wait_); \
+ } \
+ rc_; \
+})
+
/**
* tipc_sk_create - create a TIPC socket
* @net: network namespace (must be default network)
@@ -153,15 +453,15 @@ static void reject_rx_queue(struct sock *sk)
* This routine creates additional data structures used by the TIPC socket,
* initializes them, and links them together.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
- int kern)
+static int tipc_sk_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
{
const struct proto_ops *ops;
- socket_state state;
struct sock *sk;
- struct tipc_port *tp_ptr;
+ struct tipc_sock *tsk;
+ struct tipc_msg *msg;
/* Validate arguments */
if (unlikely(protocol != 0))
@@ -170,135 +470,143 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
switch (sock->type) {
case SOCK_STREAM:
ops = &stream_ops;
- state = SS_UNCONNECTED;
break;
case SOCK_SEQPACKET:
ops = &packet_ops;
- state = SS_UNCONNECTED;
break;
case SOCK_DGRAM:
case SOCK_RDM:
ops = &msg_ops;
- state = SS_READY;
break;
default:
return -EPROTOTYPE;
}
/* Allocate socket's protocol area */
- if (!kern)
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
- else
- sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);
-
+ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
if (sk == NULL)
return -ENOMEM;
- /* Allocate TIPC port for socket to use */
- tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch,
- TIPC_LOW_IMPORTANCE);
- if (unlikely(!tp_ptr)) {
- sk_free(sk);
- return -ENOMEM;
- }
+ tsk = tipc_sk(sk);
+ tsk->max_pkt = MAX_PKT_DEFAULT;
+ tsk->maxnagle = 0;
+ tsk->nagle_start = NAGLE_START_INIT;
+ INIT_LIST_HEAD(&tsk->publications);
+ INIT_LIST_HEAD(&tsk->cong_links);
+ msg = &tsk->phdr;
/* Finish initializing socket data structures */
sock->ops = ops;
- sock->state = state;
-
sock_init_data(sock, sk);
- sk->sk_backlog_rcv = backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ tipc_set_sk_state(sk, TIPC_OPEN);
+ if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
+ pr_warn("Socket create failed; port number exhausted\n");
+ return -EINVAL;
+ }
+
+ /* Ensure tsk is visible before we read own_addr. */
+ smp_mb();
+
+ tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
+ TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
+
+ msg_set_origport(msg, tsk->portid);
+ timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
+ sk->sk_shutdown = 0;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
- tipc_sk(sk)->p = tp_ptr;
- tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
+ sk->sk_destruct = tipc_sock_destruct;
+ tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+ tsk->group_is_open = true;
+ atomic_set(&tsk->dupl_rcvcnt, 0);
- spin_unlock_bh(tp_ptr->lock);
+ /* Start out with safe limits until we receive an advertised window */
+ tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
+ tsk->rcv_win = tsk->snd_win;
- if (sock->state == SS_READY) {
- tipc_set_portunreturnable(tp_ptr->ref, 1);
+ if (tipc_sk_type_connectionless(sk)) {
+ tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
- tipc_set_portunreliable(tp_ptr->ref, 1);
+ tsk_set_unreliable(tsk, true);
}
-
+ __skb_queue_head_init(&tsk->mc_method.deferredq);
+ trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " ");
return 0;
}
-/**
- * tipc_sock_create_local - create TIPC socket from inside TIPC module
- * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
- *
- * We cannot use sock_creat_kern here because it bumps module user count.
- * Since socket owner and creator is the same module we must make sure
- * that module count remains zero for module local sockets, otherwise
- * we cannot do rmmod.
- *
- * Returns 0 on success, errno otherwise
- */
-int tipc_sock_create_local(int type, struct socket **res)
+static void tipc_sk_callback(struct rcu_head *head)
{
- int rc;
- struct sock *sk;
-
- rc = sock_create_lite(AF_TIPC, type, 0, res);
- if (rc < 0) {
- pr_err("Failed to create kernel socket\n");
- return rc;
- }
- tipc_sk_create(&init_net, *res, 0, 1);
-
- sk = (*res)->sk;
-
- return 0;
-}
+ struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);
-/**
- * tipc_sock_release_local - release socket created by tipc_sock_create_local
- * @sock: the socket to be released.
- *
- * Module reference count is not incremented when such sockets are created,
- * so we must keep it from being decremented when they are released.
- */
-void tipc_sock_release_local(struct socket *sock)
-{
- release(sock);
- sock->ops = NULL;
- sock_release(sock);
+ sock_put(&tsk->sk);
}
-/**
- * tipc_sock_accept_local - accept a connection on a socket created
- * with tipc_sock_create_local. Use this function to avoid that
- * module reference count is inadvertently incremented.
- *
- * @sock: the accepting socket
- * @newsock: reference to the new socket to be created
- * @flags: socket flags
- */
-
-int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
- int flags)
+/* Caller should hold socket lock for the socket. */
+static void __tipc_shutdown(struct socket *sock, int error)
{
struct sock *sk = sock->sk;
- int ret;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct net *net = sock_net(sk);
+ long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
+ u32 dnode = tsk_peer_node(tsk);
+ struct sk_buff *skb;
+
+ /* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
+ tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
+ !tsk_conn_cong(tsk)));
+
+ /* Push out delayed messages if in Nagle mode */
+ tipc_sk_push_backlog(tsk, false);
+ /* Remove pending SYN */
+ __skb_queue_purge(&sk->sk_write_queue);
+
+ /* Remove partially received buffer if any */
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb && TIPC_SKB_CB(skb)->bytes_read) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ kfree_skb(skb);
+ }
- ret = sock_create_lite(sk->sk_family, sk->sk_type,
- sk->sk_protocol, newsock);
- if (ret < 0)
- return ret;
+ /* Reject all unreceived messages if connectionless */
+ if (tipc_sk_type_connectionless(sk)) {
+ tsk_rej_rx_queue(sk, error);
+ return;
+ }
- ret = accept(sock, *newsock, flags);
- if (ret < 0) {
- sock_release(*newsock);
- return ret;
+ switch (sk->sk_state) {
+ case TIPC_CONNECTING:
+ case TIPC_ESTABLISHED:
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, dnode, tsk->portid);
+ /* Send a FIN+/- to its peer */
+ skb = __skb_dequeue(&sk->sk_receive_queue);
+ if (skb) {
+ __skb_queue_purge(&sk->sk_receive_queue);
+ tipc_sk_respond(sk, skb, error);
+ break;
+ }
+ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
+ TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
+ tsk_own_node(tsk), tsk_peer_port(tsk),
+ tsk->portid, error);
+ if (skb)
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+ break;
+ case TIPC_LISTEN:
+ /* Reject all SYN messages */
+ tsk_rej_rx_queue(sk, error);
+ break;
+ default:
+ __skb_queue_purge(&sk->sk_receive_queue);
+ break;
}
- (*newsock)->ops = sock->ops;
- return ret;
}
/**
- * release - destroy a TIPC socket
+ * tipc_release - destroy a TIPC socket
* @sock: socket to destroy
*
* This routine cleans up any messages that are still queued on the socket.
@@ -311,14 +619,12 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
* are returned or discarded according to the "destination droppable" setting
* specified for the message by the sender.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int release(struct socket *sock)
+static int tipc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport;
- struct sk_buff *buf;
- int res;
+ struct tipc_sock *tsk;
/*
* Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -327,138 +633,148 @@ static int release(struct socket *sock)
if (sk == NULL)
return 0;
- tport = tipc_sk_port(sk);
+ tsk = tipc_sk(sk);
lock_sock(sk);
- /*
- * Reject all unreceived messages, except on an active connection
- * (which disconnects locally & sends a 'FIN+' to peer)
- */
- while (sock->state != SS_DISCONNECTING) {
- buf = __skb_dequeue(&sk->sk_receive_queue);
- if (buf == NULL)
- break;
- if (TIPC_SKB_CB(buf)->handle != 0)
- kfree_skb(buf);
- else {
- if ((sock->state == SS_CONNECTING) ||
- (sock->state == SS_CONNECTED)) {
- sock->state = SS_DISCONNECTING;
- tipc_disconnect(tport->ref);
- }
- tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
- }
- }
-
- /*
- * Delete TIPC port; this ensures no more messages are queued
- * (also disconnects an active connection & sends a 'FIN-' to peer)
- */
- res = tipc_deleteport(tport->ref);
-
- /* Discard any remaining (connection-based) messages in receive queue */
- __skb_queue_purge(&sk->sk_receive_queue);
+ trace_tipc_sk_release(sk, NULL, TIPC_DUMP_ALL, " ");
+ __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+ tipc_sk_leave(tsk);
+ tipc_sk_withdraw(tsk, NULL);
+ __skb_queue_purge(&tsk->mc_method.deferredq);
+ sk_stop_timer(sk, &sk->sk_timer);
+ tipc_sk_remove(tsk);
+ sock_orphan(sk);
/* Reject any messages that accumulated in backlog queue */
- sock->state = SS_DISCONNECTING;
release_sock(sk);
-
- sock_put(sk);
+ tipc_dest_list_purge(&tsk->cong_links);
+ tsk->cong_link_cnt = 0;
+ call_rcu(&tsk->rcu, tipc_sk_callback);
sock->sk = NULL;
- return res;
+ return 0;
}
/**
- * bind - associate or disassocate TIPC name(s) with a socket
+ * __tipc_bind - associate or disassociate TIPC name(s) with a socket
* @sock: socket structure
- * @uaddr: socket address describing name(s) and desired operation
- * @uaddr_len: size of socket address data structure
+ * @skaddr: socket address describing name(s) and desired operation
+ * @alen: size of socket address data structure
*
- * Name and name sequence binding is indicated using a positive scope value;
+ * Name and name sequence binding are indicated using a positive scope value;
* a negative scope value unbinds the specified name. Specifying no name
* (i.e. a socket address length of 0) unbinds all names from the socket.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*
* NOTE: This routine doesn't need to take the socket lock since it doesn't
* access any non-constant socket information.
*/
-static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
+static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
{
- struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- u32 portref = tipc_sk_port(sock->sk)->ref;
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
+ struct tipc_sock *tsk = tipc_sk(sock->sk);
+ bool unbind = false;
- if (unlikely(!uaddr_len))
- return tipc_withdraw(portref, 0, NULL);
-
- if (uaddr_len < sizeof(struct sockaddr_tipc))
- return -EINVAL;
- if (addr->family != AF_TIPC)
- return -EAFNOSUPPORT;
+ if (unlikely(!alen))
+ return tipc_sk_withdraw(tsk, NULL);
- if (addr->addrtype == TIPC_ADDR_NAME)
- addr->addr.nameseq.upper = addr->addr.nameseq.lower;
- else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
- return -EAFNOSUPPORT;
+ if (ua->addrtype == TIPC_SERVICE_ADDR) {
+ ua->addrtype = TIPC_SERVICE_RANGE;
+ ua->sr.upper = ua->sr.lower;
+ }
+ if (ua->scope < 0) {
+ unbind = true;
+ ua->scope = -ua->scope;
+ }
+ /* Users may still use deprecated TIPC_ZONE_SCOPE */
+ if (ua->scope != TIPC_NODE_SCOPE)
+ ua->scope = TIPC_CLUSTER_SCOPE;
- if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
- (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
- (addr->addr.nameseq.type != TIPC_CFG_SRV))
+ if (tsk->group)
return -EACCES;
- return (addr->scope > 0) ?
- tipc_publish(portref, addr->scope, &addr->addr.nameseq) :
- tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq);
+ if (unbind)
+ return tipc_sk_withdraw(tsk, ua);
+ return tipc_sk_publish(tsk, ua);
+}
+
+/* tipc_sk_bind - run __tipc_bind() with the socket lock held
+ * @sock: socket structure
+ * @skaddr: socket address describing name(s) and desired operation
+ * @alen: size of socket address data structure
+ *
+ * Return: the result of __tipc_bind()
+ */
+int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
+{
+	struct sock *sk = sock->sk;
+	int rc;
+
+	lock_sock(sk);
+	rc = __tipc_bind(sock, skaddr, alen);
+	release_sock(sk);
+
+	return rc;
+}
+
+/* tipc_bind - bind()/unbind entry point: vet the address, then hand over
+ * to tipc_sk_bind()
+ * @sock: socket structure
+ * @skaddr: socket address describing name(s) and desired operation
+ * @alen: size of socket address data structure; 0 requests "unbind all"
+ *
+ * Return: 0 on success; -EINVAL for a malformed address, -EAFNOSUPPORT
+ * for a socket address, -EACCES for a reserved service type, or the
+ * result of tipc_sk_bind()
+ */
+static int tipc_bind(struct socket *sock, struct sockaddr_unsized *skaddr, int alen)
+{
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
+
+	if (alen) {
+		if (!tipc_uaddr_valid(ua, alen))
+			return -EINVAL;
+		/* Look at the address fields only now that the length has
+		 * been validated; with alen == 0 the buffer carries no
+		 * address and must not be read at all.
+		 */
+		if (ua->addrtype == TIPC_SOCKET_ADDR)
+			return -EAFNOSUPPORT;
+		if (ua->sr.type < TIPC_RESERVED_TYPES) {
+			pr_warn_once("Can't bind to reserved service type %u\n",
+				     ua->sr.type);
+			return -EACCES;
+		}
+	}
+	return tipc_sk_bind(sock, (struct sockaddr *)skaddr, alen);
 }
/**
- * get_name - get port ID of socket or peer socket
+ * tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
- * @uaddr_len: area for returned length of socket address
* @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*
* NOTE: This routine doesn't need to take the socket lock since it only
* accesses socket information that is unchanging (or which changes in
* a completely predictable manner).
*/
-static int get_name(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
+ int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_sock *tsock = tipc_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
memset(addr, 0, sizeof(*addr));
if (peer) {
- if ((sock->state != SS_CONNECTED) &&
- ((peer != 2) || (sock->state != SS_DISCONNECTING)))
+ if ((!tipc_sk_connected(sk)) &&
+ ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
return -ENOTCONN;
- addr->addr.id.ref = tsock->peer_name.ref;
- addr->addr.id.node = tsock->peer_name.node;
+ addr->addr.id.ref = tsk_peer_port(tsk);
+ addr->addr.id.node = tsk_peer_node(tsk);
} else {
- addr->addr.id.ref = tsock->p->ref;
- addr->addr.id.node = tipc_own_addr;
+ addr->addr.id.ref = tsk->portid;
+ addr->addr.id.node = tipc_own_addr(sock_net(sk));
}
- *uaddr_len = sizeof(*addr);
- addr->addrtype = TIPC_ADDR_ID;
+ addr->addrtype = TIPC_SOCKET_ADDR;
addr->family = AF_TIPC;
addr->scope = 0;
addr->addr.name.domain = 0;
- return 0;
+ return sizeof(*addr);
}
/**
- * poll - read and possibly block on pollmask
+ * tipc_poll - read and possibly block on pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
* @wait: ???
*
- * Returns pollmask value
+ * Return: pollmask value
*
* COMMENTARY:
* It appears that the usual socket locking mechanisms are not useful here
@@ -466,728 +782,1324 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
* exits. TCP and other protocols seem to rely on higher level poll routines
* to handle any preventable race conditions, so TIPC will do the same ...
*
- * TIPC sets the returned events as follows:
- *
- * socket state flags set
- * ------------ ---------
- * unconnected no read flags
- * POLLOUT if port is not congested
- *
- * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
- * no write flags
- *
- * connected POLLIN/POLLRDNORM if data in rx queue
- * POLLOUT if port is not congested
- *
- * disconnecting POLLIN/POLLRDNORM/POLLHUP
- * no write flags
- *
- * listening POLLIN if SYN in rx queue
- * no write flags
- *
- * ready POLLIN/POLLRDNORM if data in rx queue
- * [connectionless] POLLOUT (since port cannot be congested)
- *
* IMPORTANT: The fact that a read or write operation is indicated does NOT
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static unsigned int poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t tipc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
- u32 mask = 0;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
-
- switch ((int)sock->state) {
- case SS_UNCONNECTED:
- if (!tipc_sk_port(sk)->congested)
- mask |= POLLOUT;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ __poll_t revents = 0;
+
+ sock_poll_wait(file, sock, wait);
+ trace_tipc_sk_poll(sk, NULL, TIPC_DUMP_ALL, " ");
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ revents |= EPOLLHUP;
+
+ switch (sk->sk_state) {
+ case TIPC_ESTABLISHED:
+ if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
+ revents |= EPOLLOUT;
+ fallthrough;
+ case TIPC_LISTEN:
+ case TIPC_CONNECTING:
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ revents |= EPOLLIN | EPOLLRDNORM;
break;
- case SS_READY:
- case SS_CONNECTED:
- if (!tipc_sk_port(sk)->congested)
- mask |= POLLOUT;
- /* fall thru' */
- case SS_CONNECTING:
- case SS_LISTENING:
- if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= (POLLIN | POLLRDNORM);
+ case TIPC_OPEN:
+ if (tsk->group_is_open && !tsk->cong_link_cnt)
+ revents |= EPOLLOUT;
+ if (!tipc_sk_type_connectionless(sk))
+ break;
+ if (skb_queue_empty_lockless(&sk->sk_receive_queue))
+ break;
+ revents |= EPOLLIN | EPOLLRDNORM;
break;
- case SS_DISCONNECTING:
- mask = (POLLIN | POLLRDNORM | POLLHUP);
+ case TIPC_DISCONNECTING:
+ revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
break;
}
+ return revents;
+}
+
+/**
+ * tipc_sendmcast - send multicast message
+ * @sock: socket structure
+ * @ua: destination address struct
+ * @msg: message to send
+ * @dlen: length of data to send
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Return: the number of bytes sent on success, or errno
+ */
+static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua,
+ struct msghdr *msg, size_t dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct net *net = sock_net(sk);
+ int mtu = tipc_bcast_get_mtu(net);
+ struct sk_buff_head pkts;
+ struct tipc_nlist dsts;
+ int rc;
+
+ if (tsk->group)
+ return -EACCES;
+
+ /* Block or return if any destination link is congested */
+ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
+ if (unlikely(rc))
+ return rc;
+
+ /* Lookup destination nodes */
+ tipc_nlist_init(&dsts, tipc_own_addr(net));
+ tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts);
+ if (!dsts.local && !dsts.remote)
+ return -EHOSTUNREACH;
+
+ /* Build message header */
+ msg_set_type(hdr, TIPC_MCAST_MSG);
+ msg_set_hdr_sz(hdr, MCAST_H_SIZE);
+ msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
+ msg_set_destport(hdr, 0);
+ msg_set_destnode(hdr, 0);
+ msg_set_nametype(hdr, ua->sr.type);
+ msg_set_namelower(hdr, ua->sr.lower);
+ msg_set_nameupper(hdr, ua->sr.upper);
+
+ /* Build message as chain of buffers */
+ __skb_queue_head_init(&pkts);
+ rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
+
+ /* Send message if build was successful */
+ if (unlikely(rc == dlen)) {
+ trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
+ TIPC_DUMP_SK_SNDQ, " ");
+ rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts,
+ &tsk->cong_link_cnt);
+ }
- return mask;
+ tipc_nlist_purge(&dsts);
+
+ return rc ? rc : dlen;
}
/**
- * dest_name_check - verify user is permitted to send to specified port name
- * @dest: destination address
- * @m: descriptor for message to be sent
+ * tipc_send_group_msg - send a message to a member in the group
+ * @net: network namespace
+ * @tsk: tipc socket
+ * @m: message to send
+ * @mb: group member
+ * @dnode: destination node
+ * @dport: destination port
+ * @dlen: total length of message data
+ */
+static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
+ struct msghdr *m, struct tipc_member *mb,
+ u32 dnode, u32 dport, int dlen)
+{
+ u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct sk_buff_head pkts;
+ int mtu, rc;
+
+ /* Complete message header */
+ msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, dport);
+ msg_set_destnode(hdr, dnode);
+ msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
+
+ /* Build message as chain of buffers */
+ __skb_queue_head_init(&pkts);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ if (unlikely(rc == -ELINKCONG)) {
+ tipc_dest_push(&tsk->cong_links, dnode, 0);
+ tsk->cong_link_cnt++;
+ }
+
+ /* Update send window */
+ tipc_group_update_member(mb, blks);
+
+ /* A broadcast sent within next EXPIRE period must follow same path */
+ method->rcast = true;
+ method->mandatory = true;
+ return dlen;
+}
+
+/**
+ * tipc_send_group_unicast - send message to a member in the group
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
*
- * Prevents restricted configuration commands from being issued by
- * unauthorized users.
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Return: the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct net *net = sock_net(sk);
+ struct tipc_member *mb = NULL;
+ u32 node, port;
+ int rc;
+
+ node = ua->sk.node;
+ port = ua->sk.ref;
+ if (!port && !node)
+ return -EHOSTUNREACH;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(&tsk->cong_links, node, 0) &&
+ tsk->group &&
+ !tipc_group_cong(tsk->group, node, port, blks,
+ &mb));
+ if (unlikely(rc))
+ return rc;
+
+ if (unlikely(!mb))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_anycast - send message to any member with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
*
- * Returns 0 if permission is granted, otherwise errno
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Return: the number of bytes sent on success, or errno
*/
-static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
+static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
{
- struct tipc_cfg_msg_hdr hdr;
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct list_head *cong_links = &tsk->cong_links;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct tipc_member *first = NULL;
+ struct tipc_member *mbr = NULL;
+ struct net *net = sock_net(sk);
+ u32 node, port, exclude;
+ LIST_HEAD(dsts);
+ int lookups = 0;
+ int dstcnt, rc;
+ bool cong;
+
+ ua->sa.type = msg_nametype(hdr);
+ ua->scope = msg_lookup_scope(hdr);
+
+ while (++lookups < 4) {
+ exclude = tipc_group_exclude(tsk->group);
+
+ first = NULL;
+
+ /* Look for a non-congested destination member, if any */
+ while (1) {
+ if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt,
+ exclude, false))
+ return -EHOSTUNREACH;
+ tipc_dest_pop(&dsts, &node, &port);
+ cong = tipc_group_cong(tsk->group, node, port, blks,
+ &mbr);
+ if (!cong)
+ break;
+ if (mbr == first)
+ break;
+ if (!first)
+ first = mbr;
+ }
- if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
- return 0;
- if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
- return 0;
- if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
- return -EACCES;
+ /* Start over if destination was not in member list */
+ if (unlikely(!mbr))
+ continue;
- if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
- return -EMSGSIZE;
- if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
- return -EFAULT;
- if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
- return -EACCES;
+ if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
+ break;
- return 0;
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(cong_links, node, 0) &&
+ tsk->group &&
+ !tipc_group_cong(tsk->group, node, port,
+ blks, &mbr));
+ if (unlikely(rc))
+ return rc;
+
+ /* Send, unless destination disappeared while waiting */
+ if (likely(mbr))
+ break;
+ }
+
+ if (unlikely(lookups >= 4))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
+
+ return rc ? rc : dlen;
}
/**
- * send_msg - send message in connectionless manner
- * @iocb: if NULL, indicates that socket lock is already held
+ * tipc_send_group_bcast - send message to all members in communication group
* @sock: socket structure
* @m: message to send
- * @total_len: length of message
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
*
- * Message must have an destination specified explicitly.
- * Used for SOCK_RDM and SOCK_DGRAM messages,
- * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
- * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Return: the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_nlist *dsts;
+ struct tipc_mc_method *method = &tsk->mc_method;
+ bool ack = method->mandatory && method->rcast;
+ int blks = tsk_blocks(MCAST_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ int mtu = tipc_bcast_get_mtu(net);
+ struct sk_buff_head pkts;
+ int rc = -EHOSTUNREACH;
+
+ /* Block or return if any destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tsk->cong_link_cnt && tsk->group &&
+ !tipc_group_bc_cong(tsk->group, blks));
+ if (unlikely(rc))
+ return rc;
+
+ dsts = tipc_group_dests(tsk->group);
+ if (!dsts->local && !dsts->remote)
+ return -EHOSTUNREACH;
+
+ /* Complete message header */
+ if (ua) {
+ msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
+ msg_set_nameinst(hdr, ua->sa.instance);
+ } else {
+ msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
+ msg_set_nameinst(hdr, 0);
+ }
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, 0);
+ msg_set_destnode(hdr, 0);
+ msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group));
+
+ /* Avoid getting stuck with repeated forced replicasts */
+ msg_set_grp_bc_ack_req(hdr, ack);
+
+ /* Build message as chain of buffers */
+ __skb_queue_head_init(&pkts);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
+ if (unlikely(rc))
+ return rc;
+
+ /* Update broadcast sequence number and send windows */
+ tipc_group_update_bc_members(tsk->group, blks, ack);
+
+ /* Broadcast link is now free to choose method for next broadcast */
+ method->mandatory = false;
+ method->expires = jiffies;
+
+ return dlen;
+}
+
+/**
+ * tipc_send_group_mcast - send message to all members with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
*
- * Returns the number of bytes sent on success, or errno otherwise
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Return: the number of bytes sent on success, or errno
*/
-static int send_msg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
{
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
- int needs_conn;
- long timeout_val;
- int res = -EINVAL;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct net *net = sock_net(sk);
+ u32 dstcnt, exclude;
+ LIST_HEAD(dsts);
+
+ ua->sa.type = msg_nametype(hdr);
+ ua->scope = msg_lookup_scope(hdr);
+ exclude = tipc_group_exclude(grp);
+
+ if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true))
+ return -EHOSTUNREACH;
+
+ if (dstcnt == 1) {
+ tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref);
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ }
- if (unlikely(!dest))
- return -EDESTADDRREQ;
- if (unlikely((m->msg_namelen < sizeof(*dest)) ||
- (dest->family != AF_TIPC)))
- return -EINVAL;
- if (total_len > TIPC_MAX_USER_MSG_SIZE)
- return -EMSGSIZE;
+ tipc_dest_list_purge(&dsts);
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+}
- if (iocb)
- lock_sock(sk);
+/**
+ * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
+ * @net: the associated network namespace
+ * @arrvq: queue with arriving messages, to be cloned after destination lookup
+ * @inputq: queue with cloned messages, delivered to socket after dest lookup
+ *
+ * Multi-threaded: parallel calls with reference to same queues may occur
+ */
+void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
+ struct sk_buff_head *inputq)
+{
+ u32 self = tipc_own_addr(net);
+ struct sk_buff *skb, *_skb;
+ u32 portid, onode;
+ struct sk_buff_head tmpq;
+ struct list_head dports;
+ struct tipc_msg *hdr;
+ struct tipc_uaddr ua;
+ int user, mtyp, hlen;
+
+ __skb_queue_head_init(&tmpq);
+ INIT_LIST_HEAD(&dports);
+ ua.addrtype = TIPC_SERVICE_RANGE;
+
+ /* tipc_skb_peek() increments the head skb's reference counter */
+ skb = tipc_skb_peek(arrvq, &inputq->lock);
+ for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
+ hdr = buf_msg(skb);
+ user = msg_user(hdr);
+ mtyp = msg_type(hdr);
+ hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
+ onode = msg_orignode(hdr);
+ ua.sr.type = msg_nametype(hdr);
+ ua.sr.lower = msg_namelower(hdr);
+ ua.sr.upper = msg_nameupper(hdr);
+ if (onode == self)
+ ua.scope = TIPC_ANY_SCOPE;
+ else
+ ua.scope = TIPC_CLUSTER_SCOPE;
- needs_conn = (sock->state != SS_READY);
- if (unlikely(needs_conn)) {
- if (sock->state == SS_LISTENING) {
- res = -EPIPE;
- goto exit;
+ if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
+ spin_lock_bh(&inputq->lock);
+ if (skb_peek(arrvq) == skb) {
+ __skb_dequeue(arrvq);
+ __skb_queue_tail(inputq, skb);
+ }
+ kfree_skb(skb);
+ spin_unlock_bh(&inputq->lock);
+ continue;
}
- if (sock->state != SS_UNCONNECTED) {
- res = -EISCONN;
- goto exit;
+
+ /* Group messages require exact scope match */
+ if (msg_in_group(hdr)) {
+ ua.sr.lower = 0;
+ ua.sr.upper = ~0;
+ ua.scope = msg_lookup_scope(hdr);
}
- if (tport->published) {
- res = -EOPNOTSUPP;
- goto exit;
+
+ /* Create destination port list: */
+ tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports);
+
+ /* Clone message per destination */
+ while (tipc_dest_pop(&dports, NULL, &portid)) {
+ _skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
+ if (_skb) {
+ msg_set_destport(buf_msg(_skb), portid);
+ __skb_queue_tail(&tmpq, _skb);
+ continue;
+ }
+ pr_warn("Failed to clone mcast rcv buffer\n");
}
- if (dest->addrtype == TIPC_ADDR_NAME) {
- tport->conn_type = dest->addr.name.name.type;
- tport->conn_instance = dest->addr.name.name.instance;
+ /* Append clones to inputq only if skb is still head of arrvq */
+ spin_lock_bh(&inputq->lock);
+ if (skb_peek(arrvq) == skb) {
+ skb_queue_splice_tail_init(&tmpq, inputq);
+ /* Decrement the skb's refcnt */
+ kfree_skb(__skb_dequeue(arrvq));
}
-
- /* Abort any pending connection attempts (very unlikely) */
- reject_rx_queue(sk);
+ spin_unlock_bh(&inputq->lock);
+ __skb_queue_purge(&tmpq);
+ kfree_skb(skb);
}
+ tipc_sk_rcv(net, inputq);
+}
- timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
+ * when socket is in Nagle mode
+ */
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
+{
+ struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+ struct sk_buff *skb = skb_peek_tail(txq);
+ struct net *net = sock_net(&tsk->sk);
+ u32 dnode = tsk_peer_node(tsk);
+ int rc;
- do {
- if (dest->addrtype == TIPC_ADDR_NAME) {
- res = dest_name_check(dest, m);
- if (res)
- break;
- res = tipc_send2name(tport->ref,
- &dest->addr.name.name,
- dest->addr.name.domain,
- m->msg_iovlen,
- m->msg_iov,
- total_len);
- } else if (dest->addrtype == TIPC_ADDR_ID) {
- res = tipc_send2port(tport->ref,
- &dest->addr.id,
- m->msg_iovlen,
- m->msg_iov,
- total_len);
- } else if (dest->addrtype == TIPC_ADDR_MCAST) {
- if (needs_conn) {
- res = -EOPNOTSUPP;
- break;
+ if (nagle_ack) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
+ tsk->oneway = 0;
+ if (tsk->nagle_start < NAGLE_START_MAX)
+ tsk->nagle_start *= 2;
+ tsk->expect_ack = false;
+ pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
+ tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
+ tsk->nagle_start);
+ } else {
+ tsk->nagle_start = NAGLE_START_INIT;
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
}
- res = dest_name_check(dest, m);
- if (res)
- break;
- res = tipc_multicast(tport->ref,
- &dest->addr.nameseq,
- m->msg_iovlen,
- m->msg_iov,
- total_len);
- }
- if (likely(res != -ELINKCONG)) {
- if (needs_conn && (res >= 0))
- sock->state = SS_CONNECTING;
- break;
- }
- if (timeout_val <= 0L) {
- res = timeout_val ? timeout_val : -EWOULDBLOCK;
- break;
}
- release_sock(sk);
- timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
- !tport->congested, timeout_val);
- lock_sock(sk);
- } while (1);
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
+ }
+
+ if (!skb || tsk->cong_link_cnt)
+ return;
+
+ /* Do not send SYN again after congestion */
+ if (msg_is_syn(buf_msg(skb)))
+ return;
+
+ if (tsk->msg_acc)
+ tsk->pkt_cnt += skb_queue_len(txq);
+ tsk->snt_unacked += tsk->snd_backlog;
+ tsk->snd_backlog = 0;
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+ if (rc == -ELINKCONG)
+ tsk->cong_link_cnt = 1;
+}
+/**
+ * tipc_sk_conn_proto_rcv - receive a connection management protocol message
+ * @tsk: receiving socket
+ * @skb: pointer to message buffer.
+ * @inputq: buffer list containing the buffers
+ * @xmitq: output message area
+ */
+static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 onode = tsk_own_node(tsk);
+ struct sock *sk = &tsk->sk;
+ int mtyp = msg_type(hdr);
+ bool was_cong;
+
+ /* Ignore if connection cannot be validated: */
+ if (!tsk_peer_msg(tsk, hdr)) {
+ trace_tipc_sk_drop_msg(sk, skb, TIPC_DUMP_NONE, "@proto_rcv!");
+ goto exit;
+ }
+
+ if (unlikely(msg_errcode(hdr))) {
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
+ tsk_peer_port(tsk));
+ sk->sk_state_change(sk);
+
+ /* State change is ignored if socket already awake,
+ * - convert msg to abort msg and add to inqueue
+ */
+ msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+ msg_set_type(hdr, TIPC_CONN_MSG);
+ msg_set_size(hdr, BASIC_H_SIZE);
+ msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+ __skb_queue_tail(inputq, skb);
+ return;
+ }
+
+ tsk->probe_unacked = false;
+
+ if (mtyp == CONN_PROBE) {
+ msg_set_type(hdr, CONN_PROBE_REPLY);
+ if (tipc_msg_reverse(onode, &skb, TIPC_OK))
+ __skb_queue_tail(xmitq, skb);
+ return;
+ } else if (mtyp == CONN_ACK) {
+ was_cong = tsk_conn_cong(tsk);
+ tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
+ tsk->snt_unacked -= msg_conn_ack(hdr);
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+ tsk->snd_win = msg_adv_win(hdr);
+ if (was_cong && !tsk_conn_cong(tsk))
+ sk->sk_write_space(sk);
+ } else if (mtyp != CONN_PROBE_REPLY) {
+ pr_warn("Received unknown CONN_PROTO msg\n");
+ }
exit:
- if (iocb)
- release_sock(sk);
- return res;
+ kfree_skb(skb);
}
/**
- * send_packet - send a connection-oriented message
- * @iocb: if NULL, indicates that socket lock is already held
+ * tipc_sendmsg - send message in connectionless manner
* @sock: socket structure
* @m: message to send
- * @total_len: length of message
+ * @dsz: amount of user data to be sent
*
- * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
+ * Message must have a destination specified explicitly.
+ * Used for SOCK_RDM and SOCK_DGRAM messages,
+ * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
+ * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
*
- * Returns the number of bytes sent on success, or errno otherwise
+ * Return: the number of bytes sent on success, or errno otherwise
*/
-static int send_packet(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_sendmsg(struct socket *sock,
+ struct msghdr *m, size_t dsz)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
- long timeout_val;
- int res;
+ int ret;
+
+ lock_sock(sk);
+ ret = __tipc_sendmsg(sock, m, dsz);
+ release_sock(sk);
- /* Handle implied connection establishment */
- if (unlikely(dest))
- return send_msg(iocb, sock, m, total_len);
+ return ret;
+}
- if (total_len > TIPC_MAX_USER_MSG_SIZE)
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
+{
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
+ long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ struct list_head *clinks = &tsk->cong_links;
+ bool syn = !tipc_sk_type_connectionless(sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct tipc_socket_addr skaddr;
+ struct sk_buff_head pkts;
+ int atype, mtu, rc;
+
+ if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
return -EMSGSIZE;
- if (iocb)
- lock_sock(sk);
+ if (ua) {
+ if (!tipc_uaddr_valid(ua, m->msg_namelen))
+ return -EINVAL;
+ atype = ua->addrtype;
+ }
- timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ /* If socket belongs to a communication group follow other paths */
+ if (grp) {
+ if (!ua)
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+ if (atype == TIPC_SERVICE_ADDR)
+ return tipc_send_group_anycast(sock, m, dlen, timeout);
+ if (atype == TIPC_SOCKET_ADDR)
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ if (atype == TIPC_SERVICE_RANGE)
+ return tipc_send_group_mcast(sock, m, dlen, timeout);
+ return -EINVAL;
+ }
- do {
- if (unlikely(sock->state != SS_CONNECTED)) {
- if (sock->state == SS_DISCONNECTING)
- res = -EPIPE;
- else
- res = -ENOTCONN;
- break;
- }
+ if (!ua) {
+ ua = (struct tipc_uaddr *)&tsk->peer;
+ if (!syn && ua->family != AF_TIPC)
+ return -EDESTADDRREQ;
+ atype = ua->addrtype;
+ }
- res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov,
- total_len);
- if (likely(res != -ELINKCONG))
- break;
- if (timeout_val <= 0L) {
- res = timeout_val ? timeout_val : -EWOULDBLOCK;
- break;
+ if (unlikely(syn)) {
+ if (sk->sk_state == TIPC_LISTEN)
+ return -EPIPE;
+ if (sk->sk_state != TIPC_OPEN)
+ return -EISCONN;
+ if (tsk->published)
+ return -EOPNOTSUPP;
+ if (atype == TIPC_SERVICE_ADDR)
+ tsk->conn_addrtype = atype;
+ msg_set_syn(hdr, 1);
+ }
+
+ memset(&skaddr, 0, sizeof(skaddr));
+
+ /* Determine destination */
+ if (atype == TIPC_SERVICE_RANGE) {
+ return tipc_sendmcast(sock, ua, m, dlen, timeout);
+ } else if (atype == TIPC_SERVICE_ADDR) {
+ skaddr.node = ua->lookup_node;
+ ua->scope = tipc_node2scope(skaddr.node);
+ if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr))
+ return -EHOSTUNREACH;
+ } else if (atype == TIPC_SOCKET_ADDR) {
+ skaddr = ua->sk;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Block or return if destination link is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(clinks, skaddr.node, 0));
+ if (unlikely(rc))
+ return rc;
+
+ /* Finally build message header */
+ msg_set_destnode(hdr, skaddr.node);
+ msg_set_destport(hdr, skaddr.ref);
+ if (atype == TIPC_SERVICE_ADDR) {
+ msg_set_type(hdr, TIPC_NAMED_MSG);
+ msg_set_hdr_sz(hdr, NAMED_H_SIZE);
+ msg_set_nametype(hdr, ua->sa.type);
+ msg_set_nameinst(hdr, ua->sa.instance);
+ msg_set_lookup_scope(hdr, ua->scope);
+ } else { /* TIPC_SOCKET_ADDR */
+ msg_set_type(hdr, TIPC_DIRECT_MSG);
+ msg_set_lookup_scope(hdr, 0);
+ msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+ }
+
+ /* Add message body */
+ __skb_queue_head_init(&pkts);
+ mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+ if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) {
+ __skb_queue_purge(&pkts);
+ return -ENOMEM;
+ }
+
+ /* Send message */
+ trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
+ rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid);
+ if (unlikely(rc == -ELINKCONG)) {
+ tipc_dest_push(clinks, skaddr.node, 0);
+ tsk->cong_link_cnt++;
+ rc = 0;
+ }
+
+ if (unlikely(syn && !rc)) {
+ tipc_set_sk_state(sk, TIPC_CONNECTING);
+ if (dlen && timeout) {
+ timeout = msecs_to_jiffies(timeout);
+ tipc_wait_for_connect(sock, &timeout);
}
- release_sock(sk);
- timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
- (!tport->congested || !tport->connected), timeout_val);
- lock_sock(sk);
- } while (1);
+ }
- if (iocb)
- release_sock(sk);
- return res;
+ return rc ? rc : dlen;
}
/**
- * send_stream - send stream-oriented data
- * @iocb: (unused)
+ * tipc_sendstream - send stream-oriented data
* @sock: socket structure
* @m: data to send
- * @total_len: total length of data to be sent
+ * @dsz: total length of data to be transmitted
*
* Used for SOCK_STREAM data.
*
- * Returns the number of bytes sent on success (or partial success),
+ * Return: the number of bytes sent on success (or partial success),
* or errno if no data sent
*/
-static int send_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- struct msghdr my_msg;
- struct iovec my_iov;
- struct iovec *curr_iov;
- int curr_iovlen;
- char __user *curr_start;
- u32 hdr_size;
- int curr_left;
- int bytes_to_send;
- int bytes_sent;
- int res;
+ int ret;
lock_sock(sk);
+ ret = __tipc_sendstream(sock, m, dsz);
+ release_sock(sk);
- /* Handle special cases where there is no connection */
- if (unlikely(sock->state != SS_CONNECTED)) {
- if (sock->state == SS_UNCONNECTED) {
- res = send_packet(NULL, sock, m, total_len);
- goto exit;
- } else if (sock->state == SS_DISCONNECTING) {
- res = -EPIPE;
- goto exit;
- } else {
- res = -ENOTCONN;
- goto exit;
- }
- }
+ return ret;
+}
- if (unlikely(m->msg_name)) {
- res = -EISCONN;
- goto exit;
- }
+static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ struct sk_buff_head *txq = &sk->sk_write_queue;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct net *net = sock_net(sk);
+ struct sk_buff *skb;
+ u32 dnode = tsk_peer_node(tsk);
+ int maxnagle = tsk->maxnagle;
+ int maxpkt = tsk->max_pkt;
+ int send, sent = 0;
+ int blocks, rc = 0;
+
+ if (unlikely(dlen > INT_MAX))
+ return -EMSGSIZE;
- if (total_len > (unsigned int)INT_MAX) {
- res = -EMSGSIZE;
- goto exit;
+ /* Handle implicit connection setup */
+ if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
+ rc = __tipc_sendmsg(sock, m, dlen);
+ if (dlen && dlen == rc) {
+ tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
+ tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
+ }
+ return rc;
}
- /*
- * Send each iovec entry using one or more messages
- *
- * Note: This algorithm is good for the most likely case
- * (i.e. one large iovec entry), but could be improved to pass sets
- * of small iovec entries into send_packet().
- */
- curr_iov = m->msg_iov;
- curr_iovlen = m->msg_iovlen;
- my_msg.msg_iov = &my_iov;
- my_msg.msg_iovlen = 1;
- my_msg.msg_flags = m->msg_flags;
- my_msg.msg_name = NULL;
- bytes_sent = 0;
-
- hdr_size = msg_hdr_sz(&tport->phdr);
-
- while (curr_iovlen--) {
- curr_start = curr_iov->iov_base;
- curr_left = curr_iov->iov_len;
-
- while (curr_left) {
- bytes_to_send = tport->max_pkt - hdr_size;
- if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
- bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
- if (curr_left < bytes_to_send)
- bytes_to_send = curr_left;
- my_iov.iov_base = curr_start;
- my_iov.iov_len = bytes_to_send;
- res = send_packet(NULL, sock, &my_msg, bytes_to_send);
- if (res < 0) {
- if (bytes_sent)
- res = bytes_sent;
- goto exit;
+ do {
+ rc = tipc_wait_for_cond(sock, &timeout,
+ (!tsk->cong_link_cnt &&
+ !tsk_conn_cong(tsk) &&
+ tipc_sk_connected(sk)));
+ if (unlikely(rc))
+ break;
+ send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
+ blocks = tsk->snd_backlog;
+ if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
+ send <= maxnagle) {
+ rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+ if (unlikely(rc < 0))
+ break;
+ blocks += rc;
+ tsk->msg_acc++;
+ if (blocks <= 64 && tsk->expect_ack) {
+ tsk->snd_backlog = blocks;
+ sent += send;
+ break;
+ } else if (blocks > 64) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ } else {
+ skb = skb_peek_tail(txq);
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
+ }
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
}
- curr_left -= bytes_to_send;
- curr_start += bytes_to_send;
- bytes_sent += bytes_to_send;
+ } else {
+ rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
+ if (unlikely(rc != send))
+ break;
+ blocks += tsk_inc(tsk, send + MIN_H_SIZE);
+ }
+ trace_tipc_sk_sendstream(sk, skb_peek(txq),
+ TIPC_DUMP_SK_SNDQ, " ");
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+ if (unlikely(rc == -ELINKCONG)) {
+ tsk->cong_link_cnt = 1;
+ rc = 0;
}
+ if (likely(!rc)) {
+ tsk->snt_unacked += blocks;
+ tsk->snd_backlog = 0;
+ sent += send;
+ }
+ } while (sent < dlen && !rc);
- curr_iov++;
- }
- res = bytes_sent;
-exit:
- release_sock(sk);
- return res;
+ return sent ? sent : rc;
}
/**
- * auto_connect - complete connection setup to a remote port
+ * tipc_send_packet - send a connection-oriented message
* @sock: socket structure
- * @msg: peer's response message
+ * @m: message to send
+ * @dsz: length of data to be transmitted
*
- * Returns 0 on success, errno otherwise
+ * Used for SOCK_SEQPACKET messages.
+ *
+ * Return: the number of bytes sent on success, or errno otherwise
*/
-static int auto_connect(struct socket *sock, struct tipc_msg *msg)
+static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
- struct tipc_sock *tsock = tipc_sk(sock->sk);
- struct tipc_port *p_ptr;
+ if (dsz > TIPC_MAX_USER_MSG_SIZE)
+ return -EMSGSIZE;
- tsock->peer_name.ref = msg_origport(msg);
- tsock->peer_name.node = msg_orignode(msg);
- p_ptr = tipc_port_deref(tsock->p->ref);
- if (!p_ptr)
- return -EINVAL;
+ return tipc_sendstream(sock, m, dsz);
+}
- __tipc_connect(tsock->p->ref, p_ptr, &tsock->peer_name);
+/* tipc_sk_finish_conn - complete the setup of a connection
+ */
+static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
+ u32 peer_node)
+{
+ struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_msg *msg = &tsk->phdr;
+
+ msg_set_syn(msg, 0);
+ msg_set_destnode(msg, peer_node);
+ msg_set_destport(msg, peer_port);
+ msg_set_type(msg, TIPC_CONN_MSG);
+ msg_set_lookup_scope(msg, 0);
+ msg_set_hdr_sz(msg, SHORT_H_SIZE);
+
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+ tipc_set_sk_state(sk, TIPC_ESTABLISHED);
+ tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
+ tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
+ tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ tsk_set_nagle(tsk);
+ __skb_queue_purge(&sk->sk_write_queue);
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+ return;
- if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&p_ptr->phdr, (u32)msg_importance(msg));
- sock->state = SS_CONNECTED;
- return 0;
+ /* Fall back to message based flow control */
+ tsk->rcv_win = FLOWCTL_MSG_WIN;
+ tsk->snd_win = FLOWCTL_MSG_WIN;
}
/**
- * set_orig_addr - capture sender's address for received message
+ * tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @skb: received message
*
* Note: Address is not captured if not requested by receiver.
*/
-static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
+static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
{
- struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name;
+ DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
+ struct tipc_msg *hdr = buf_msg(skb);
- if (addr) {
- addr->family = AF_TIPC;
- addr->addrtype = TIPC_ADDR_ID;
- memset(&addr->addr, 0, sizeof(addr->addr));
- addr->addr.id.ref = msg_origport(msg);
- addr->addr.id.node = msg_orignode(msg);
- addr->addr.name.domain = 0; /* could leave uninitialized */
- addr->scope = 0; /* could leave uninitialized */
- m->msg_namelen = sizeof(struct sockaddr_tipc);
- }
+ if (!srcaddr)
+ return;
+
+ srcaddr->sock.family = AF_TIPC;
+ srcaddr->sock.addrtype = TIPC_SOCKET_ADDR;
+ srcaddr->sock.scope = 0;
+ srcaddr->sock.addr.id.ref = msg_origport(hdr);
+ srcaddr->sock.addr.id.node = msg_orignode(hdr);
+ srcaddr->sock.addr.name.domain = 0;
+ m->msg_namelen = sizeof(struct sockaddr_tipc);
+
+ if (!msg_in_group(hdr))
+ return;
+
+ /* Group message users may also want to know sending member's id */
+ srcaddr->member.family = AF_TIPC;
+ srcaddr->member.addrtype = TIPC_SERVICE_ADDR;
+ srcaddr->member.scope = 0;
+ srcaddr->member.addr.name.name.type = msg_nametype(hdr);
+ srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
+ srcaddr->member.addr.name.domain = 0;
+ m->msg_namelen = sizeof(*srcaddr);
}
/**
- * anc_data_recv - optionally capture ancillary data for received message
+ * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
* @m: descriptor for message info
- * @msg: received message header
- * @tport: TIPC port associated with message
+ * @skb: received message buffer
+ * @tsk: TIPC socket associated with message
*
* Note: Ancillary data is not captured if not requested by receiver.
*
- * Returns 0 if successful, otherwise errno
+ * Return: 0 if successful, otherwise errno
*/
-static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
- struct tipc_port *tport)
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
+ struct tipc_sock *tsk)
{
- u32 anc_data[3];
- u32 err;
- u32 dest_type;
- int has_name;
- int res;
+ struct tipc_msg *hdr;
+ u32 data[3] = {0,};
+ bool has_addr;
+ int dlen, rc;
if (likely(m->msg_controllen == 0))
return 0;
- /* Optionally capture errored message object(s) */
- err = msg ? msg_errcode(msg) : 0;
- if (unlikely(err)) {
- anc_data[0] = err;
- anc_data[1] = msg_data_sz(msg);
- res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
- if (res)
- return res;
- if (anc_data[1]) {
- res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
- msg_data(msg));
- if (res)
- return res;
- }
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+
+ /* Capture errored message object, if any */
+ if (msg_errcode(hdr)) {
+ if (skb_linearize(skb))
+ return -ENOMEM;
+ hdr = buf_msg(skb);
+ data[0] = msg_errcode(hdr);
+ data[1] = dlen;
+ rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data);
+ if (rc || !dlen)
+ return rc;
+ rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr));
+ if (rc)
+ return rc;
}
- /* Optionally capture message destination object */
- dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
- switch (dest_type) {
+ /* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */
+ switch (msg_type(hdr)) {
case TIPC_NAMED_MSG:
- has_name = 1;
- anc_data[0] = msg_nametype(msg);
- anc_data[1] = msg_namelower(msg);
- anc_data[2] = msg_namelower(msg);
+ has_addr = true;
+ data[0] = msg_nametype(hdr);
+ data[1] = msg_namelower(hdr);
+ data[2] = data[1];
break;
case TIPC_MCAST_MSG:
- has_name = 1;
- anc_data[0] = msg_nametype(msg);
- anc_data[1] = msg_namelower(msg);
- anc_data[2] = msg_nameupper(msg);
+ has_addr = true;
+ data[0] = msg_nametype(hdr);
+ data[1] = msg_namelower(hdr);
+ data[2] = msg_nameupper(hdr);
break;
case TIPC_CONN_MSG:
- has_name = (tport->conn_type != 0);
- anc_data[0] = tport->conn_type;
- anc_data[1] = tport->conn_instance;
- anc_data[2] = tport->conn_instance;
+ has_addr = !!tsk->conn_addrtype;
+ data[0] = msg_nametype(&tsk->phdr);
+ data[1] = msg_nameinst(&tsk->phdr);
+ data[2] = data[1];
break;
default:
- has_name = 0;
+ has_addr = false;
}
- if (has_name) {
- res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
- if (res)
- return res;
+ if (!has_addr)
+ return 0;
+ return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data);
+}
+
+static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
+{
+ struct sock *sk = &tsk->sk;
+ struct sk_buff *skb = NULL;
+ struct tipc_msg *msg;
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 dnode = tsk_peer_node(tsk);
+
+ if (!tipc_sk_connected(sk))
+ return NULL;
+ skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
+ dnode, tsk_own_node(tsk), peer_port,
+ tsk->portid, TIPC_OK);
+ if (!skb)
+ return NULL;
+ msg = buf_msg(skb);
+ msg_set_conn_ack(msg, tsk->rcv_unacked);
+ tsk->rcv_unacked = 0;
+
+ /* Adjust to and advertise the correct window limit */
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
+ tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
+ msg_set_adv_win(msg, tsk->rcv_win);
}
+ return skb;
+}
- return 0;
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
+{
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (!skb)
+ return;
+
+ tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk),
+ msg_link_selector(buf_msg(skb)));
+}
+
+static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
+{
+ struct sock *sk = sock->sk;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ long timeo = *timeop;
+ int err = sock_error(sk);
+
+ if (err)
+ return err;
+
+ for (;;) {
+ if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ err = -ENOTCONN;
+ break;
+ }
+ add_wait_queue(sk_sleep(sk), &wait);
+ release_sock(sk);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ sched_annotate_sleep();
+ lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ }
+ err = 0;
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ break;
+ err = -EAGAIN;
+ if (!timeo)
+ break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
+
+ err = sock_error(sk);
+ if (err)
+ break;
+ }
+ *timeop = timeo;
+ return err;
}
/**
- * recv_msg - receive packet-oriented message
- * @iocb: (unused)
+ * tipc_recvmsg - receive packet-oriented message
+ * @sock: network socket
* @m: descriptor for message info
- * @buf_len: total size of user buffer area
+ * @buflen: length of user buffer area
* @flags: receive flags
*
* Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
* If the complete message doesn't fit in user area, truncate it.
*
- * Returns size of returned message data, errno otherwise
+ * Return: size of returned message data, errno otherwise
*/
-static int recv_msg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
+ size_t buflen, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- struct sk_buff *buf;
- struct tipc_msg *msg;
+ bool connected = !tipc_sk_type_connectionless(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ int rc, err, hlen, dlen, copy;
+ struct tipc_skb_cb *skb_cb;
+ struct sk_buff_head xmitq;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ bool grp_evt;
long timeout;
- unsigned int sz;
- u32 err;
- int res;
/* Catch invalid receive requests */
- if (unlikely(!buf_len))
+ if (unlikely(!buflen))
return -EINVAL;
lock_sock(sk);
-
- if (unlikely(sock->state == SS_UNCONNECTED)) {
- res = -ENOTCONN;
+ if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
+ rc = -ENOTCONN;
goto exit;
}
+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
- /* will be updated in set_orig_addr() if needed */
- m->msg_namelen = 0;
+ /* Step rcv queue to first msg with data or error; wait if necessary */
+ do {
+ rc = tipc_wait_for_rcvmsg(sock, &timeout);
+ if (unlikely(rc))
+ goto exit;
+ skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+ hlen = msg_hdr_sz(hdr);
+ err = msg_errcode(hdr);
+ grp_evt = msg_is_grp_evt(hdr);
+ if (likely(dlen || err))
+ break;
+ tsk_advance_rx_queue(sk);
+ } while (1);
- timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-restart:
+ /* Collect msg meta data, including error code and rejected data */
+ tipc_sk_set_orig_addr(m, skb);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
+ if (unlikely(rc))
+ goto exit;
+ hdr = buf_msg(skb);
+
+ /* Capture data if non-error msg, otherwise just set return value */
+ if (likely(!err)) {
+ int offset = skb_cb->bytes_read;
- /* Look for a message in receive queue; wait if necessary */
- while (skb_queue_empty(&sk->sk_receive_queue)) {
- if (sock->state == SS_DISCONNECTING) {
- res = -ENOTCONN;
+ copy = min_t(int, dlen - offset, buflen);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
goto exit;
+ if (unlikely(offset + copy < dlen)) {
+ if (flags & MSG_EOR) {
+ if (!(flags & MSG_PEEK))
+ skb_cb->bytes_read = offset + copy;
+ } else {
+ m->msg_flags |= MSG_TRUNC;
+ skb_cb->bytes_read = 0;
+ }
+ } else {
+ if (flags & MSG_EOR)
+ m->msg_flags |= MSG_EOR;
+ skb_cb->bytes_read = 0;
}
- if (timeout <= 0L) {
- res = timeout ? timeout : -EWOULDBLOCK;
+ } else {
+ copy = 0;
+ rc = 0;
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
+ rc = -ECONNRESET;
goto exit;
}
- release_sock(sk);
- timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
- tipc_rx_ready(sock),
- timeout);
- lock_sock(sk);
}
- /* Look at first message in receive queue */
- buf = skb_peek(&sk->sk_receive_queue);
- msg = buf_msg(buf);
- sz = msg_data_sz(msg);
- err = msg_errcode(msg);
-
- /* Discard an empty non-errored message & try again */
- if ((!sz) && (!err)) {
- advance_rx_queue(sk);
- goto restart;
+ /* Mark message as group event if applicable */
+ if (unlikely(grp_evt)) {
+ if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
+ m->msg_flags |= MSG_EOR;
+ m->msg_flags |= MSG_OOB;
+ copy = 0;
}
- /* Capture sender's address (optional) */
- set_orig_addr(m, msg);
-
- /* Capture ancillary data (optional) */
- res = anc_data_recv(m, msg, tport);
- if (res)
+ /* Capture of data or error code/rejected data was successful */
+ if (unlikely(flags & MSG_PEEK))
goto exit;
- /* Capture message data (if valid) & compute return value (always) */
- if (!err) {
- if (unlikely(buf_len < sz)) {
- sz = buf_len;
- m->msg_flags |= MSG_TRUNC;
- }
- res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
- m->msg_iov, sz);
- if (res)
- goto exit;
- res = sz;
- } else {
- if ((sock->state == SS_READY) ||
- ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
- res = 0;
- else
- res = -ECONNRESET;
+ /* Send group flow control advertisement when applicable */
+ if (tsk->group && msg_in_group(hdr) && !grp_evt) {
+ __skb_queue_head_init(&xmitq);
+ tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
+ msg_orignode(hdr), msg_origport(hdr),
+ &xmitq);
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
- /* Consume received message (optional) */
- if (likely(!(flags & MSG_PEEK))) {
- if ((sock->state != SS_READY) &&
- (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
- tipc_acknowledge(tport->ref, tport->conn_unacked);
- advance_rx_queue(sk);
- }
+ if (skb_cb->bytes_read)
+ goto exit;
+
+ tsk_advance_rx_queue(sk);
+
+ if (likely(!connected))
+ goto exit;
+
+ /* Send connection flow control advertisement when applicable */
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
+ tipc_sk_send_ack(tsk);
exit:
release_sock(sk);
- return res;
+ return rc ? rc : copy;
}
/**
- * recv_stream - receive stream-oriented data
- * @iocb: (unused)
+ * tipc_recvstream - receive stream-oriented data
+ * @sock: network socket
* @m: descriptor for message info
- * @buf_len: total size of user buffer area
+ * @buflen: total size of user buffer area
* @flags: receive flags
*
* Used for SOCK_STREAM messages only. If not enough data is available
* will optionally wait for more; never truncates data.
*
- * Returns size of returned message data, errno otherwise
+ * Return: size of returned message data, errno otherwise
*/
-static int recv_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvstream(struct socket *sock, struct msghdr *m,
+ size_t buflen, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- struct sk_buff *buf;
- struct tipc_msg *msg;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+ struct tipc_skb_cb *skb_cb;
+ bool peek = flags & MSG_PEEK;
+ int offset, required, copy, copied = 0;
+ int hlen, dlen, err, rc;
long timeout;
- unsigned int sz;
- int sz_to_copy, target, needed;
- int sz_copied = 0;
- u32 err;
- int res = 0;
/* Catch invalid receive attempts */
- if (unlikely(!buf_len))
+ if (unlikely(!buflen))
return -EINVAL;
lock_sock(sk);
- if (unlikely((sock->state == SS_UNCONNECTED))) {
- res = -ENOTCONN;
+ if (unlikely(sk->sk_state == TIPC_OPEN)) {
+ rc = -ENOTCONN;
goto exit;
}
-
- /* will be updated in set_orig_addr() if needed */
- m->msg_namelen = 0;
-
- target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
+ required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-restart:
- /* Look for a message in receive queue; wait if necessary */
- while (skb_queue_empty(&sk->sk_receive_queue)) {
- if (sock->state == SS_DISCONNECTING) {
- res = -ENOTCONN;
- goto exit;
- }
- if (timeout <= 0L) {
- res = timeout ? timeout : -EWOULDBLOCK;
- goto exit;
+ do {
+ /* Look at first msg in receive queue; wait if necessary */
+ rc = tipc_wait_for_rcvmsg(sock, &timeout);
+ if (unlikely(rc))
+ break;
+ skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+ hlen = msg_hdr_sz(hdr);
+ err = msg_errcode(hdr);
+
+ /* Discard any empty non-errored (SYN-) message */
+ if (unlikely(!dlen && !err)) {
+ tsk_advance_rx_queue(sk);
+ continue;
}
- release_sock(sk);
- timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
- tipc_rx_ready(sock),
- timeout);
- lock_sock(sk);
- }
-
- /* Look at first message in receive queue */
- buf = skb_peek(&sk->sk_receive_queue);
- msg = buf_msg(buf);
- sz = msg_data_sz(msg);
- err = msg_errcode(msg);
-
- /* Discard an empty non-errored message & try again */
- if ((!sz) && (!err)) {
- advance_rx_queue(sk);
- goto restart;
- }
-
- /* Optionally capture sender's address & ancillary data of first msg */
- if (sz_copied == 0) {
- set_orig_addr(m, msg);
- res = anc_data_recv(m, msg, tport);
- if (res)
- goto exit;
- }
-
- /* Capture message data (if valid) & compute return value (always) */
- if (!err) {
- u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
-
- sz -= offset;
- needed = (buf_len - sz_copied);
- sz_to_copy = (sz <= needed) ? sz : needed;
-
- res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
- m->msg_iov, sz_to_copy);
- if (res)
- goto exit;
- sz_copied += sz_to_copy;
+ /* Collect msg meta data, incl. error code and rejected data */
+ if (!copied) {
+ tipc_sk_set_orig_addr(m, skb);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
+ if (rc)
+ break;
+ hdr = buf_msg(skb);
+ }
- if (sz_to_copy < sz) {
- if (!(flags & MSG_PEEK))
- TIPC_SKB_CB(buf)->handle =
- (void *)(unsigned long)(offset + sz_to_copy);
- goto exit;
+ /* Copy data if msg ok, otherwise return error/partial data */
+ if (likely(!err)) {
+ offset = skb_cb->bytes_read;
+ copy = min_t(int, dlen - offset, buflen - copied);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ break;
+ copied += copy;
+ offset += copy;
+ if (unlikely(offset < dlen)) {
+ if (!peek)
+ skb_cb->bytes_read = offset;
+ break;
+ }
+ } else {
+ rc = 0;
+ if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
+ rc = -ECONNRESET;
+ if (copied || rc)
+ break;
}
- } else {
- if (sz_copied != 0)
- goto exit; /* can't add error msg to valid data */
- if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
- res = 0;
- else
- res = -ECONNRESET;
- }
+ if (unlikely(peek))
+ break;
- /* Consume received message (optional) */
- if (likely(!(flags & MSG_PEEK))) {
- if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
- tipc_acknowledge(tport->ref, tport->conn_unacked);
- advance_rx_queue(sk);
- }
+ tsk_advance_rx_queue(sk);
+
+ /* Send connection flow control advertisement when applicable */
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
+ tipc_sk_send_ack(tsk);
- /* Loop around if more data is required */
- if ((sz_copied < buf_len) && /* didn't get all requested data */
- (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sz_copied < target)) && /* and more is ready or required */
- (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
- (!err)) /* and haven't reached a FIN */
- goto restart;
+ /* Exit if all requested data or FIN/error received */
+ if (copied == buflen || err)
+ break;
+ } while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
exit:
release_sock(sk);
- return sz_copied ? sz_copied : res;
+ return copied ? copied : rc;
}
/**
@@ -1200,292 +2112,507 @@ static void tipc_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
rcu_read_unlock();
}
/**
* tipc_data_ready - wake up threads to indicate messages have been received
* @sk: socket
- * @len: the length of messages
*/
-static void tipc_data_ready(struct sock *sk, int len)
+static void tipc_data_ready(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
- if (wq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
- POLLRDNORM | POLLRDBAND);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+ EPOLLRDNORM | EPOLLRDBAND);
rcu_read_unlock();
}
-/**
- * filter_connect - Handle all incoming messages for a connection-based socket
- * @tsock: TIPC socket
- * @msg: message
- *
- * Returns TIPC error status code and socket error status code
- * once it encounters some errors
- */
-static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
+static void tipc_sock_destruct(struct sock *sk)
{
- struct socket *sock = tsock->sk.sk_socket;
- struct tipc_msg *msg = buf_msg(*buf);
- struct sock *sk = &tsock->sk;
- u32 retval = TIPC_ERR_NO_PORT;
- int res;
-
- if (msg_mcast(msg))
- return retval;
+ __skb_queue_purge(&sk->sk_receive_queue);
+}
- switch ((int)sock->state) {
- case SS_CONNECTED:
- /* Accept only connection-based messages sent by peer */
- if (msg_connected(msg) && tipc_port_peer_msg(tsock->p, msg)) {
- if (unlikely(msg_errcode(msg))) {
- sock->state = SS_DISCONNECTING;
- __tipc_disconnect(tsock->p);
- }
- retval = TIPC_OK;
- }
+static void tipc_sk_proto_rcv(struct sock *sk,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_group *grp = tsk->group;
+ bool wakeup = false;
+
+ switch (msg_user(hdr)) {
+ case CONN_MANAGER:
+ tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq);
+ return;
+ case SOCK_WAKEUP:
+ tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+ /* coupled with smp_rmb() in tipc_wait_for_cond() */
+ smp_wmb();
+ tsk->cong_link_cnt--;
+ wakeup = true;
+ tipc_sk_push_backlog(tsk, false);
break;
- case SS_CONNECTING:
- /* Accept only ACK or NACK message */
- if (unlikely(msg_errcode(msg))) {
- sock->state = SS_DISCONNECTING;
- sk->sk_err = -ECONNREFUSED;
- retval = TIPC_OK;
- break;
+ case GROUP_PROTOCOL:
+ tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
+ break;
+ case TOP_SRV:
+ tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
+ hdr, inputq, xmitq);
+ break;
+ default:
+ break;
+ }
+
+ if (wakeup)
+ sk->sk_write_space(sk);
+
+ kfree_skb(skb);
+}
+
+/**
+ * tipc_sk_filter_connect - check incoming message for a connection-based socket
+ * @tsk: TIPC socket
+ * @skb: pointer to message buffer.
+ * @xmitq: for Nagle ACK if any
+ * Return: true if message should be added to receive queue, false otherwise
+ */
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+ bool con_msg = msg_connected(hdr);
+ u32 pport = tsk_peer_port(tsk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 oport = msg_origport(hdr);
+ u32 onode = msg_orignode(hdr);
+ int err = msg_errcode(hdr);
+ unsigned long delay;
+
+ if (unlikely(msg_mcast(hdr)))
+ return false;
+ tsk->oneway = 0;
+
+ switch (sk->sk_state) {
+ case TIPC_CONNECTING:
+ /* Setup ACK */
+ if (likely(con_msg)) {
+ if (err)
+ break;
+ tipc_sk_finish_conn(tsk, oport, onode);
+ msg_set_importance(&tsk->phdr, msg_importance(hdr));
+ /* ACK+ message with data is added to receive queue */
+ if (msg_data_sz(hdr))
+ return true;
+ /* Empty ACK-: wake up sleeping connect() and drop */
+ sk->sk_state_change(sk);
+ msg_set_dest_droppable(hdr, 1);
+ return false;
}
+ /* Ignore connectionless message if not from listening socket */
+ if (oport != pport || onode != pnode)
+ return false;
- if (unlikely(!msg_connected(msg)))
+ /* Rejected SYN */
+ if (err != TIPC_ERR_OVERLOAD)
break;
- res = auto_connect(sock, msg);
- if (res) {
- sock->state = SS_DISCONNECTING;
- sk->sk_err = res;
- retval = TIPC_OK;
+ /* Prepare for new setup attempt if we have a SYN clone */
+ if (skb_queue_empty(&sk->sk_write_queue))
break;
- }
-
- /* If an incoming message is an 'ACK-', it should be
- * discarded here because it doesn't contain useful
- * data. In addition, we should try to wake up
- * connect() routine if sleeping.
- */
- if (msg_data_sz(msg) == 0) {
- kfree_skb(*buf);
- *buf = NULL;
- if (waitqueue_active(sk_sleep(sk)))
- wake_up_interruptible(sk_sleep(sk));
- }
- retval = TIPC_OK;
- break;
- case SS_LISTENING:
- case SS_UNCONNECTED:
+ get_random_bytes(&delay, 2);
+ delay %= (tsk->conn_timeout / 4);
+ delay = msecs_to_jiffies(delay + 100);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
+ return false;
+ case TIPC_OPEN:
+ case TIPC_DISCONNECTING:
+ return false;
+ case TIPC_LISTEN:
/* Accept only SYN message */
- if (!msg_connected(msg) && !(msg_errcode(msg)))
- retval = TIPC_OK;
- break;
- case SS_DISCONNECTING:
- break;
+ if (!msg_is_syn(hdr) &&
+ tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT)
+ return false;
+ if (!con_msg && !err)
+ return true;
+ return false;
+ case TIPC_ESTABLISHED:
+ if (!skb_queue_empty(&sk->sk_write_queue))
+ tipc_sk_push_backlog(tsk, false);
+ /* Accept only connection-based messages sent by peer */
+ if (likely(con_msg && !err && pport == oport &&
+ pnode == onode)) {
+ if (msg_ack_required(hdr)) {
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (skb) {
+ msg_set_nagle_ack(buf_msg(skb));
+ __skb_queue_tail(xmitq, skb);
+ }
+ }
+ return true;
+ }
+ if (!tsk_peer_msg(tsk, hdr))
+ return false;
+ if (!err)
+ return true;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, pnode, tsk->portid);
+ sk->sk_state_change(sk);
+ return true;
default:
- pr_err("Unknown socket state %u\n", sock->state);
+ pr_err("Unknown sk_state %u\n", sk->sk_state);
}
- return retval;
+ /* Abort connection setup attempt */
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_state_change(sk);
+ return true;
}
/**
* rcvbuf_limit - get proper overload limit of socket receive queue
* @sk: socket
- * @buf: message
+ * @skb: message
*
- * For all connection oriented messages, irrespective of importance,
- * the default overload value (i.e. 67MB) is set as limit.
+ * For connection oriented messages, irrespective of importance,
+ * default queue limit is 2 MB.
*
- * For all connectionless messages, by default new queue limits are
- * as belows:
+ * For connectionless messages, queue limits are based on message
+ * importance as follows:
*
- * TIPC_LOW_IMPORTANCE (4 MB)
- * TIPC_MEDIUM_IMPORTANCE (8 MB)
- * TIPC_HIGH_IMPORTANCE (16 MB)
- * TIPC_CRITICAL_IMPORTANCE (32 MB)
+ * TIPC_LOW_IMPORTANCE (2 MB)
+ * TIPC_MEDIUM_IMPORTANCE (4 MB)
+ * TIPC_HIGH_IMPORTANCE (8 MB)
+ * TIPC_CRITICAL_IMPORTANCE (16 MB)
*
- * Returns overload limit according to corresponding message importance
+ * Return: overload limit according to corresponding message importance
*/
-static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
+static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
- struct tipc_msg *msg = buf_msg(buf);
- unsigned int limit;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
- if (msg_connected(msg))
- limit = sysctl_tipc_rmem[2];
- else
- limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
- msg_importance(msg);
- return limit;
+ if (unlikely(msg_in_group(hdr)))
+ return READ_ONCE(sk->sk_rcvbuf);
+
+ if (unlikely(!msg_connected(hdr)))
+ return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
+
+ if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
+ return READ_ONCE(sk->sk_rcvbuf);
+
+ return FLOWCTL_MSG_LIM;
}
/**
- * filter_rcv - validate incoming message
+ * tipc_sk_filter_rcv - validate incoming message
* @sk: socket
- * @buf: message
+ * @skb: pointer to message.
+ * @xmitq: output queue; response/rejected messages are appended for the caller to send
*
* Enqueues message on receive queue if acceptable; optionally handles
* disconnect indication for a connected socket.
*
- * Called with socket lock already taken; port lock may also be taken.
- *
- * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
+ * Called with socket lock already taken
*/
-static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
+static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- struct socket *sock = sk->sk_socket;
- struct tipc_msg *msg = buf_msg(buf);
- unsigned int limit = rcvbuf_limit(sk, buf);
- u32 res = TIPC_OK;
-
- /* Reject message if it is wrong sort of message for socket */
- if (msg_type(msg) > TIPC_DIRECT_MSG)
- return TIPC_ERR_NO_PORT;
+ bool sk_conn = !tipc_sk_type_connectionless(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct net *net = sock_net(sk);
+ struct sk_buff_head inputq;
+ int mtyp = msg_type(hdr);
+ int limit, err = TIPC_OK;
+
+ trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " ");
+ TIPC_SKB_CB(skb)->bytes_read = 0;
+ __skb_queue_head_init(&inputq);
+ __skb_queue_tail(&inputq, skb);
+
+ if (unlikely(!msg_isdata(hdr)))
+ tipc_sk_proto_rcv(sk, &inputq, xmitq);
+
+ if (unlikely(grp))
+ tipc_group_filter_msg(grp, &inputq, xmitq);
+
+ if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG)
+ tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq);
+
+ /* Validate and add to receive buffer if there is space */
+ while ((skb = __skb_dequeue(&inputq))) {
+ hdr = buf_msg(skb);
+ limit = rcvbuf_limit(sk, skb);
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) ||
+ (!sk_conn && msg_connected(hdr)) ||
+ (!grp && msg_in_group(hdr)))
+ err = TIPC_ERR_NO_PORT;
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+ trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL,
+ "err_overload2!");
+ sk_drops_inc(sk);
+ err = TIPC_ERR_OVERLOAD;
+ }
- if (sock->state == SS_READY) {
- if (msg_connected(msg))
- return TIPC_ERR_NO_PORT;
- } else {
- res = filter_connect(tipc_sk(sk), &buf);
- if (res != TIPC_OK || buf == NULL)
- return res;
+ if (unlikely(err)) {
+ if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) {
+ trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_NONE,
+ "@filter_rcv!");
+ __skb_queue_tail(xmitq, skb);
+ }
+ err = TIPC_OK;
+ continue;
+ }
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ trace_tipc_sk_overlimit2(sk, skb, TIPC_DUMP_ALL,
+ "rcvq >90% allocated!");
+ sk->sk_data_ready(sk);
}
-
- /* Reject message if there isn't room to queue it */
- if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
- return TIPC_ERR_OVERLOAD;
-
- /* Enqueue message */
- TIPC_SKB_CB(buf)->handle = 0;
- __skb_queue_tail(&sk->sk_receive_queue, buf);
- skb_set_owner_r(buf, sk);
-
- sk->sk_data_ready(sk, 0);
- return TIPC_OK;
}
/**
- * backlog_rcv - handle incoming message from backlog queue
+ * tipc_sk_backlog_rcv - handle incoming message from backlog queue
* @sk: socket
- * @buf: message
+ * @skb: message
*
- * Caller must hold socket lock, but not port lock.
- *
- * Returns 0
+ * Caller must hold socket lock
*/
-static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
- u32 res;
+ unsigned int before = sk_rmem_alloc_get(sk);
+ struct sk_buff_head xmitq;
+ unsigned int added;
- res = filter_rcv(sk, buf);
- if (res)
- tipc_reject_msg(buf, res);
+ __skb_queue_head_init(&xmitq);
+
+ tipc_sk_filter_rcv(sk, skb, &xmitq);
+ added = sk_rmem_alloc_get(sk) - before;
+ atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
+
+ /* Send pending response/rejected messages, if any */
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
return 0;
}
/**
- * dispatch - handle incoming message
- * @tport: TIPC port that received message
- * @buf: message
- *
- * Called with port lock already taken.
+ * tipc_sk_enqueue - extract all buffers with destination 'dport' from
+ * inputq and try adding them to socket or backlog queue
+ * @inputq: list of incoming buffers with potentially different destinations
+ * @sk: socket where the buffers should be enqueued
+ * @dport: port number for the socket
+ * @xmitq: output queue
*
- * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
+ * Caller must hold socket lock
*/
-static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
+static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
+ u32 dport, struct sk_buff_head *xmitq)
{
- struct sock *sk = tport->sk;
- u32 res;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
+ struct sk_buff *skb;
+ unsigned int lim;
+ atomic_t *dcnt;
+ u32 onode;
+
+ while (skb_queue_len(inputq)) {
+ if (unlikely(time_after_eq(jiffies, time_limit)))
+ return;
+
+ skb = tipc_skb_dequeue(inputq, dport);
+ if (unlikely(!skb))
+ return;
+
+ /* Add message directly to receive queue if possible */
+ if (!sock_owned_by_user(sk)) {
+ tipc_sk_filter_rcv(sk, skb, xmitq);
+ continue;
+ }
- /*
- * Process message if socket is unlocked; otherwise add to backlog queue
- *
- * This code is based on sk_receive_skb(), but must be distinct from it
- * since a TIPC-specific filter/reject mechanism is utilized
- */
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk)) {
- res = filter_rcv(sk, buf);
- } else {
- if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
- res = TIPC_ERR_OVERLOAD;
- else
- res = TIPC_OK;
- }
- bh_unlock_sock(sk);
+ /* Try backlog, compensating for double-counted bytes */
+ dcnt = &tipc_sk(sk)->dupl_rcvcnt;
+ if (!sk->sk_backlog.len)
+ atomic_set(dcnt, 0);
+ lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
+ if (likely(!sk_add_backlog(sk, skb, lim))) {
+ trace_tipc_sk_overlimit1(sk, skb, TIPC_DUMP_ALL,
+ "bklg & rcvq >90% allocated!");
+ continue;
+ }
- return res;
+ trace_tipc_sk_dump(sk, skb, TIPC_DUMP_ALL, "err_overload!");
+ /* Overload => reject message back to sender */
+ onode = tipc_own_addr(sock_net(sk));
+ sk_drops_inc(sk);
+ if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) {
+ trace_tipc_sk_rej_msg(sk, skb, TIPC_DUMP_ALL,
+ "@sk_enqueue!");
+ __skb_queue_tail(xmitq, skb);
+ }
+ break;
+ }
}
/**
- * wakeupdispatch - wake up port after congestion
- * @tport: port to wakeup
- *
- * Called with port lock already taken.
+ * tipc_sk_rcv - handle a chain of incoming buffers
+ * @net: the associated network namespace
+ * @inputq: buffer list containing the buffers
+ * Consumes all buffers in list until inputq is empty
+ * Note: may be called in multiple threads referring to the same queue
*/
-static void wakeupdispatch(struct tipc_port *tport)
+void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) /* deliver each buffer in inputq to its destination socket, or reject/forward it */
+{
+ struct sk_buff_head xmitq; /* local queue of response/rejected messages to transmit */
+ u32 dnode, dport = 0;
+ int err;
+ struct tipc_sock *tsk;
+ struct sock *sk;
+ struct sk_buff *skb;
+
+ __skb_queue_head_init(&xmitq);
+ while (skb_queue_len(inputq)) {
+ dport = tipc_skb_peek_port(inputq, dport); /* presumably selects a destination port present in inputq -- TODO confirm helper semantics */
+ tsk = tipc_sk_lookup(net, dport); /* holds a reference on the socket when found */
+
+ if (likely(tsk)) {
+ sk = &tsk->sk;
+ if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { /* non-blocking attempt: if it fails, nothing is enqueued on this pass */
+ tipc_sk_enqueue(inputq, sk, dport, &xmitq);
+ spin_unlock_bh(&sk->sk_lock.slock);
+ }
+ /* Send pending response/rejected messages, if any */
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
+ sock_put(sk); /* drop the reference taken by tipc_sk_lookup() */
+ continue;
+ }
+ /* No destination socket => dequeue skb if still there */
+ skb = tipc_skb_dequeue(inputq, dport);
+ if (!skb) /* nothing was dequeued for dport: stop processing */
+ return;
+
+ /* Try secondary lookup if unresolved named message */
+ err = TIPC_ERR_NO_PORT; /* default reject code; passed by pointer, so the lookup may change it */
+ if (tipc_msg_lookup_dest(net, skb, &err))
+ goto xmit;
+
+ /* Prepare for message rejection */
+ if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) /* reversal failed: no reply is sent for this buffer */
+ continue;
+
+ trace_tipc_sk_rej_msg(NULL, skb, TIPC_DUMP_NONE, "@sk_rcv!");
+xmit: /* reached with either a re-resolved message or a reversed (rejected) one */
+ dnode = msg_destnode(buf_msg(skb));
+ tipc_node_xmit_skb(net, skb, dnode, dport);
+ }
+}
+
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) /* sleep until the socket is connected, fails, times out or is interrupted */
{
- struct sock *sk = tport->sk;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct sock *sk = sock->sk;
+ int done; /* result of sk_wait_event(); the loop repeats while it is zero */
+
+ do {
+ int err = sock_error(sk);
+ if (err) /* a pending socket error ends the wait and is returned as-is */
+ return err;
+ if (!*timeo_p) /* remaining timeout is zero */
+ return -ETIMEDOUT;
+ if (signal_pending(current))
+ return sock_intr_errno(*timeo_p);
+ if (sk->sk_state == TIPC_DISCONNECTING) /* leaves the loop; the function then returns 0 */
+ break;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk),
+ &wait); /* condition waited for: tipc_sk_connected(sk); timeout is tracked through timeo_p */
+ remove_wait_queue(sk_sleep(sk), &wait);
+ } while (!done);
+ return 0;
+}
- sk->sk_write_space(sk);
+static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) /* basic validity check of a caller-supplied TIPC socket address */
+{
+ if (addr->family != AF_TIPC) /* only AF_TIPC addresses are accepted */
+ return false;
+ if (addr->addrtype == TIPC_SERVICE_RANGE) /* a range is sane only when lower <= upper */
+ return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper);
+ return (addr->addrtype == TIPC_SERVICE_ADDR ||
+ addr->addrtype == TIPC_SOCKET_ADDR); /* any other address type is rejected */
}
/**
- * connect - establish a connection to another TIPC port
+ * tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
* @dest: socket address for destination port
* @destlen: size of socket address data structure
* @flags: file-related flags associated with socket
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
- int flags)
+static int tipc_connect(struct socket *sock, struct sockaddr_unsized *dest,
+ int destlen, int flags)
{
struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
struct msghdr m = {NULL,};
- unsigned int timeout;
- int res;
+ long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
+ int previous;
+ int res = 0;
+
+ if (destlen != sizeof(struct sockaddr_tipc))
+ return -EINVAL;
lock_sock(sk);
- /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
- if (sock->state == SS_READY) {
- res = -EOPNOTSUPP;
+ if (tsk->group) {
+ res = -EINVAL;
goto exit;
}
- /*
- * Reject connection attempt using multicast address
- *
- * Note: send_msg() validates the rest of the address fields,
- * so there's no need to do it here
- */
- if (dst->addrtype == TIPC_ADDR_MCAST) {
+ if (dst->family == AF_UNSPEC) {
+ memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
+ if (!tipc_sk_type_connectionless(sk))
+ res = -EINVAL;
+ goto exit;
+ }
+ if (!tipc_sockaddr_is_sane(dst)) {
+ res = -EINVAL;
+ goto exit;
+ }
+ /* DGRAM/RDM connect(), just save the destaddr */
+ if (tipc_sk_type_connectionless(sk)) {
+ memcpy(&tsk->peer, dest, destlen);
+ goto exit;
+ } else if (dst->addrtype == TIPC_SERVICE_RANGE) {
res = -EINVAL;
goto exit;
}
- timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout;
+ previous = sk->sk_state;
- switch (sock->state) {
- case SS_UNCONNECTED:
+ switch (sk->sk_state) {
+ case TIPC_OPEN:
/* Send a 'SYN-' to destination */
m.msg_name = dest;
m.msg_namelen = destlen;
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
/* If connect() is non-blocking, set MSG_DONTWAIT so that the
* SYN send via __tipc_sendmsg() never blocks.
@@ -1493,131 +2620,128 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
if (!timeout)
m.msg_flags = MSG_DONTWAIT;
- res = send_msg(NULL, sock, &m, 0);
+ res = __tipc_sendmsg(sock, &m, 0);
if ((res < 0) && (res != -EWOULDBLOCK))
goto exit;
- /* Just entered SS_CONNECTING state; the only
+ /* Just entered TIPC_CONNECTING state; the only
* difference is that return value in non-blocking
* case is EINPROGRESS, rather than EALREADY.
*/
res = -EINPROGRESS;
+ fallthrough;
+ case TIPC_CONNECTING:
+ if (!timeout) {
+ if (previous == TIPC_CONNECTING)
+ res = -EALREADY;
+ goto exit;
+ }
+ timeout = msecs_to_jiffies(timeout);
+ /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
+ res = tipc_wait_for_connect(sock, &timeout);
break;
- case SS_CONNECTING:
- res = -EALREADY;
- break;
- case SS_CONNECTED:
+ case TIPC_ESTABLISHED:
res = -EISCONN;
break;
default:
res = -EINVAL;
- goto exit;
- }
-
- if (sock->state == SS_CONNECTING) {
- if (!timeout)
- goto exit;
-
- /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
- release_sock(sk);
- res = wait_event_interruptible_timeout(*sk_sleep(sk),
- sock->state != SS_CONNECTING,
- timeout ? (long)msecs_to_jiffies(timeout)
- : MAX_SCHEDULE_TIMEOUT);
- lock_sock(sk);
- if (res <= 0) {
- if (res == 0)
- res = -ETIMEDOUT;
- else
- ; /* leave "res" unchanged */
- goto exit;
- }
}
- if (unlikely(sock->state == SS_DISCONNECTING))
- res = sock_error(sk);
- else
- res = 0;
-
exit:
release_sock(sk);
return res;
}
/**
- * listen - allow socket to listen for incoming connections
+ * tipc_listen - allow socket to listen for incoming connections
* @sock: socket structure
* @len: (unused)
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int listen(struct socket *sock, int len)
+static int tipc_listen(struct socket *sock, int len)
{
struct sock *sk = sock->sk;
int res;
lock_sock(sk);
-
- if (sock->state != SS_UNCONNECTED)
- res = -EINVAL;
- else {
- sock->state = SS_LISTENING;
- res = 0;
- }
-
+ res = tipc_set_sk_state(sk, TIPC_LISTEN);
release_sock(sk);
+
return res;
}
+static int tipc_wait_for_accept(struct socket *sock, long timeo) /* wait until the receive queue is non-empty; returns 0 or a negative errno */
+{
+ struct sock *sk = sock->sk;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int err;
+
+ /* True wake-one mechanism for incoming connections: only
+ * one process gets woken up, not the 'whole herd'.
+ * Since we do not 'race & poll' for established sockets
+ * anymore, the common case will execute the loop only once.
+ */
+ for (;;) {
+ if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { /* sleep only if time remains and nothing is queued */
+ add_wait_queue(sk_sleep(sk), &wait);
+ release_sock(sk); /* the socket lock is dropped while sleeping and re-taken below */
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ }
+ err = 0;
+ if (!skb_queue_empty(&sk->sk_receive_queue)) /* success: something is queued */
+ break;
+ err = -EAGAIN;
+ if (!timeo) /* timeout exhausted (or zero from the start) with an empty queue */
+ break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current)) /* interrupted by a signal; err was computed just above */
+ break;
+ }
+ return err;
+}
+
/**
- * accept - wait for connection request
+ * tipc_accept - wait for connection request
* @sock: listening socket
- * @newsock: new socket that is to be connected
- * @flags: file-related flags associated with socket
+ * @new_sock: new socket that is to be connected
+ * @arg: arguments for accept
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock,
+ struct proto_accept_arg *arg)
{
struct sock *new_sk, *sk = sock->sk;
- struct sk_buff *buf;
struct tipc_sock *new_tsock;
- struct tipc_port *new_tport;
+ struct msghdr m = {NULL,};
struct tipc_msg *msg;
- u32 new_ref;
-
+ struct sk_buff *buf;
+ long timeo;
int res;
lock_sock(sk);
- if (sock->state != SS_LISTENING) {
+ if (sk->sk_state != TIPC_LISTEN) {
res = -EINVAL;
goto exit;
}
-
- while (skb_queue_empty(&sk->sk_receive_queue)) {
- if (flags & O_NONBLOCK) {
- res = -EWOULDBLOCK;
- goto exit;
- }
- release_sock(sk);
- res = wait_event_interruptible(*sk_sleep(sk),
- (!skb_queue_empty(&sk->sk_receive_queue)));
- lock_sock(sk);
- if (res)
- goto exit;
- }
+ timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
+ res = tipc_wait_for_accept(sock, timeo);
+ if (res)
+ goto exit;
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern);
if (res)
goto exit;
+ security_sk_clone(sock->sk, new_sock->sk);
new_sk = new_sock->sk;
new_tsock = tipc_sk(new_sk);
- new_tport = new_tsock->p;
- new_ref = new_tport->ref;
msg = buf_msg(buf);
/* we lock on new_sk; but lockdep sees the lock on sk */
@@ -1627,55 +2751,49 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
* Reject any stray messages received by new socket
* before the socket lock was taken (very, very unlikely)
*/
- reject_rx_queue(new_sk);
+ tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT);
/* Connect new socket to its peer */
- new_tsock->peer_name.ref = msg_origport(msg);
- new_tsock->peer_name.node = msg_orignode(msg);
- tipc_connect(new_ref, &new_tsock->peer_name);
- new_sock->state = SS_CONNECTED;
+ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- tipc_set_portimportance(new_ref, msg_importance(msg));
+ tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
- new_tport->conn_type = msg_nametype(msg);
- new_tport->conn_instance = msg_nameinst(msg);
+ new_tsock->conn_addrtype = TIPC_SERVICE_ADDR;
+ msg_set_nametype(&new_tsock->phdr, msg_nametype(msg));
+ msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg));
}
/*
- * Respond to 'SYN-' by discarding it & returning 'ACK'-.
- * Respond to 'SYN+' by queuing it on new socket.
+ * Respond to 'SYN-' by discarding it & returning 'ACK'.
+ * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
*/
if (!msg_data_sz(msg)) {
- struct msghdr m = {NULL,};
-
- advance_rx_queue(sk);
- send_packet(NULL, new_sock, &m, 0);
+ tsk_advance_rx_queue(sk);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
+ __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
-
exit:
release_sock(sk);
return res;
}
/**
- * shutdown - shutdown socket connection
+ * tipc_shutdown - shutdown socket connection
* @sock: socket structure
* @how: direction to close (must be SHUT_RDWR)
*
* Terminates connection (if necessary), then purges socket's receive queue.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int shutdown(struct socket *sock, int how)
+static int tipc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- struct sk_buff *buf;
int res;
if (how != SHUT_RDWR)
@@ -1683,48 +2801,326 @@ static int shutdown(struct socket *sock, int how)
lock_sock(sk);
- switch (sock->state) {
- case SS_CONNECTING:
- case SS_CONNECTED:
-
-restart:
- /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
- buf = __skb_dequeue(&sk->sk_receive_queue);
- if (buf) {
- if (TIPC_SKB_CB(buf)->handle != 0) {
- kfree_skb(buf);
- goto restart;
- }
- tipc_disconnect(tport->ref);
- tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
- } else {
- tipc_shutdown(tport->ref);
- }
-
- sock->state = SS_DISCONNECTING;
-
- /* fall through */
-
- case SS_DISCONNECTING:
+ trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " ");
+ __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+ if (sk->sk_state == TIPC_DISCONNECTING) {
/* Discard any unreceived messages */
__skb_queue_purge(&sk->sk_receive_queue);
- /* Wake up anyone sleeping in poll */
- sk->sk_state_change(sk);
res = 0;
- break;
-
- default:
+ } else {
res = -ENOTCONN;
}
+ /* Wake up anyone sleeping in poll. */
+ sk->sk_state_change(sk);
release_sock(sk);
return res;
}
+static void tipc_sk_check_probing_state(struct sock *sk,
+ struct sk_buff_head *list) /* abort the connection if the last probe is unacked, else queue a new probe on list */
+{
+ struct tipc_sock *tsk = tipc_sk(sk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 pport = tsk_peer_port(tsk);
+ u32 self = tsk_own_node(tsk);
+ u32 oport = tsk->portid;
+ struct sk_buff *skb;
+
+ if (tsk->probe_unacked) { /* flag is still set from the previous probe sent below */
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNABORTED;
+ tipc_node_remove_conn(sock_net(sk), pnode, pport); /* NOTE(review): the peer port is passed here, while another call site in this file passes tsk->portid -- confirm which port is expected */
+ sk->sk_state_change(sk);
+ return;
+ }
+ /* Prepare new probe */
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+ pnode, self, pport, oport, TIPC_OK);
+ if (skb) /* a NULL result is tolerated: no probe is queued in that case */
+ __skb_queue_tail(list, skb);
+ tsk->probe_unacked = true; /* set even when no probe buffer could be created */
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); /* re-arm the socket timer for the next check */
+}
+
+static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* timer helper for TIPC_CONNECTING: defer if congested, else stage the write queue on list */
+{
+ struct tipc_sock *tsk = tipc_sk(sk);
+
+ /* Try again later if dest link is congested */
+ if (tsk->cong_link_cnt) {
+ sk_reset_timer(sk, &sk->sk_timer,
+ jiffies + msecs_to_jiffies(100)); /* retry in 100 ms */
+ return;
+ }
+ /* Prepare SYN for retransmit */
+ tipc_msg_skb_clone(&sk->sk_write_queue, list); /* presumably clones the queued buffers onto list -- TODO confirm helper semantics */
+}
+
+static void tipc_sk_timeout(struct timer_list *t) /* socket timer callback: connection probing or SYN retry, depending on sk_state */
+{
+ struct sock *sk = timer_container_of(sk, t, sk_timer);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ u32 pnode = tsk_peer_node(tsk);
+ struct sk_buff_head list; /* messages produced under the lock, transmitted after unlocking */
+ int rc = 0;
+
+ __skb_queue_head_init(&list);
+ bh_lock_sock(sk);
+
+ /* Try again later if socket is busy */
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); /* re-arm HZ/20 jiffies from now */
+ bh_unlock_sock(sk);
+ sock_put(sk); /* presumably balances a reference held on behalf of the timer -- TODO confirm */
+ return;
+ }
+
+ if (sk->sk_state == TIPC_ESTABLISHED)
+ tipc_sk_check_probing_state(sk, &list);
+ else if (sk->sk_state == TIPC_CONNECTING)
+ tipc_sk_retry_connect(sk, &list);
+
+ bh_unlock_sock(sk);
+
+ if (!skb_queue_empty(&list)) /* transmit only if a helper queued something */
+ rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
+
+ /* SYN messages may cause link congestion */
+ if (rc == -ELINKCONG) {
+ tipc_dest_push(&tsk->cong_links, pnode, 0); /* record the peer node in the congested-links list */
+ tsk->cong_link_cnt = 1;
+ }
+ sock_put(sk); /* same reference drop as on the early-return path above */
+}
+
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua) /* publish ua in the name table for this socket; returns 0 or a negative errno */
+{
+ struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_socket_addr skaddr;
+ struct publication *p;
+ u32 key;
+
+ if (tipc_sk_connected(sk)) /* a connected socket is refused */
+ return -EINVAL;
+ key = tsk->portid + tsk->pub_count + 1;
+ if (key == tsk->portid) /* only possible when pub_count + 1 wraps to zero in u32 arithmetic */
+ return -EADDRINUSE;
+ skaddr.ref = tsk->portid;
+ skaddr.node = tipc_own_addr(net);
+ p = tipc_nametbl_publish(net, ua, &skaddr, key);
+ if (unlikely(!p)) /* any name-table failure is reported as -EINVAL */
+ return -EINVAL;
+
+ list_add(&p->binding_sock, &tsk->publications); /* track the publication on the socket */
+ tsk->pub_count++;
+ tsk->published = true;
+ return 0;
+}
+
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua) /* withdraw one publication matching ua, or every publication when ua is NULL */
+{
+ struct net *net = sock_net(&tsk->sk);
+ struct publication *safe, *p;
+ struct tipc_uaddr _ua; /* scratch address rebuilt per publication on the withdraw-all path */
+ int rc = -EINVAL; /* stays -EINVAL unless a match is withdrawn or the list ends up empty */
+
+ list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) { /* safe iterator; presumably the withdraw call unlinks p -- TODO confirm */
+ if (!ua) {
+ tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope,
+ p->sr.type, p->sr.lower, p->sr.upper);
+ tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key);
+ continue;
+ }
+ /* Unbind specific publication */
+ if (p->scope != ua->scope)
+ continue;
+ if (p->sr.type != ua->sr.type)
+ continue;
+ if (p->sr.lower != ua->sr.lower)
+ continue;
+ if (p->sr.upper != ua->sr.upper) /* same scope/type/lower but different upper: the search stops without withdrawing */
+ break;
+ tipc_nametbl_withdraw(net, ua, &p->sk, p->key);
+ rc = 0;
+ break;
+ }
+ if (list_empty(&tsk->publications)) { /* nothing left published: clear the flag and report success */
+ tsk->published = 0;
+ rc = 0;
+ }
+ return rc;
+}
+
+/* tipc_sk_reinit: set non-zero address in all existing sockets
+ * when we go from standalone to network mode.
+ *
+ * Walks the per-namespace socket hash table and rewrites the previous-node
+ * and originating-node fields of each socket's preformatted header (phdr)
+ * with the node's current own address.
+ */
+void tipc_sk_reinit(struct net *net)
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct rhashtable_iter iter;
+ struct tipc_sock *tsk;
+ struct tipc_msg *msg;
+
+ rhashtable_walk_enter(&tn->sk_rht, &iter);
+
+ do {
+ rhashtable_walk_start(&iter);
+
+ while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
+ sock_hold(&tsk->sk); /* keep the socket referenced while the walk is stopped */
+ rhashtable_walk_stop(&iter); /* walk is stopped before lock_sock(); presumably because lock_sock() may sleep -- TODO confirm */
+ lock_sock(&tsk->sk);
+ msg = &tsk->phdr;
+ msg_set_prevnode(msg, tipc_own_addr(net));
+ msg_set_orignode(msg, tipc_own_addr(net));
+ release_sock(&tsk->sk);
+ rhashtable_walk_start(&iter); /* resume the walk before dropping the reference */
+ sock_put(&tsk->sk);
+ }
+
+ rhashtable_walk_stop(&iter);
+ } while (tsk == ERR_PTR(-EAGAIN)); /* restart the inner loop when the walk reported -EAGAIN */
+
+ rhashtable_walk_exit(&iter);
+}
+
+static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) /* find the socket bound to portid; returns it with a reference held, or NULL */
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_sock *tsk;
+
+ rcu_read_lock();
+ tsk = rhashtable_lookup(&tn->sk_rht, &portid, tsk_rht_params);
+ if (tsk)
+ sock_hold(&tsk->sk); /* taken inside the RCU read section; the caller is expected to sock_put() */
+ rcu_read_unlock();
+
+ return tsk;
+}
+
+static int tipc_sk_insert(struct tipc_sock *tsk) /* pick a free port id for tsk and insert it into the socket hash table; 0 on success, -1 if none was free */
+{
+ struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; /* number of candidate port ids, also the maximum number of attempts */
+ u32 portid = get_random_u32_below(remaining) + TIPC_MIN_PORT; /* random starting point within the port range */
+
+ while (remaining--) {
+ portid++;
+ if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) /* wrap back to the start of the range */
+ portid = TIPC_MIN_PORT;
+ tsk->portid = portid;
+ sock_hold(&tsk->sk); /* reference kept only if the insertion below succeeds */
+ if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
+ tsk_rht_params)) /* zero return means the entry was inserted */
+ return 0;
+ sock_put(&tsk->sk); /* insertion failed: drop the reference and try the next port id */
+ }
+
+ return -1;
+}
+
+static void tipc_sk_remove(struct tipc_sock *tsk) /* remove tsk from the socket hash table */
+{
+ struct sock *sk = &tsk->sk;
+ struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
+
+ if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) /* zero return means the entry was found and removed */
+ __sock_put(sk); /* presumably balances the sock_hold() done in tipc_sk_insert() -- TODO confirm */
+}
+
+static const struct rhashtable_params tsk_rht_params = { /* hash table layout for TIPC sockets, keyed by port id */
+ .nelem_hint = 192,
+ .head_offset = offsetof(struct tipc_sock, node), /* hash linkage lives in tipc_sock::node */
+ .key_offset = offsetof(struct tipc_sock, portid), /* lookup key is tipc_sock::portid */
+ .key_len = sizeof(u32), /* portid */
+ .max_size = 1048576,
+ .min_size = 256,
+ .automatic_shrinking = true,
+};
+
+int tipc_sk_rht_init(struct net *net) /* initialise the per-namespace socket hash table; returns rhashtable_init()'s result */
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+ return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
+}
+
+void tipc_sk_rht_destroy(struct net *net) /* tear down the per-namespace socket hash table */
+{
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+ /* Wait for socket readers to complete */
+ synchronize_net(); /* done before the table is destroyed */
+
+ rhashtable_destroy(&tn->sk_rht);
+}
+
+static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) /* make the socket a member of the group described by mreq; 0 or a negative errno */
+{
+ struct net *net = sock_net(&tsk->sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct tipc_uaddr ua;
+ int rc;
+
+ if (mreq->type < TIPC_RESERVED_TYPES) /* types below TIPC_RESERVED_TYPES are refused */
+ return -EACCES;
+ if (mreq->scope > TIPC_NODE_SCOPE)
+ return -EINVAL;
+ if (mreq->scope != TIPC_NODE_SCOPE) /* any remaining non-node scope is rewritten in place to cluster scope */
+ mreq->scope = TIPC_CLUSTER_SCOPE;
+ if (grp) /* the socket already belongs to a group */
+ return -EACCES;
+ grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
+ if (!grp)
+ return -ENOMEM;
+ tsk->group = grp;
+ msg_set_lookup_scope(hdr, mreq->scope);
+ msg_set_nametype(hdr, mreq->type);
+ msg_set_dest_droppable(hdr, true);
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope,
+ mreq->type, mreq->instance, mreq->instance); /* single-instance range: lower == upper == mreq->instance */
+ tipc_nametbl_build_group(net, grp, &ua);
+ rc = tipc_sk_publish(tsk, &ua);
+ if (rc) { /* publication failed: undo the group creation */
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ return rc;
+ }
+ /* Eliminate any risk that a broadcast overtakes sent JOINs */
+ tsk->mc_method.rcast = true;
+ tsk->mc_method.mandatory = true;
+ tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
+ return rc; /* rc is 0 here */
+}
+
+static int tipc_sk_leave(struct tipc_sock *tsk) /* leave the socket's current group; -EINVAL if it has none, else 0 */
+{
+ struct net *net = sock_net(&tsk->sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_uaddr ua;
+ int scope;
+
+ if (!grp)
+ return -EINVAL;
+ ua.addrtype = TIPC_SERVICE_RANGE;
+ tipc_group_self(grp, &ua.sr, &scope); /* fetch the group's own service range and scope before the group is deleted */
+ ua.scope = scope;
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ tipc_sk_withdraw(tsk, &ua); /* return value is ignored; the function reports success regardless */
+ return 0;
+}
+
/**
- * setsockopt - set socket option
+ * tipc_setsockopt - set socket option
* @sock: socket structure
* @lvl: option level
* @opt: option identifier
@@ -1734,44 +3130,79 @@ restart:
* For stream sockets only, accepts and ignores all IPPROTO_TCP options
* (to ease compatibility).
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
- unsigned int ol)
+static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
+ sockptr_t ov, unsigned int ol)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- u32 value;
- int res;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group_req mreq;
+ u32 value = 0;
+ int res = 0;
if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
return 0;
if (lvl != SOL_TIPC)
return -ENOPROTOOPT;
- if (ol < sizeof(value))
- return -EINVAL;
- res = get_user(value, (u32 __user *)ov);
- if (res)
- return res;
+
+ switch (opt) {
+ case TIPC_IMPORTANCE:
+ case TIPC_SRC_DROPPABLE:
+ case TIPC_DEST_DROPPABLE:
+ case TIPC_CONN_TIMEOUT:
+ case TIPC_NODELAY:
+ if (ol < sizeof(value))
+ return -EINVAL;
+ if (copy_from_sockptr(&value, ov, sizeof(u32)))
+ return -EFAULT;
+ break;
+ case TIPC_GROUP_JOIN:
+ if (ol < sizeof(mreq))
+ return -EINVAL;
+ if (copy_from_sockptr(&mreq, ov, sizeof(mreq)))
+ return -EFAULT;
+ break;
+ default:
+ if (!sockptr_is_null(ov) || ol)
+ return -EINVAL;
+ }
lock_sock(sk);
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_set_portimportance(tport->ref, value);
+ res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
- res = tipc_set_portunreliable(tport->ref, value);
+ tsk_set_unreliable(tsk, value);
else
res = -ENOPROTOOPT;
break;
case TIPC_DEST_DROPPABLE:
- res = tipc_set_portunreturnable(tport->ref, value);
+ tsk_set_unreturnable(tsk, value);
break;
case TIPC_CONN_TIMEOUT:
tipc_sk(sk)->conn_timeout = value;
- /* no need to set "res", since already 0 at this point */
+ break;
+ case TIPC_MCAST_BROADCAST:
+ tsk->mc_method.rcast = false;
+ tsk->mc_method.mandatory = true;
+ break;
+ case TIPC_MCAST_REPLICAST:
+ tsk->mc_method.rcast = true;
+ tsk->mc_method.mandatory = true;
+ break;
+ case TIPC_GROUP_JOIN:
+ res = tipc_sk_join(tsk, &mreq);
+ break;
+ case TIPC_GROUP_LEAVE:
+ res = tipc_sk_leave(tsk);
+ break;
+ case TIPC_NODELAY:
+ tsk->nodelay = !!value;
+ tsk_set_nagle(tsk);
break;
default:
res = -EINVAL;
@@ -1783,7 +3214,7 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
}
/**
- * getsockopt - get socket option
+ * tipc_getsockopt - get socket option
* @sock: socket structure
* @lvl: option level
* @opt: option identifier
@@ -1793,14 +3224,15 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
* For stream sockets only, returns 0 length result for all IPPROTO_TCP options
* (to ease compatibility).
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
-static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
- int __user *ol)
+static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
+ char __user *ov, int __user *ol)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
- int len;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_service_range seq;
+ int len, scope;
u32 value;
int res;
@@ -1816,16 +3248,16 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_portimportance(tport->ref, &value);
+ value = tsk_importance(tsk);
break;
case TIPC_SRC_DROPPABLE:
- res = tipc_portunreliable(tport->ref, &value);
+ value = tsk_unreliable(tsk);
break;
case TIPC_DEST_DROPPABLE:
- res = tipc_portunreturnable(tport->ref, &value);
+ value = tsk_unreturnable(tsk);
break;
case TIPC_CONN_TIMEOUT:
- value = tipc_sk(sk)->conn_timeout;
+ value = tsk->conn_timeout;
/* no need to set "res", since already 0 at this point */
break;
case TIPC_NODE_RECVQ_DEPTH:
@@ -1834,6 +3266,15 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
case TIPC_SOCK_RECVQ_DEPTH:
value = skb_queue_len(&sk->sk_receive_queue);
break;
+ case TIPC_SOCK_RECVQ_USED:
+ value = sk_rmem_alloc_get(sk);
+ break;
+ case TIPC_GROUP_JOIN:
+ seq.type = 0;
+ if (tsk->group)
+ tipc_group_self(tsk->group, &seq, &scope);
+ value = seq.type;
+ break;
default:
res = -EINVAL;
}
@@ -1852,69 +3293,120 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
return put_user(sizeof(value), ol);
}
+static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ struct net *net = sock_net(sock->sk);
+ struct tipc_sioc_nodeid_req nr = {0};
+ struct tipc_sioc_ln_req lnr;
+ void __user *argp = (void __user *)arg;
+
+ switch (cmd) {
+ case SIOCGETLINKNAME:
+ if (copy_from_user(&lnr, argp, sizeof(lnr)))
+ return -EFAULT;
+ if (!tipc_node_get_linkname(net,
+ lnr.bearer_id & 0xffff, lnr.peer,
+ lnr.linkname, TIPC_MAX_LINK_NAME)) {
+ if (copy_to_user(argp, &lnr, sizeof(lnr)))
+ return -EFAULT;
+ return 0;
+ }
+ return -EADDRNOTAVAIL;
+ case SIOCGETNODEID:
+ if (copy_from_user(&nr, argp, sizeof(nr)))
+ return -EFAULT;
+ if (!tipc_node_get_id(net, nr.peer, nr.node_id))
+ return -EADDRNOTAVAIL;
+ if (copy_to_user(argp, &nr, sizeof(nr)))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
+{
+ struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
+ struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
+ u32 onode = tipc_own_addr(sock_net(sock1->sk));
+
+ tsk1->peer.family = AF_TIPC;
+ tsk1->peer.addrtype = TIPC_SOCKET_ADDR;
+ tsk1->peer.scope = TIPC_NODE_SCOPE;
+ tsk1->peer.addr.id.ref = tsk2->portid;
+ tsk1->peer.addr.id.node = onode;
+ tsk2->peer.family = AF_TIPC;
+ tsk2->peer.addrtype = TIPC_SOCKET_ADDR;
+ tsk2->peer.scope = TIPC_NODE_SCOPE;
+ tsk2->peer.addr.id.ref = tsk1->portid;
+ tsk2->peer.addr.id.node = onode;
+
+ tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
+ tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
+ return 0;
+}
+
/* Protocol switches for the various types of TIPC sockets */
static const struct proto_ops msg_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
- .socketpair = sock_no_socketpair,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
+ .socketpair = tipc_socketpair,
.accept = sock_no_accept,
- .getname = get_name,
- .poll = poll,
- .ioctl = sock_no_ioctl,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
+ .ioctl = tipc_ioctl,
.listen = sock_no_listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_msg,
- .recvmsg = recv_msg,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_sendmsg,
+ .recvmsg = tipc_recvmsg,
.mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage
};
static const struct proto_ops packet_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
- .socketpair = sock_no_socketpair,
- .accept = accept,
- .getname = get_name,
- .poll = poll,
- .ioctl = sock_no_ioctl,
- .listen = listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_packet,
- .recvmsg = recv_msg,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
+ .socketpair = tipc_socketpair,
+ .accept = tipc_accept,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
+ .ioctl = tipc_ioctl,
+ .listen = tipc_listen,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_send_packet,
+ .recvmsg = tipc_recvmsg,
.mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage
};
static const struct proto_ops stream_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
- .socketpair = sock_no_socketpair,
- .accept = accept,
- .getname = get_name,
- .poll = poll,
- .ioctl = sock_no_ioctl,
- .listen = listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_stream,
- .recvmsg = recv_stream,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
+ .socketpair = tipc_socketpair,
+ .accept = tipc_accept,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
+ .ioctl = tipc_ioctl,
+ .listen = tipc_listen,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_sendstream,
+ .recvmsg = tipc_recvstream,
.mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage
};
static const struct net_proto_family tipc_family_ops = {
@@ -1930,16 +3422,10 @@ static struct proto tipc_proto = {
.sysctl_rmem = sysctl_tipc_rmem
};
-static struct proto tipc_proto_kern = {
- .name = "TIPC",
- .obj_size = sizeof(struct tipc_sock),
- .sysctl_rmem = sysctl_tipc_rmem
-};
-
/**
* tipc_socket_init - initialize TIPC socket interface
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
int tipc_socket_init(void)
{
@@ -1957,8 +3443,6 @@ int tipc_socket_init(void)
proto_unregister(&tipc_proto);
goto out;
}
-
- sockets_enabled = 1;
out:
return res;
}
@@ -1968,10 +3452,557 @@ int tipc_socket_init(void)
*/
void tipc_socket_stop(void)
{
- if (!sockets_enabled)
- return;
-
- sockets_enabled = 0;
sock_unregister(tipc_family_ops.family);
proto_unregister(&tipc_proto);
}
+
+/* Caller should hold socket lock for the passed tipc socket. */
+static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
+{
+ u32 peer_node, peer_port;
+ u32 conn_type, conn_instance;
+ struct nlattr *nest;
+
+ peer_node = tsk_peer_node(tsk);
+ peer_port = tsk_peer_port(tsk);
+ conn_type = msg_nametype(&tsk->phdr);
+ conn_instance = msg_nameinst(&tsk->phdr);
+ nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
+ goto msg_full;
+ if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
+ goto msg_full;
+
+ if (tsk->conn_addrtype != 0) {
+ if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
+ goto msg_full;
+ if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type))
+ goto msg_full;
+ if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance))
+ goto msg_full;
+ }
+ nla_nest_end(skb, nest);
+
+ return 0;
+
+msg_full:
+ nla_nest_cancel(skb, nest);
+
+ return -EMSGSIZE;
+}
+
+static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
+ *tsk)
+{
+ struct net *net = sock_net(skb->sk);
+ struct sock *sk = &tsk->sk;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
+ return -EMSGSIZE;
+
+ if (tipc_sk_connected(sk)) {
+ if (__tipc_nl_add_sk_con(skb, tsk))
+ return -EMSGSIZE;
+ } else if (!list_empty(&tsk->publications)) {
+ if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
+/* Caller should hold socket lock for the passed tipc socket. */
+static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tipc_sock *tsk)
+{
+ struct nlattr *attrs;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
+ if (!hdr)
+ goto msg_cancel;
+
+ attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK);
+ if (!attrs)
+ goto genlmsg_cancel;
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
+ goto attr_msg_cancel;
+
+ nla_nest_end(skb, attrs);
+ genlmsg_end(skb, hdr);
+
+ return 0;
+
+attr_msg_cancel:
+ nla_nest_cancel(skb, attrs);
+genlmsg_cancel:
+ genlmsg_cancel(skb, hdr);
+msg_cancel:
+ return -EMSGSIZE;
+}
+
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk))
+{
+ struct rhashtable_iter *iter = (void *)cb->args[4];
+ struct tipc_sock *tsk;
+ int err;
+
+ rhashtable_walk_start(iter);
+ while ((tsk = rhashtable_walk_next(iter)) != NULL) {
+ if (IS_ERR(tsk)) {
+ if (PTR_ERR(tsk) == -EAGAIN)
+ continue;
+ break;
+ }
+
+ sock_hold(&tsk->sk);
+ rhashtable_walk_stop(iter);
+ lock_sock(&tsk->sk);
+ err = skb_handler(skb, cb, tsk);
+ if (err) {
+ release_sock(&tsk->sk);
+ sock_put(&tsk->sk);
+ goto out;
+ }
+ release_sock(&tsk->sk);
+ rhashtable_walk_start(iter);
+ sock_put(&tsk->sk);
+ }
+ rhashtable_walk_stop(iter);
+out:
+ return skb->len;
+}
+EXPORT_SYMBOL(tipc_nl_sk_walk);
+
+int tipc_dump_start(struct netlink_callback *cb)
+{
+ return __tipc_dump_start(cb, sock_net(cb->skb->sk));
+}
+EXPORT_SYMBOL(tipc_dump_start);
+
+int __tipc_dump_start(struct netlink_callback *cb, struct net *net)
+{
+ /* tipc_nl_name_table_dump() uses cb->args[0...3]. */
+ struct rhashtable_iter *iter = (void *)cb->args[4];
+ struct tipc_net *tn = tipc_net(net);
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[4] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&tn->sk_rht, iter);
+ return 0;
+}
+
+int tipc_dump_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *hti = (void *)cb->args[4];
+
+ rhashtable_walk_exit(hti);
+ kfree(hti);
+ return 0;
+}
+EXPORT_SYMBOL(tipc_dump_done);
+
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tipc_sock *tsk, u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk))
+{
+ struct sock *sk = &tsk->sk;
+ struct nlattr *attrs;
+ struct nlattr *stat;
+
+ /* Filter the response with respect to sk_state */
+ if (!(sk_filter_state & (1 << sk->sk_state)))
+ return 0;
+
+ attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK);
+ if (!attrs)
+ goto msg_cancel;
+
+ if (__tipc_nl_add_sk_info(skb, tsk))
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_UID,
+ from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
+ sk_uid(sk))) ||
+ nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
+ tipc_diag_gen_cookie(sk),
+ TIPC_NLA_SOCK_PAD))
+ goto attr_msg_cancel;
+
+ stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT);
+ if (!stat)
+ goto attr_msg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
+ skb_queue_len(&sk->sk_receive_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
+ skb_queue_len(&sk->sk_write_queue)) ||
+ nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
+ sk_drops_read(sk)))
+ goto stat_msg_cancel;
+
+ if (tsk->cong_link_cnt &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
+ goto stat_msg_cancel;
+
+ if (tsk_conn_cong(tsk) &&
+ nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
+ goto stat_msg_cancel;
+
+ nla_nest_end(skb, stat);
+
+ if (tsk->group)
+ if (tipc_group_fill_sock_diag(tsk->group, skb))
+ goto stat_msg_cancel;
+
+ nla_nest_end(skb, attrs);
+
+ return 0;
+
+stat_msg_cancel:
+ nla_nest_cancel(skb, stat);
+attr_msg_cancel:
+ nla_nest_cancel(skb, attrs);
+msg_cancel:
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
+
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+}
+
+/* Caller should hold the socket lock of the tipc socket owning @publ. */
+static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct publication *publ)
+{
+ void *hdr;
+ struct nlattr *attrs;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
+ if (!hdr)
+ goto msg_cancel;
+
+ attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL);
+ if (!attrs)
+ goto genlmsg_cancel;
+
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
+ goto attr_msg_cancel;
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type))
+ goto attr_msg_cancel;
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower))
+ goto attr_msg_cancel;
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper))
+ goto attr_msg_cancel;
+
+ nla_nest_end(skb, attrs);
+ genlmsg_end(skb, hdr);
+
+ return 0;
+
+attr_msg_cancel:
+ nla_nest_cancel(skb, attrs);
+genlmsg_cancel:
+ genlmsg_cancel(skb, hdr);
+msg_cancel:
+ return -EMSGSIZE;
+}
+
+/* Caller should hold socket lock for the passed tipc socket. */
+static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk, u32 *last_publ)
+{
+ int err;
+ struct publication *p;
+
+ if (*last_publ) {
+ list_for_each_entry(p, &tsk->publications, binding_sock) {
+ if (p->key == *last_publ)
+ break;
+ }
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
+ /* We never set seq or call nl_dump_check_consistent(),
+ * which means that setting prev_seq here will cause the
+ * consistency check to fail in the netlink callback
+ * handler, resulting in the last NLMSG_DONE message
+ * having the NLM_F_DUMP_INTR flag set.
+ */
+ cb->prev_seq = 1;
+ *last_publ = 0;
+ return -EPIPE;
+ }
+ } else {
+ p = list_first_entry(&tsk->publications, struct publication,
+ binding_sock);
+ }
+
+ list_for_each_entry_from(p, &tsk->publications, binding_sock) {
+ err = __tipc_nl_add_sk_publ(skb, cb, p);
+ if (err) {
+ *last_publ = p->key;
+ return err;
+ }
+ }
+ *last_publ = 0;
+
+ return 0;
+}
+
+int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int err;
+ u32 tsk_portid = cb->args[0];
+ u32 last_publ = cb->args[1];
+ u32 done = cb->args[2];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_sock *tsk;
+
+ if (!tsk_portid) {
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
+ struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
+
+ if (!attrs[TIPC_NLA_SOCK])
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX,
+ attrs[TIPC_NLA_SOCK],
+ tipc_nl_sock_policy, NULL);
+ if (err)
+ return err;
+
+ if (!sock[TIPC_NLA_SOCK_REF])
+ return -EINVAL;
+
+ tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
+ }
+
+ if (done)
+ return 0;
+
+ tsk = tipc_sk_lookup(net, tsk_portid);
+ if (!tsk)
+ return -EINVAL;
+
+ lock_sock(&tsk->sk);
+ err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
+ if (!err)
+ done = 1;
+ release_sock(&tsk->sk);
+ sock_put(&tsk->sk);
+
+ cb->args[0] = tsk_portid;
+ cb->args[1] = last_publ;
+ cb->args[2] = done;
+
+ return skb->len;
+}
+
+/**
+ * tipc_sk_filtering - check if a socket should be traced
+ * @sk: the socket to be examined
+ *
+ * @sysctl_tipc_sk_filter is used as the socket tuple for filtering:
+ * (portid, sock type, name type, name lower, name upper)
+ *
+ * Return: true if the socket matches the filter tuple
+ * (a value of 0 means 'any') or when no tuple is set (all values 0),
+ * otherwise false
+ */
+bool tipc_sk_filtering(struct sock *sk)
+{
+ struct tipc_sock *tsk;
+ struct publication *p;
+ u32 _port, _sktype, _type, _lower, _upper;
+ u32 type = 0, lower = 0, upper = 0;
+
+ if (!sk)
+ return true;
+
+ tsk = tipc_sk(sk);
+
+ _port = sysctl_tipc_sk_filter[0];
+ _sktype = sysctl_tipc_sk_filter[1];
+ _type = sysctl_tipc_sk_filter[2];
+ _lower = sysctl_tipc_sk_filter[3];
+ _upper = sysctl_tipc_sk_filter[4];
+
+ if (!_port && !_sktype && !_type && !_lower && !_upper)
+ return true;
+
+ if (_port)
+ return (_port == tsk->portid);
+
+ if (_sktype && _sktype != sk->sk_type)
+ return false;
+
+ if (tsk->published) {
+ p = list_first_entry_or_null(&tsk->publications,
+ struct publication, binding_sock);
+ if (p) {
+ type = p->sr.type;
+ lower = p->sr.lower;
+ upper = p->sr.upper;
+ }
+ }
+
+ if (!tipc_sk_type_connectionless(sk)) {
+ type = msg_nametype(&tsk->phdr);
+ lower = msg_nameinst(&tsk->phdr);
+ upper = lower;
+ }
+
+ if ((_type && _type != type) || (_lower && _lower != lower) ||
+ (_upper && _upper != upper))
+ return false;
+
+ return true;
+}
+
+u32 tipc_sock_get_portid(struct sock *sk)
+{
+ return (sk) ? (tipc_sk(sk))->portid : 0;
+}
+
+/**
+ * tipc_sk_overlimit1 - check if socket rx queue is about to be overloaded,
+ * both the rcv and backlog queues are considered
+ * @sk: tipc sk to be checked
+ * @skb: tipc msg to be checked
+ *
+ * Return: true if the socket rx queue allocation is > 90%, otherwise false
+ */
+
+bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb)
+{
+ atomic_t *dcnt = &tipc_sk(sk)->dupl_rcvcnt;
+ unsigned int lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
+ unsigned int qsize = sk->sk_backlog.len + sk_rmem_alloc_get(sk);
+
+ return (qsize > lim * 90 / 100);
+}
+
+/**
+ * tipc_sk_overlimit2 - check if socket rx queue is about to be overloaded,
+ * only the rcv queue is considered
+ * @sk: tipc sk to be checked
+ * @skb: tipc msg to be checked
+ *
+ * Return: true if the socket rx queue allocation is > 90%, otherwise false
+ */
+
+bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb)
+{
+ unsigned int lim = rcvbuf_limit(sk, skb);
+ unsigned int qsize = sk_rmem_alloc_get(sk);
+
+ return (qsize > lim * 90 / 100);
+}
+
+/**
+ * tipc_sk_dump - dump TIPC socket
+ * @sk: tipc sk to be dumped
+ * @dqueues: bitmask selecting which socket queues, if any, are to be dumped:
+ * - TIPC_DUMP_NONE: don't dump socket queues
+ * - TIPC_DUMP_SK_SNDQ: dump socket send queue
+ * - TIPC_DUMP_SK_RCVQ: dump socket rcv queue
+ * - TIPC_DUMP_SK_BKLGQ: dump socket backlog queue
+ * - TIPC_DUMP_ALL: dump all the socket queues above
+ * @buf: buffer that receives the formatted dump data
+ */
+int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
+{
+ int i = 0;
+ size_t sz = (dqueues) ? SK_LMAX : SK_LMIN;
+ u32 conn_type, conn_instance;
+ struct tipc_sock *tsk;
+ struct publication *p;
+ bool tsk_connected;
+
+ if (!sk) {
+ i += scnprintf(buf, sz, "sk data: (null)\n");
+ return i;
+ }
+
+ tsk = tipc_sk(sk);
+ tsk_connected = !tipc_sk_type_connectionless(sk);
+
+ i += scnprintf(buf, sz, "sk data: %u", sk->sk_type);
+ i += scnprintf(buf + i, sz - i, " %d", sk->sk_state);
+ i += scnprintf(buf + i, sz - i, " %x", tsk_own_node(tsk));
+ i += scnprintf(buf + i, sz - i, " %u", tsk->portid);
+ i += scnprintf(buf + i, sz - i, " | %u", tsk_connected);
+ if (tsk_connected) {
+ i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk));
+ i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk));
+ conn_type = msg_nametype(&tsk->phdr);
+ conn_instance = msg_nameinst(&tsk->phdr);
+ i += scnprintf(buf + i, sz - i, " %u", conn_type);
+ i += scnprintf(buf + i, sz - i, " %u", conn_instance);
+ }
+ i += scnprintf(buf + i, sz - i, " | %u", tsk->published);
+ if (tsk->published) {
+ p = list_first_entry_or_null(&tsk->publications,
+ struct publication, binding_sock);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0);
+ }
+ i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->max_pkt);
+ i += scnprintf(buf + i, sz - i, " %x", tsk->peer_caps);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->cong_link_cnt);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->snt_unacked);
+ i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_unacked);
+ i += scnprintf(buf + i, sz - i, " %u", atomic_read(&tsk->dupl_rcvcnt));
+ i += scnprintf(buf + i, sz - i, " %u", sk->sk_shutdown);
+ i += scnprintf(buf + i, sz - i, " | %d", sk_wmem_alloc_get(sk));
+ i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf);
+ i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk));
+ i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf);
+ i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len));
+
+ if (dqueues & TIPC_DUMP_SK_SNDQ) {
+ i += scnprintf(buf + i, sz - i, "sk_write_queue: ");
+ i += tipc_list_dump(&sk->sk_write_queue, false, buf + i);
+ }
+
+ if (dqueues & TIPC_DUMP_SK_RCVQ) {
+ i += scnprintf(buf + i, sz - i, "sk_receive_queue: ");
+ i += tipc_list_dump(&sk->sk_receive_queue, false, buf + i);
+ }
+
+ if (dqueues & TIPC_DUMP_SK_BKLGQ) {
+ i += scnprintf(buf + i, sz - i, "sk_backlog:\n head ");
+ i += tipc_skb_dump(sk->sk_backlog.head, false, buf + i);
+ if (sk->sk_backlog.tail != sk->sk_backlog.head) {
+ i += scnprintf(buf + i, sz - i, " tail ");
+ i += tipc_skb_dump(sk->sk_backlog.tail, false,
+ buf + i);
+ }
+ }
+
+ return i;
+}
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
new file mode 100644
index 000000000000..02cdf166807d
--- /dev/null
+++ b/net/tipc/socket.h
@@ -0,0 +1,80 @@
+/* net/tipc/socket.h: Include file for TIPC socket code
+ *
+ * Copyright (c) 2014-2016, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_SOCK_H
+#define _TIPC_SOCK_H
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+
+/* Compatibility values for deprecated message based flow control */
+#define FLOWCTL_MSG_WIN 512
+#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE))
+
+#define FLOWCTL_BLK_SZ 1024
+
+/* Socket receive buffer sizes */
+#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512)
+#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
+#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
+
+struct tipc_sock;
+
+int tipc_socket_init(void);
+void tipc_socket_stop(void);
+void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
+void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
+ struct sk_buff_head *inputq);
+void tipc_sk_reinit(struct net *net);
+int tipc_sk_rht_init(struct net *net);
+void tipc_sk_rht_destroy(struct net *net);
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tipc_sock *tsk, u32 sk_filter_state,
+ u64 (*tipc_diag_gen_cookie)(struct sock *sk));
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*skb_handler)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tipc_sock *tsk));
+int tipc_dump_start(struct netlink_callback *cb);
+int __tipc_dump_start(struct netlink_callback *cb, struct net *net);
+int tipc_dump_done(struct netlink_callback *cb);
+u32 tipc_sock_get_portid(struct sock *sk);
+bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
+bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen);
+int tsk_set_importance(struct sock *sk, int imp);
+
+#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index d38bb45d82e9..f8490d94e323 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,8 +1,9 @@
/*
* net/tipc/subscr.c: TIPC network topology service
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,354 +37,147 @@
#include "core.h"
#include "name_table.h"
-#include "port.h"
#include "subscr.h"
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @conid: connection identifier to server connecting to subscriber
- * @lock: controll access to subscriber
- * @subscription_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
- int conid;
- spinlock_t lock;
- struct list_head subscription_list;
-};
-
-static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr,
- void *usr_data, void *buf, size_t len);
-static void *subscr_named_msg_event(int conid);
-static void subscr_conn_shutdown_event(int conid, void *usr_data);
-
-static atomic_t subscription_count = ATOMIC_INIT(0);
-
-static struct sockaddr_tipc topsrv_addr __read_mostly = {
- .family = AF_TIPC,
- .addrtype = TIPC_ADDR_NAMESEQ,
- .addr.nameseq.type = TIPC_TOP_SRV,
- .addr.nameseq.lower = TIPC_TOP_SRV,
- .addr.nameseq.upper = TIPC_TOP_SRV,
- .scope = TIPC_NODE_SCOPE
-};
-
-static struct tipc_server topsrv __read_mostly = {
- .saddr = &topsrv_addr,
- .imp = TIPC_CRITICAL_IMPORTANCE,
- .type = SOCK_SEQPACKET,
- .max_rcvbuf_size = sizeof(struct tipc_subscr),
- .name = "topology_server",
- .tipc_conn_recvmsg = subscr_conn_msg_event,
- .tipc_conn_new = subscr_named_msg_event,
- .tipc_conn_shutdown = subscr_conn_shutdown_event,
-};
-
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+ struct publication *p,
+ u32 event)
{
- return swap ? swab32(in) : in;
-}
+ struct tipc_subscr *s = &sub->evt.s;
+ struct tipc_event *evt = &sub->evt;
-static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node)
-{
- struct tipc_subscriber *subscriber = sub->subscriber;
- struct kvec msg_sect;
- int ret;
-
- msg_sect.iov_base = (void *)&sub->evt;
- msg_sect.iov_len = sizeof(struct tipc_event);
-
- sub->evt.event = htohl(event, sub->swap);
- sub->evt.found_lower = htohl(found_lower, sub->swap);
- sub->evt.found_upper = htohl(found_upper, sub->swap);
- sub->evt.port.ref = htohl(port_ref, sub->swap);
- sub->evt.port.node = htohl(node, sub->swap);
- ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
- if (ret < 0)
- pr_err("Sending subscription event failed, no memory\n");
+ if (sub->inactive)
+ return;
+ tipc_evt_write(evt, event, event);
+ if (p) {
+ tipc_evt_write(evt, found_lower, p->sr.lower);
+ tipc_evt_write(evt, found_upper, p->sr.upper);
+ tipc_evt_write(evt, port.ref, p->sk.ref);
+ tipc_evt_write(evt, port.node, p->sk.node);
+ } else {
+ tipc_evt_write(evt, found_lower, s->seq.lower);
+ tipc_evt_write(evt, found_upper, s->seq.upper);
+ tipc_evt_write(evt, port.ref, 0);
+ tipc_evt_write(evt, port.node, 0);
+ }
+ tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
}
/**
- * tipc_subscr_overlap - test for subscription overlap with the given values
+ * tipc_sub_check_overlap - test for subscription overlap with the given values
+ * @subscribed: the service range subscribed for
+ * @found: the service range we are checking for match
*
- * Returns 1 if there is overlap, otherwise 0.
+ * Return: true if there is overlap, otherwise false.
*/
-int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper)
+static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed,
+ struct tipc_service_range *found)
{
- if (found_lower < sub->seq.lower)
- found_lower = sub->seq.lower;
- if (found_upper > sub->seq.upper)
- found_upper = sub->seq.upper;
- if (found_lower > found_upper)
- return 0;
- return 1;
+ u32 found_lower = found->lower;
+ u32 found_upper = found->upper;
+
+ if (found_lower < subscribed->lower)
+ found_lower = subscribed->lower;
+ if (found_upper > subscribed->upper)
+ found_upper = subscribed->upper;
+ return found_lower <= found_upper;
}
-/**
- * tipc_subscr_report_overlap - issue event if there is subscription overlap
- *
- * Protected by nameseq.lock in name_table.c
- */
-void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ struct publication *p,
+ u32 event, bool must)
{
- if (!tipc_subscr_overlap(sub, found_lower, found_upper))
+ struct tipc_service_range *sr = &sub->s.seq;
+ u32 filter = sub->s.filter;
+
+ if (!tipc_sub_check_overlap(sr, &p->sr))
return;
- if (!must && !(sub->filter & TIPC_SUB_PORTS))
+ if (!must && !(filter & TIPC_SUB_PORTS))
return;
-
- subscr_send_event(sub, found_lower, found_upper, event, port_ref, node);
-}
-
-static void subscr_timeout(struct tipc_subscription *sub)
-{
- struct tipc_subscriber *subscriber = sub->subscriber;
-
- /* The spin lock per subscriber is used to protect its members */
- spin_lock_bh(&subscriber->lock);
-
- /* Validate if the connection related to the subscriber is
- * closed (in case subscriber is terminating)
- */
- if (subscriber->conid == 0) {
- spin_unlock_bh(&subscriber->lock);
+ if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE)
return;
- }
-
- /* Validate timeout (in case subscription is being cancelled) */
- if (sub->timeout == TIPC_WAIT_FOREVER) {
- spin_unlock_bh(&subscriber->lock);
+ if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE)
return;
- }
-
- /* Unlink subscription from name table */
- tipc_nametbl_unsubscribe(sub);
-
- /* Unlink subscription from subscriber */
- list_del(&sub->subscription_list);
-
- spin_unlock_bh(&subscriber->lock);
-
- /* Notify subscriber of timeout */
- subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
-
- /* Now destroy subscription */
- k_term_timer(&sub->timer);
- kfree(sub);
- atomic_dec(&subscription_count);
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, p, event);
+ spin_unlock(&sub->lock);
}
-/**
- * subscr_del - delete a subscription within a subscription list
- *
- * Called with subscriber lock held.
- */
-static void subscr_del(struct tipc_subscription *sub)
+static void tipc_sub_timeout(struct timer_list *t)
{
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscription_list);
- kfree(sub);
- atomic_dec(&subscription_count);
+ struct tipc_subscription *sub = timer_container_of(sub, t, timer);
+
+ spin_lock(&sub->lock);
+ tipc_sub_send_event(sub, NULL, TIPC_SUBSCR_TIMEOUT);
+ sub->inactive = true;
+ spin_unlock(&sub->lock);
}
-/**
- * subscr_terminate - terminate communication with a subscriber
- *
- * Note: Must call it in process context since it might sleep.
- */
-static void subscr_terminate(struct tipc_subscriber *subscriber)
+static void tipc_sub_kref_release(struct kref *kref)
{
- tipc_conn_terminate(&topsrv, subscriber->conid);
+ kfree(container_of(kref, struct tipc_subscription, kref));
}
-static void subscr_release(struct tipc_subscriber *subscriber)
+void tipc_sub_put(struct tipc_subscription *subscription)
{
- struct tipc_subscription *sub;
- struct tipc_subscription *sub_temp;
-
- spin_lock_bh(&subscriber->lock);
-
- /* Invalidate subscriber reference */
- subscriber->conid = 0;
-
- /* Destroy any existing subscriptions for subscriber */
- list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
- subscription_list) {
- if (sub->timeout != TIPC_WAIT_FOREVER) {
- spin_unlock_bh(&subscriber->lock);
- k_cancel_timer(&sub->timer);
- k_term_timer(&sub->timer);
- spin_lock_bh(&subscriber->lock);
- }
- subscr_del(sub);
- }
- spin_unlock_bh(&subscriber->lock);
-
- /* Now destroy subscriber */
- kfree(subscriber);
+ kref_put(&subscription->kref, tipc_sub_kref_release);
}
-/**
- * subscr_cancel - handle subscription cancellation request
- *
- * Called with subscriber lock held. Routine must temporarily release lock
- * to enable the subscription timeout routine to finish without deadlocking;
- * the lock is then reclaimed to allow caller to release it upon return.
- *
- * Note that fields of 's' use subscriber's endianness!
- */
-static void subscr_cancel(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
+void tipc_sub_get(struct tipc_subscription *subscription)
{
- struct tipc_subscription *sub;
- struct tipc_subscription *sub_temp;
- int found = 0;
-
- /* Find first matching subscription, exit if not found */
- list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
- subscription_list) {
- if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- found = 1;
- break;
- }
- }
- if (!found)
- return;
-
- /* Cancel subscription timer (if used), then delete subscription */
- if (sub->timeout != TIPC_WAIT_FOREVER) {
- sub->timeout = TIPC_WAIT_FOREVER;
- spin_unlock_bh(&subscriber->lock);
- k_cancel_timer(&sub->timer);
- k_term_timer(&sub->timer);
- spin_lock_bh(&subscriber->lock);
- }
- subscr_del(sub);
+ kref_get(&subscription->kref);
}
-/**
- * subscr_subscribe - create subscription for subscriber
- *
- * Called with subscriber lock held.
- */
-static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
- struct tipc_subscriber *subscriber)
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid)
{
+ u32 lower = tipc_sub_read(s, seq.lower);
+ u32 upper = tipc_sub_read(s, seq.upper);
+ u32 filter = tipc_sub_read(s, filter);
struct tipc_subscription *sub;
- int swap;
-
- /* Determine subscriber's endianness */
- swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
+ u32 timeout;
- /* Detect & process a subscription cancellation request */
- if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
- s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
- subscr_cancel(s, subscriber);
- return NULL;
- }
-
- /* Refuse subscription if global limit exceeded */
- if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
- pr_warn("Subscription rejected, limit reached (%u)\n",
- TIPC_MAX_SUBSCRIPTIONS);
- subscr_terminate(subscriber);
+ if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+ lower > upper) {
+ pr_warn("Subscription rejected, illegal request\n");
return NULL;
}
-
- /* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
pr_warn("Subscription rejected, no memory\n");
- subscr_terminate(subscriber);
return NULL;
}
-
- /* Initialize subscription object */
- sub->seq.type = htohl(s->seq.type, swap);
- sub->seq.lower = htohl(s->seq.lower, swap);
- sub->seq.upper = htohl(s->seq.upper, swap);
- sub->timeout = htohl(s->timeout, swap);
- sub->filter = htohl(s->filter, swap);
- if ((!(sub->filter & TIPC_SUB_PORTS) ==
- !(sub->filter & TIPC_SUB_SERVICE)) ||
- (sub->seq.lower > sub->seq.upper)) {
- pr_warn("Subscription rejected, illegal request\n");
+ INIT_LIST_HEAD(&sub->service_list);
+ INIT_LIST_HEAD(&sub->sub_list);
+ sub->net = net;
+ sub->conid = conid;
+ sub->inactive = false;
+ memcpy(&sub->evt.s, s, sizeof(*s));
+ sub->s.seq.type = tipc_sub_read(s, seq.type);
+ sub->s.seq.lower = lower;
+ sub->s.seq.upper = upper;
+ sub->s.filter = filter;
+ sub->s.timeout = tipc_sub_read(s, timeout);
+ memcpy(sub->s.usr_handle, s->usr_handle, 8);
+ spin_lock_init(&sub->lock);
+ kref_init(&sub->kref);
+ if (!tipc_nametbl_subscribe(sub)) {
kfree(sub);
- subscr_terminate(subscriber);
return NULL;
}
- INIT_LIST_HEAD(&sub->nameseq_list);
- list_add(&sub->subscription_list, &subscriber->subscription_list);
- sub->subscriber = subscriber;
- sub->swap = swap;
- memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
- atomic_inc(&subscription_count);
- if (sub->timeout != TIPC_WAIT_FOREVER) {
- k_init_timer(&sub->timer,
- (Handler)subscr_timeout, (unsigned long)sub);
- k_start_timer(&sub->timer, sub->timeout);
- }
-
+ timer_setup(&sub->timer, tipc_sub_timeout, 0);
+ timeout = tipc_sub_read(&sub->evt.s, timeout);
+ if (timeout != TIPC_WAIT_FOREVER)
+ mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
return sub;
}
-/* Handle one termination request for the subscriber */
-static void subscr_conn_shutdown_event(int conid, void *usr_data)
-{
- subscr_release((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr,
- void *usr_data, void *buf, size_t len)
-{
- struct tipc_subscriber *subscriber = usr_data;
- struct tipc_subscription *sub;
-
- spin_lock_bh(&subscriber->lock);
- sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber);
- if (sub)
- tipc_nametbl_subscribe(sub);
- spin_unlock_bh(&subscriber->lock);
-}
-
-
-/* Handle one request to establish a new subscriber */
-static void *subscr_named_msg_event(int conid)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
{
- struct tipc_subscriber *subscriber;
-
- /* Create subscriber object */
- subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC);
- if (subscriber == NULL) {
- pr_warn("Subscriber rejected, no memory\n");
- return NULL;
- }
- INIT_LIST_HEAD(&subscriber->subscription_list);
- subscriber->conid = conid;
- spin_lock_init(&subscriber->lock);
-
- return (void *)subscriber;
-}
-
-int tipc_subscr_start(void)
-{
- return tipc_server_start(&topsrv);
-}
-
-void tipc_subscr_stop(void)
-{
- tipc_server_stop(&topsrv);
+ tipc_nametbl_unsubscribe(sub);
+ if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+ timer_delete_sync(&sub->timer);
+ list_del(&sub->sub_list);
+ tipc_sub_put(sub);
}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 393e417bee3f..60b877531b66 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,8 +1,9 @@
/*
* net/tipc/subscr.h: Include file for TIPC network topology service
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
* Copyright (c) 2005-2007, 2012-2013, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,45 +38,85 @@
#ifndef _TIPC_SUBSCR_H
#define _TIPC_SUBSCR_H
-#include "server.h"
+#include "topsrv.h"
+#define TIPC_MAX_SUBSCR 65535
+#define TIPC_MAX_PUBL 65535
+
+struct publication;
struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
/**
* struct tipc_subscription - TIPC network topology subscription object
- * @subscriber: pointer to its subscriber
- * @seq: name sequence associated with subscription
- * @timeout: duration of subscription (in ms)
- * @filter: event filtering to be done for subscription
- * @timer: timer governing subscription duration (optional)
- * @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscription_list: adjacent subscriptions in subscriber's subscription list
- * @server_ref: object reference of server port associated with subscription
- * @swap: indicates if subscriber uses opposite endianness in its messages
+ * @s: host-endian copy of the user subscription
* @evt: template for events generated by subscription
+ * @kref: reference count for this subscription
+ * @net: network namespace associated with subscription
+ * @timer: timer governing subscription duration (optional)
+ * @service_list: adjacent subscriptions in name sequence's subscription list
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
+ * @conid: connection identifier of topology server
+ * @inactive: true if this subscription is inactive
+ * @lock: serialize up/down and timer events
*/
struct tipc_subscription {
- struct tipc_subscriber *subscriber;
- struct tipc_name_seq seq;
- u32 timeout;
- u32 filter;
- struct timer_list timer;
- struct list_head nameseq_list;
- struct list_head subscription_list;
- int swap;
+ struct tipc_subscr s;
struct tipc_event evt;
+ struct kref kref;
+ struct net *net;
+ struct timer_list timer;
+ struct list_head service_list;
+ struct list_head sub_list;
+ int conid;
+ bool inactive;
+ spinlock_t lock;
};
-int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper);
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
+ struct tipc_subscr *s,
+ int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+ struct publication *p,
+ u32 event, bool must);
+
+int __net_init tipc_topsrv_init_net(struct net *net);
+void __net_exit tipc_topsrv_exit_net(struct net *net);
-void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower,
- u32 found_upper, u32 event, u32 port_ref,
- u32 node, int must);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
-int tipc_subscr_start(void);
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
-void tipc_subscr_stop(void);
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = (sub__)->field_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (swap_ ? swab32(val__) : val__); \
+ })
+
+/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format
+ */
+#define tipc_sub_write(sub_, field_, val_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = val_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (sub__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_) \
+ ({ \
+ struct tipc_event *evt__ = evt_; \
+ u32 val__ = val_; \
+ int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \
+ (evt__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
#endif
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index f3fef93325a8..30d2e06e3d8c 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -34,7 +34,9 @@
*/
#include "core.h"
-
+#include "trace.h"
+#include "crypto.h"
+#include "bcast.h"
#include <linux/sysctl.h>
static struct ctl_table_header *tipc_ctl_hdr;
@@ -45,9 +47,50 @@ static struct ctl_table tipc_table[] = {
.data = &sysctl_tipc_rmem,
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "named_timeout",
+ .data = &sysctl_tipc_named_timeout,
+ .maxlen = sizeof(sysctl_tipc_named_timeout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "sk_filter",
+ .data = &sysctl_tipc_sk_filter,
+ .maxlen = sizeof(sysctl_tipc_sk_filter),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+#ifdef CONFIG_TIPC_CRYPTO
+ {
+ .procname = "max_tfms",
+ .data = &sysctl_tipc_max_tfms,
+ .maxlen = sizeof(sysctl_tipc_max_tfms),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "key_exchange_enabled",
+ .data = &sysctl_tipc_key_exchange_enabled,
+ .maxlen = sizeof(sysctl_tipc_key_exchange_enabled),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
+ {
+ .procname = "bc_retruni",
+ .data = &sysctl_tipc_bc_retruni,
+ .maxlen = sizeof(sysctl_tipc_bc_retruni),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
},
- {}
};
int tipc_register_sysctl(void)
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
new file mode 100644
index 000000000000..aad7f96b6009
--- /dev/null
+++ b/net/tipc/topsrv.c
@@ -0,0 +1,733 @@
+/*
+ * net/tipc/topsrv.c: TIPC topology server infrastructure
+ *
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017-2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "subscr.h"
+#include "topsrv.h"
+#include "core.h"
+#include "socket.h"
+#include "addr.h"
+#include "msg.h"
+#include "bearer.h"
+#include <net/sock.h>
+#include <linux/module.h>
+#include <trace/events/sock.h>
+
+/* Number of messages to send before rescheduling */
+#define MAX_SEND_MSG_COUNT 25
+#define MAX_RECV_MSG_COUNT 25
+#define CF_CONNECTED 1
+
+#define TIPC_SERVER_NAME_LEN 32
+
+/**
+ * struct tipc_topsrv - TIPC server structure
+ * @conn_idr: identifier set of connection
+ * @idr_lock: protect the connection identifier set
+ * @idr_in_use: number of allocated identifier entries
+ * @net: network namespace instance
+ * @awork: accept work item
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @listener: topsrv listener socket
+ * @name: server name
+ */
+struct tipc_topsrv {
+ struct idr conn_idr;
+ spinlock_t idr_lock; /* for idr list */
+ int idr_in_use;
+ struct net *net;
+ struct work_struct awork;
+ struct workqueue_struct *rcv_wq;
+ struct workqueue_struct *send_wq;
+ struct socket *listener;
+ char name[TIPC_SERVER_NAME_LEN];
+};
+
+/**
+ * struct tipc_conn - TIPC connection structure
+ * @kref: reference counter to connection object
+ * @conid: connection identifier
+ * @sock: socket handler associated with connection
+ * @flags: indicates connection state
+ * @server: pointer to connected server
+ * @sub_list: list of all subscriptions pertaining to this connection
+ * @sub_lock: lock protecting the subscription list
+ * @rwork: receive work item
+ * @outqueue: list of outbound events waiting to be sent
+ * @outqueue_lock: control access to the outqueue
+ * @swork: send work item
+ */
+struct tipc_conn {
+ struct kref kref;
+ int conid;
+ struct socket *sock;
+ unsigned long flags;
+ struct tipc_topsrv *server;
+ struct list_head sub_list;
+ spinlock_t sub_lock; /* for subscription list */
+ struct work_struct rwork;
+ struct list_head outqueue;
+ spinlock_t outqueue_lock; /* for outqueue */
+ struct work_struct swork;
+};
+
+/* An entry waiting to be sent */
+struct outqueue_entry {
+ bool inactive;
+ struct tipc_event evt;
+ struct list_head list;
+};
+
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
+
+static bool connected(struct tipc_conn *con)
+{
+ return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+static void tipc_conn_kref_release(struct kref *kref)
+{
+ struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
+ struct tipc_topsrv *s = con->server;
+ struct outqueue_entry *e, *safe;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
+ if (con->sock)
+ sock_release(con->sock);
+
+ spin_lock_bh(&con->outqueue_lock);
+ list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+ kfree(con);
+}
+
+static void conn_put(struct tipc_conn *con)
+{
+ kref_put(&con->kref, tipc_conn_kref_release);
+}
+
+static void conn_get(struct tipc_conn *con)
+{
+ kref_get(&con->kref);
+}
+
+static void tipc_conn_close(struct tipc_conn *con)
+{
+ struct sock *sk = con->sock->sk;
+ bool disconnect = false;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
+ if (disconnect) {
+ sk->sk_user_data = NULL;
+ tipc_conn_delete_sub(con, NULL);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ /* Handle concurrent calls from sending and receiving threads */
+ if (!disconnect)
+ return;
+
+ /* Don't flush pending work items - just let them expire */
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
+ conn_put(con);
+}
+
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock)
+{
+ struct tipc_conn *con;
+ int ret;
+
+ con = kzalloc(sizeof(*con), GFP_ATOMIC);
+ if (!con)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&con->kref);
+ INIT_LIST_HEAD(&con->outqueue);
+ INIT_LIST_HEAD(&con->sub_list);
+ spin_lock_init(&con->outqueue_lock);
+ spin_lock_init(&con->sub_lock);
+ INIT_WORK(&con->swork, tipc_conn_send_work);
+ INIT_WORK(&con->rwork, tipc_conn_recv_work);
+
+ spin_lock_bh(&s->idr_lock);
+ ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
+ if (ret < 0) {
+ kfree(con);
+ spin_unlock_bh(&s->idr_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ con->conid = ret;
+ s->idr_in_use++;
+
+ set_bit(CF_CONNECTED, &con->flags);
+ con->server = s;
+ con->sock = sock;
+ conn_get(con);
+ spin_unlock_bh(&s->idr_lock);
+
+ return con;
+}
+
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
+{
+ struct tipc_conn *con;
+
+ spin_lock_bh(&s->idr_lock);
+ con = idr_find(&s->conn_idr, conid);
+ if (!connected(con) || !kref_get_unless_zero(&con->kref))
+ con = NULL;
+ spin_unlock_bh(&s->idr_lock);
+ return con;
+}
+
+/* tipc_conn_delete_sub - delete a specific or all subscriptions
+ * for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(con->server->net);
+ struct list_head *sub_list = &con->sub_list;
+ struct tipc_subscription *sub, *tmp;
+
+ spin_lock_bh(&con->sub_lock);
+ list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+ if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+ tipc_sub_unsubscribe(sub);
+ atomic_dec(&tn->subscription_count);
+ if (s)
+ break;
+ }
+ }
+ spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+ struct list_head *queue = &con->outqueue;
+ struct tipc_topsrv *srv = con->server;
+ struct outqueue_entry *e;
+ struct tipc_event *evt;
+ struct msghdr msg;
+ struct kvec iov;
+ int count = 0;
+ int ret;
+
+ spin_lock_bh(&con->outqueue_lock);
+
+ while (!list_empty(queue)) {
+ e = list_first_entry(queue, struct outqueue_entry, list);
+ evt = &e->evt;
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (e->inactive)
+ tipc_conn_delete_sub(con, &evt->s);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ iov.iov_base = evt;
+ iov.iov_len = sizeof(*evt);
+ msg.msg_name = NULL;
+
+ if (con->sock) {
+ ret = kernel_sendmsg(con->sock, &msg, &iov,
+ 1, sizeof(*evt));
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ return;
+ } else if (ret < 0) {
+ return tipc_conn_close(con);
+ }
+ } else {
+ tipc_topsrv_kern_evt(srv->net, evt);
+ }
+
+ /* Don't starve users filling buffers */
+ if (++count >= MAX_SEND_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ spin_lock_bh(&con->outqueue_lock);
+ list_del(&e->list);
+ kfree(e);
+ }
+ spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+ if (connected(con))
+ tipc_conn_send_to_sock(con);
+
+ conn_put(con);
+}
+
+/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct outqueue_entry *e;
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(srv, conid);
+ if (!con)
+ return;
+
+ if (!connected(con))
+ goto err;
+
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
+ if (!e)
+ goto err;
+ e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+ memcpy(&e->evt, evt, sizeof(*evt));
+ spin_lock_bh(&con->outqueue_lock);
+ list_add_tail(&e->list, &con->outqueue);
+ spin_unlock_bh(&con->outqueue_lock);
+
+ if (queue_work(srv->send_wq, &con->swork))
+ return;
+err:
+ conn_put(con);
+}
+
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->send_wq, &con->swork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+ struct tipc_conn *con,
+ struct tipc_subscr *s)
+{
+ struct tipc_net *tn = tipc_net(srv->net);
+ struct tipc_subscription *sub;
+ u32 s_filter = tipc_sub_read(s, filter);
+
+ if (s_filter & TIPC_SUB_CANCEL) {
+ tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL);
+ tipc_conn_delete_sub(con, s);
+ return 0;
+ }
+ if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+ pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+ return -1;
+ }
+ sub = tipc_sub_subscribe(srv->net, s, con->conid);
+ if (!sub)
+ return -1;
+ atomic_inc(&tn->subscription_count);
+ spin_lock_bh(&con->sub_lock);
+ list_add(&sub->sub_list, &con->sub_list);
+ spin_unlock_bh(&con->sub_lock);
+ return 0;
+}
+
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
+{
+ struct tipc_topsrv *srv = con->server;
+ struct sock *sk = con->sock->sk;
+ struct msghdr msg = {};
+ struct tipc_subscr s;
+ struct kvec iov;
+ int ret;
+
+ iov.iov_base = &s;
+ iov.iov_len = sizeof(s);
+ msg.msg_name = NULL;
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+ if (ret == -EWOULDBLOCK)
+ return -EWOULDBLOCK;
+ if (ret == sizeof(s)) {
+ read_lock_bh(&sk->sk_callback_lock);
+ /* RACE: the connection can be closed in the meantime */
+ if (likely(connected(con)))
+ ret = tipc_conn_rcv_sub(srv, con, &s);
+ read_unlock_bh(&sk->sk_callback_lock);
+ if (!ret)
+ return 0;
+ }
+
+ tipc_conn_close(con);
+ return ret;
+}
+
+static void tipc_conn_recv_work(struct work_struct *work)
+{
+ struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
+ int count = 0;
+
+ while (connected(con)) {
+ if (tipc_conn_rcv_from_sock(con))
+ break;
+
+ /* Don't flood Rx machine */
+ if (++count >= MAX_RECV_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ }
+ conn_put(con);
+}
+
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
+{
+ struct tipc_conn *con;
+
+ trace_sk_data_ready(sk);
+
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sk->sk_user_data;
+ if (connected(con)) {
+ conn_get(con);
+ if (!queue_work(con->server->rcv_wq, &con->rwork))
+ conn_put(con);
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void tipc_topsrv_accept(struct work_struct *work)
+{
+ struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
+ struct socket *newsock, *lsock;
+ struct tipc_conn *con;
+ struct sock *newsk;
+ int ret;
+
+ spin_lock_bh(&srv->idr_lock);
+ if (!srv->listener) {
+ spin_unlock_bh(&srv->idr_lock);
+ return;
+ }
+ lsock = srv->listener;
+ spin_unlock_bh(&srv->idr_lock);
+
+ while (1) {
+ ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+ if (ret < 0)
+ return;
+ con = tipc_conn_alloc(srv, newsock);
+ if (IS_ERR(con)) {
+ ret = PTR_ERR(con);
+ sock_release(newsock);
+ return;
+ }
+ /* Register callbacks */
+ newsk = newsock->sk;
+ write_lock_bh(&newsk->sk_callback_lock);
+ newsk->sk_data_ready = tipc_conn_data_ready;
+ newsk->sk_write_space = tipc_conn_write_space;
+ newsk->sk_user_data = con;
+ write_unlock_bh(&newsk->sk_callback_lock);
+
+ /* Wake up receive process in case of 'SYN+' message */
+ newsk->sk_data_ready(newsk);
+ conn_put(con);
+ }
+}
+
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
+ */
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
+{
+ struct tipc_topsrv *srv;
+
+ trace_sk_data_ready(sk);
+
+ read_lock_bh(&sk->sk_callback_lock);
+ srv = sk->sk_user_data;
+ if (srv)
+ queue_work(srv->rcv_wq, &srv->awork);
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
+{
+ struct socket *lsock = NULL;
+ struct sockaddr_tipc saddr;
+ struct sock *sk;
+ int rc;
+
+ rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+ if (rc < 0)
+ return rc;
+
+ srv->listener = lsock;
+ sk = lsock->sk;
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = tipc_topsrv_listener_data_ready;
+ sk->sk_user_data = srv;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ lock_sock(sk);
+ rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+ release_sock(sk);
+ if (rc < 0)
+ goto err;
+
+ saddr.family = AF_TIPC;
+ saddr.addrtype = TIPC_SERVICE_RANGE;
+ saddr.addr.nameseq.type = TIPC_TOP_SRV;
+ saddr.addr.nameseq.lower = TIPC_TOP_SRV;
+ saddr.addr.nameseq.upper = TIPC_TOP_SRV;
+ saddr.scope = TIPC_NODE_SCOPE;
+
+ rc = tipc_sk_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (rc < 0)
+ goto err;
+ rc = kernel_listen(lsock, 0);
+ if (rc < 0)
+ goto err;
+
+ /* The server's listening socket is created and owned by the TIPC module
+ * itself, so the module references taken on its behalf must be dropped
+ * again here to keep the module reference count at zero once the server
+ * socket exists. Otherwise "rmmod" could never remove the TIPC module
+ * after it has been inserted successfully.
+ *
+ * sock_create_kern() takes two such references: one on the owner of the
+ * TIPC socket's proto_ops struct, and another on the owner of the TIPC
+ * proto struct. Therefore, we must decrement the module reference count
+ * twice to ensure that it stays zero after the server's listening
+ * socket is created.
+ *
+ * For the same reason, we must bump the module reference count twice
+ * again before the socket is closed.
+ */
+ module_put(lsock->ops->owner);
+ module_put(sk->sk_prot_creator->owner);
+
+ return 0;
+err:
+ sock_release(lsock);
+ return -EINVAL;
+}
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
+{
+ struct tipc_subscr sub;
+ struct tipc_conn *con;
+ int rc;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = filter;
+ *(u64 *)&sub.usr_handle = (u64)port;
+
+ con = tipc_conn_alloc(tipc_topsrv(net), NULL);
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+ if (rc)
+ conn_put(con);
+
+ conn_put(con);
+ return !rc;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+
+ test_and_clear_bit(CF_CONNECTED, &con->flags);
+ tipc_conn_delete_sub(con, NULL);
+ conn_put(con);
+ conn_put(con);
+}
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_loopback_trace(net, &evtq);
+ tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
+{
+ s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
+ if (!s->rcv_wq) {
+ pr_err("can't start tipc receive workqueue\n");
+ return -ENOMEM;
+ }
+
+ s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
+ if (!s->send_wq) {
+ pr_err("can't start tipc send workqueue\n");
+ destroy_workqueue(s->rcv_wq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
+{
+ destroy_workqueue(s->rcv_wq);
+ destroy_workqueue(s->send_wq);
+}
+
+static int tipc_topsrv_start(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+ const char name[] = "topology_server";
+ struct tipc_topsrv *srv;
+ int ret;
+
+ srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+ if (!srv)
+ return -ENOMEM;
+
+ srv->net = net;
+ INIT_WORK(&srv->awork, tipc_topsrv_accept);
+
+ strscpy(srv->name, name, sizeof(srv->name));
+ tn->topsrv = srv;
+ atomic_set(&tn->subscription_count, 0);
+
+ spin_lock_init(&srv->idr_lock);
+ idr_init(&srv->conn_idr);
+ srv->idr_in_use = 0;
+
+ ret = tipc_topsrv_work_start(srv);
+ if (ret < 0)
+ goto err_start;
+
+ ret = tipc_topsrv_create_listener(srv);
+ if (ret < 0)
+ goto err_create;
+
+ return 0;
+
+err_create:
+ tipc_topsrv_work_stop(srv);
+err_start:
+ kfree(srv);
+ return ret;
+}
+
+static void tipc_topsrv_stop(struct net *net)
+{
+ struct tipc_topsrv *srv = tipc_topsrv(net);
+ struct socket *lsock = srv->listener;
+ struct tipc_conn *con;
+ int id;
+
+ spin_lock_bh(&srv->idr_lock);
+ for (id = 0; srv->idr_in_use; id++) {
+ con = idr_find(&srv->conn_idr, id);
+ if (con) {
+ conn_get(con);
+ spin_unlock_bh(&srv->idr_lock);
+ tipc_conn_close(con);
+ conn_put(con);
+ spin_lock_bh(&srv->idr_lock);
+ }
+ }
+ __module_get(lsock->ops->owner);
+ __module_get(lsock->sk->sk_prot_creator->owner);
+ srv->listener = NULL;
+ spin_unlock_bh(&srv->idr_lock);
+
+ tipc_topsrv_work_stop(srv);
+ sock_release(lsock);
+ idr_destroy(&srv->conn_idr);
+ kfree(srv);
+}
+
+int __net_init tipc_topsrv_init_net(struct net *net)
+{
+ return tipc_topsrv_start(net);
+}
+
+void __net_exit tipc_topsrv_exit_net(struct net *net)
+{
+ tipc_topsrv_stop(net);
+}
diff --git a/net/tipc/log.c b/net/tipc/topsrv.h
index abef644f27d8..c7ea71293748 100644
--- a/net/tipc/log.c
+++ b/net/tipc/topsrv.h
@@ -1,8 +1,8 @@
/*
- * net/tipc/log.c: TIPC print buffer routines for debugging
+ * net/tipc/topsrv.h: Include file for TIPC topology server code
*
- * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,22 +34,21 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _TIPC_SERVER_H
+#define _TIPC_SERVER_H
+
#include "core.h"
-#include "config.h"
-/**
- * tipc_snprintf - append formatted output to print buffer
- * @buf: pointer to print buffer
- * @len: buffer length
- * @fmt: formatted info to be printed
- */
-int tipc_snprintf(char *buf, int len, const char *fmt, ...)
-{
- int i;
- va_list args;
+#define TIPC_SERVER_NAME_LEN 32
+#define TIPC_SUB_CLUSTER_SCOPE 0x20
+#define TIPC_SUB_NODE_SCOPE 0x40
+#define TIPC_SUB_NO_STATUS 0x80
+
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+ u32 event, struct tipc_event *evt);
+
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
- va_start(args, fmt);
- i = vscnprintf(buf, len, fmt, args);
- va_end(args);
- return i;
-}
+#endif
diff --git a/net/tipc/trace.c b/net/tipc/trace.c
new file mode 100644
index 000000000000..7d2931521e0e
--- /dev/null
+++ b/net/tipc/trace.c
@@ -0,0 +1,206 @@
+/*
+ * net/tipc/trace.c: TIPC tracepoints code
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+/*
+ * socket tuples for filtering in socket traces:
+ * (portid, sock type, name type, name lower, name upper)
+ */
+unsigned long sysctl_tipc_sk_filter[5] __read_mostly = {0, };
+
+/**
+ * tipc_skb_dump - dump TIPC skb data
+ * @skb: skb to be dumped
+ * @more: dump more?
+ *        - false: dump only tipc msg data
+ *        - true: dump kernel-related skb data and tipc cb[] array as well
+ * @buf: returned buffer of dump data in format
+ *
+ * The output is bounded by SKB_LMIN bytes, or SKB_LMAX bytes when @more is
+ * set; @buf must be at least that large.
+ *
+ * Return: the accumulated scnprintf() result, i.e. the number of characters
+ * placed in @buf.
+ */
+int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf)
+{
+	size_t sz = more ? SKB_LMAX : SKB_LMIN;
+	struct tipc_skb_cb *skbcb;
+	struct tipc_msg *hdr;
+	int i;
+
+	if (!skb)
+		return scnprintf(buf, sz, "msg: (null)\n");
+
+	hdr = buf_msg(skb);
+	skbcb = TIPC_SKB_CB(skb);
+
+	/* tipc msg data section: fields common to every message user */
+	i = scnprintf(buf, sz, "msg: %u %u %u %u %x %x %u %u %u",
+		      msg_user(hdr), msg_type(hdr), msg_hdr_sz(hdr),
+		      msg_data_sz(hdr), msg_orignode(hdr), msg_destnode(hdr),
+		      msg_seqno(hdr), msg_ack(hdr), msg_bcast_ack(hdr));
+
+	switch (msg_user(hdr)) {
+	case LINK_PROTOCOL:
+		i += scnprintf(buf + i, sz - i, " %c %u %u %u %u %u %u %u",
+			       msg_net_plane(hdr), msg_probe(hdr),
+			       msg_peer_stopping(hdr), msg_session(hdr),
+			       msg_next_sent(hdr), msg_seq_gap(hdr),
+			       msg_bc_snd_nxt(hdr), msg_bc_gap(hdr));
+		break;
+	case TIPC_LOW_IMPORTANCE:
+	case TIPC_MEDIUM_IMPORTANCE:
+	case TIPC_HIGH_IMPORTANCE:
+	case TIPC_CRITICAL_IMPORTANCE:
+	case CONN_MANAGER:
+	case SOCK_WAKEUP:
+		i += scnprintf(buf + i, sz - i, " | %u %u",
+			       msg_origport(hdr), msg_destport(hdr));
+
+		/* name fields are only printed for named/mcast messages */
+		if (msg_type(hdr) == TIPC_NAMED_MSG)
+			i += scnprintf(buf + i, sz - i, " %u %u",
+				       msg_nametype(hdr), msg_nameinst(hdr));
+		else if (msg_type(hdr) == TIPC_MCAST_MSG)
+			i += scnprintf(buf + i, sz - i, " %u %u %u",
+				       msg_nametype(hdr), msg_namelower(hdr),
+				       msg_nameupper(hdr));
+
+		i += scnprintf(buf + i, sz - i, " | %u %u %u %u",
+			       msg_src_droppable(hdr), msg_dest_droppable(hdr),
+			       msg_errcode(hdr), msg_reroute_cnt(hdr));
+		break;
+	default:
+		/* need more? */
+		break;
+	}
+
+	i += scnprintf(buf + i, sz - i, "\n");
+	if (!more)
+		return i;
+
+	/* kernel-related skb data section */
+	i += scnprintf(buf + i, sz - i,
+		       "skb: %s %u %u %u %u %u %p %u %llx %llx\n",
+		       (skb->dev) ? skb->dev->name : "n/a",
+		       skb->len, skb->data_len, skb->hdr_len, skb->truesize,
+		       skb_cloned(skb), skb->sk, skb_shinfo(skb)->nr_frags,
+		       ktime_to_ms(skb_get_ktime(skb)),
+		       ktime_to_ms(skb_hwtstamps(skb)->hwtstamp));
+
+	/* tipc skb cb[] data section */
+	i += scnprintf(buf + i, sz - i, "cb[]: %u %u %u %u %u %u\n",
+		       skbcb->bytes_read, skbcb->orig_member,
+		       jiffies_to_msecs(skbcb->nxt_retr), skbcb->validated,
+		       skbcb->chain_imp, skbcb->ackers);
+
+	return i;
+}
+
+/**
+ * tipc_list_dump - dump TIPC skb list/queue
+ * @list: list of skbs to be dumped
+ * @more: dump more?
+ *        - false: dump only the head & tail skbs
+ *        - true: dump the first & last 5 skbs
+ * @buf: returned buffer of dump data in format
+ *
+ * The output is bounded by LIST_LMIN bytes, or LIST_LMAX bytes when @more
+ * is set. In the @more case the per-skb dumps are cut short (trailing
+ * entries omitted) rather than written past that bound.
+ *
+ * Return: the number of characters placed in @buf.
+ */
+int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf)
+{
+	int i = 0;
+	int mark;
+	size_t sz = (more) ? LIST_LMAX : LIST_LMIN;
+	u32 count, len;
+	struct sk_buff *hskb, *tskb, *skb, *tmp;
+
+	if (!list) {
+		i += scnprintf(buf, sz, "(null)\n");
+		return i;
+	}
+
+	len = skb_queue_len(list);
+	/* len and count are u32, so print them unsigned */
+	i += scnprintf(buf, sz, "len = %u\n", len);
+
+	if (!len)
+		return i;
+
+	if (!more) {
+		hskb = skb_peek(list);
+		i += scnprintf(buf + i, sz - i, " head ");
+		i += tipc_skb_dump(hskb, false, buf + i);
+		if (len > 1) {
+			tskb = skb_peek_tail(list);
+			i += scnprintf(buf + i, sz - i, " tail ");
+			i += tipc_skb_dump(tskb, false, buf + i);
+		}
+	} else {
+		count = 0;
+		skb_queue_walk_safe(list, skb, tmp) {
+			count++;
+			if (count == 6)
+				i += scnprintf(buf + i, sz - i, " .\n .\n");
+			/* skip everything between the first and last 5 */
+			if (count > 5 && count <= len - 5)
+				continue;
+			mark = i;
+			i += scnprintf(buf + i, sz - i, " #%u ", count);
+			/* tipc_skb_dump() has no size argument and assumes
+			 * SKB_LMIN bytes are available at its destination.
+			 * Ten labelled entries can exceed LIST_LMAX, so stop
+			 * (dropping the label just written) once fewer than
+			 * SKB_LMIN bytes remain.
+			 */
+			if (sz - i < SKB_LMIN) {
+				i = mark;
+				buf[i] = '\0';
+				break;
+			}
+			i += tipc_skb_dump(skb, false, buf + i);
+		}
+	}
+	return i;
+}
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
new file mode 100644
index 000000000000..865142ed0ab4
--- /dev/null
+++ b/net/tipc/trace.h
@@ -0,0 +1,434 @@
+/*
+ * net/tipc/trace.h: TIPC tracepoints
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tipc
+
+#if !defined(_TIPC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TIPC_TRACE_H
+
+#include <linux/tracepoint.h>
+#include "core.h"
+#include "link.h"
+#include "socket.h"
+#include "node.h"
+
+/* Dump buffer sizes in bytes. Each event class below sizes its dynamic
+ * "buf" array with the *_LMIN or *_LMAX value of its object type, chosen
+ * by the event's "more"/"dqueues" argument. All sizes are multiples of
+ * SKB_LMIN.
+ */
+#define SKB_LMIN (100)
+#define SKB_LMAX (SKB_LMIN * 2)
+#define LIST_LMIN (SKB_LMIN * 3)
+#define LIST_LMAX (SKB_LMIN * 11)
+#define SK_LMIN (SKB_LMIN * 2)
+#define SK_LMAX (SKB_LMIN * 11)
+#define LINK_LMIN (SKB_LMIN)
+#define LINK_LMAX (SKB_LMIN * 16)
+#define NODE_LMIN (SKB_LMIN)
+#define NODE_LMAX (SKB_LMIN * 11)
+
+/* Bit flags, guarded separately so the enum is defined only once even when
+ * this header is read multiple times (TRACE_HEADER_MULTI_READ).
+ * NOTE(review): presumably these are the values passed as the "dqueues"
+ * argument of the sk/link dump helpers to select which queues get dumped;
+ * the consumers are not visible here - confirm against tipc_sk_dump() and
+ * tipc_link_dump().
+ */
+#ifndef __TIPC_TRACE_ENUM
+#define __TIPC_TRACE_ENUM
+enum {
+ TIPC_DUMP_NONE = 0,
+
+ TIPC_DUMP_TRANSMQ = 1,
+ TIPC_DUMP_BACKLOGQ = (1 << 1),
+ TIPC_DUMP_DEFERDQ = (1 << 2),
+ TIPC_DUMP_INPUTQ = (1 << 3),
+ TIPC_DUMP_WAKEUP = (1 << 4),
+
+ TIPC_DUMP_SK_SNDQ = (1 << 8),
+ TIPC_DUMP_SK_RCVQ = (1 << 9),
+ TIPC_DUMP_SK_BKLGQ = (1 << 10),
+ TIPC_DUMP_ALL = 0xffffu
+};
+#endif
+
+/* Link & Node FSM states:
+ * Maps a numeric FSM state to a printable name for TP_printk().
+ * "FAILINGOVER" and "SYNCHING" each appear twice with different values
+ * because one table serves both state machines.
+ */
+#define state_sym(val) \
+ __print_symbolic(val, \
+ {(0xe), "ESTABLISHED" },\
+ {(0xe << 4), "ESTABLISHING" },\
+ {(0x1 << 8), "RESET" },\
+ {(0x2 << 12), "RESETTING" },\
+ {(0xd << 16), "PEER_RESET" },\
+ {(0xf << 20), "FAILINGOVER" },\
+ {(0xc << 24), "SYNCHING" },\
+ {(0xdd), "SELF_DOWN_PEER_DOWN" },\
+ {(0xaa), "SELF_UP_PEER_UP" },\
+ {(0xd1), "SELF_DOWN_PEER_LEAVING" },\
+ {(0xac), "SELF_UP_PEER_COMING" },\
+ {(0xca), "SELF_COMING_PEER_UP" },\
+ {(0x1d), "SELF_LEAVING_PEER_DOWN" },\
+ {(0xf0), "FAILINGOVER" },\
+ {(0xcc), "SYNCHING" })
+
+/* Link & Node FSM events:
+ * Maps a numeric FSM event to a printable name for TP_printk().
+ * The FAILOVER_* and SYNCH_* names each appear twice with different
+ * values because one table serves both state machines.
+ */
+#define evt_sym(val) \
+ __print_symbolic(val, \
+ {(0xec1ab1e), "ESTABLISH_EVT" },\
+ {(0x9eed0e), "PEER_RESET_EVT" },\
+ {(0xfa110e), "FAILURE_EVT" },\
+ {(0x10ca1d0e), "RESET_EVT" },\
+ {(0xfa110bee), "FAILOVER_BEGIN_EVT" },\
+ {(0xfa110ede), "FAILOVER_END_EVT" },\
+ {(0xc1ccbee), "SYNCH_BEGIN_EVT" },\
+ {(0xc1ccede), "SYNCH_END_EVT" },\
+ {(0xece), "SELF_ESTABL_CONTACT_EVT" },\
+ {(0x1ce), "SELF_LOST_CONTACT_EVT" },\
+ {(0x9ece), "PEER_ESTABL_CONTACT_EVT" },\
+ {(0x91ce), "PEER_LOST_CONTACT_EVT" },\
+ {(0xfbe), "FAILOVER_BEGIN_EVT" },\
+ {(0xfee), "FAILOVER_END_EVT" },\
+ {(0xcbe), "SYNCH_BEGIN_EVT" },\
+ {(0xcee), "SYNCH_END_EVT" })
+
+/* Bearer, net device events:
+ * Maps a NETDEV_* notifier value to its name for TP_printk(); used by the
+ * tipc_l2_device_event tracepoint in this header.
+ */
+#define dev_evt_sym(val) \
+ __print_symbolic(val, \
+ {(NETDEV_CHANGE), "NETDEV_CHANGE" },\
+ {(NETDEV_GOING_DOWN), "NETDEV_GOING_DOWN" },\
+ {(NETDEV_UP), "NETDEV_UP" },\
+ {(NETDEV_CHANGEMTU), "NETDEV_CHANGEMTU" },\
+ {(NETDEV_CHANGEADDR), "NETDEV_CHANGEADDR" },\
+ {(NETDEV_UNREGISTER), "NETDEV_UNREGISTER" },\
+ {(NETDEV_CHANGENAME), "NETDEV_CHANGENAME" })
+
+/* Filter tuple and dump helpers used by the event classes below.
+ * Each *_dump() helper formats its object into @buf and returns an int;
+ * the event classes call them from TP_fast_assign() with a "buf" array
+ * sized by the *_LMIN/*_LMAX macros above.
+ */
+extern unsigned long sysctl_tipc_sk_filter[5] __read_mostly;
+
+int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf);
+int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf);
+int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf);
+int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf);
+int tipc_node_dump(struct tipc_node *n, bool more, char *buf);
+bool tipc_sk_filtering(struct sock *sk);
+
+/* Event class for single-skb dumps.
+ * Records the caller's @header string plus the text produced by
+ * tipc_skb_dump(); the dump buffer is SKB_LMAX bytes when @more is set,
+ * SKB_LMIN otherwise. Printed as the header on one line followed by the
+ * dump.
+ */
+DECLARE_EVENT_CLASS(tipc_skb_class,
+
+ TP_PROTO(struct sk_buff *skb, bool more, const char *header),
+
+ TP_ARGS(skb, more, header),
+
+ TP_STRUCT__entry(
+ __string(header, header)
+ __dynamic_array(char, buf, (more) ? SKB_LMAX : SKB_LMIN)
+ ),
+
+ TP_fast_assign(
+ __assign_str(header);
+ tipc_skb_dump(skb, more, __get_str(buf));
+ ),
+
+ TP_printk("%s\n%s", __get_str(header), __get_str(buf))
+)
+
+/* Instantiate one tracepoint of tipc_skb_class per name */
+#define DEFINE_SKB_EVENT(name) \
+DEFINE_EVENT(tipc_skb_class, name, \
+ TP_PROTO(struct sk_buff *skb, bool more, const char *header), \
+ TP_ARGS(skb, more, header))
+DEFINE_SKB_EVENT(tipc_skb_dump);
+DEFINE_SKB_EVENT(tipc_proto_build);
+DEFINE_SKB_EVENT(tipc_proto_rcv);
+
+/* Event class for skb list/queue dumps.
+ * Records the caller's @header string plus the text produced by
+ * tipc_list_dump(); the dump buffer is LIST_LMAX bytes when @more is set,
+ * LIST_LMIN otherwise.
+ */
+DECLARE_EVENT_CLASS(tipc_list_class,
+
+ TP_PROTO(struct sk_buff_head *list, bool more, const char *header),
+
+ TP_ARGS(list, more, header),
+
+ TP_STRUCT__entry(
+ __string(header, header)
+ __dynamic_array(char, buf, (more) ? LIST_LMAX : LIST_LMIN)
+ ),
+
+ TP_fast_assign(
+ __assign_str(header);
+ tipc_list_dump(list, more, __get_str(buf));
+ ),
+
+ TP_printk("%s\n%s", __get_str(header), __get_str(buf))
+);
+
+/* Instantiate one tracepoint of tipc_list_class per name */
+#define DEFINE_LIST_EVENT(name) \
+DEFINE_EVENT(tipc_list_class, name, \
+ TP_PROTO(struct sk_buff_head *list, bool more, const char *header), \
+ TP_ARGS(list, more, header))
+DEFINE_LIST_EVENT(tipc_list_dump);
+
+/* Event class for socket dumps.
+ * Records the socket's port id, the caller's @header, the text from
+ * tipc_sk_dump() (SK_LMAX bytes when @dqueues is non-zero, else SK_LMIN)
+ * and, when @skb is non-NULL, a short tipc_skb_dump() of it. With no skb
+ * the skb buffer is a single byte holding an empty string. The skb text is
+ * printed before the socket text.
+ */
+DECLARE_EVENT_CLASS(tipc_sk_class,
+
+ TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues,
+ const char *header),
+
+ TP_ARGS(sk, skb, dqueues, header),
+
+ TP_STRUCT__entry(
+ __string(header, header)
+ __field(u32, portid)
+ __dynamic_array(char, buf, (dqueues) ? SK_LMAX : SK_LMIN)
+ __dynamic_array(char, skb_buf, (skb) ? SKB_LMIN : 1)
+ ),
+
+ TP_fast_assign(
+ __assign_str(header);
+ __entry->portid = tipc_sock_get_portid(sk);
+ tipc_sk_dump(sk, dqueues, __get_str(buf));
+ if (skb)
+ tipc_skb_dump(skb, false, __get_str(skb_buf));
+ else
+ *(__get_str(skb_buf)) = '\0';
+ ),
+
+ TP_printk("<%u> %s\n%s%s", __entry->portid, __get_str(header),
+ __get_str(skb_buf), __get_str(buf))
+);
+
+/* Socket tracepoints that fire only when tipc_sk_filtering(sk) is true */
+#define DEFINE_SK_EVENT_FILTER(name) \
+DEFINE_EVENT_CONDITION(tipc_sk_class, name, \
+ TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, \
+ const char *header), \
+ TP_ARGS(sk, skb, dqueues, header), \
+ TP_CONDITION(tipc_sk_filtering(sk)))
+DEFINE_SK_EVENT_FILTER(tipc_sk_dump);
+DEFINE_SK_EVENT_FILTER(tipc_sk_create);
+DEFINE_SK_EVENT_FILTER(tipc_sk_sendmcast);
+DEFINE_SK_EVENT_FILTER(tipc_sk_sendmsg);
+DEFINE_SK_EVENT_FILTER(tipc_sk_sendstream);
+DEFINE_SK_EVENT_FILTER(tipc_sk_poll);
+DEFINE_SK_EVENT_FILTER(tipc_sk_filter_rcv);
+DEFINE_SK_EVENT_FILTER(tipc_sk_advance_rx);
+DEFINE_SK_EVENT_FILTER(tipc_sk_rej_msg);
+DEFINE_SK_EVENT_FILTER(tipc_sk_drop_msg);
+DEFINE_SK_EVENT_FILTER(tipc_sk_release);
+DEFINE_SK_EVENT_FILTER(tipc_sk_shutdown);
+
+/* Same as above, with an extra per-event condition ANDed to the filter */
+#define DEFINE_SK_EVENT_FILTER_COND(name, cond) \
+DEFINE_EVENT_CONDITION(tipc_sk_class, name, \
+ TP_PROTO(struct sock *sk, struct sk_buff *skb, u16 dqueues, \
+ const char *header), \
+ TP_ARGS(sk, skb, dqueues, header), \
+ TP_CONDITION(tipc_sk_filtering(sk) && (cond)))
+DEFINE_SK_EVENT_FILTER_COND(tipc_sk_overlimit1, tipc_sk_overlimit1(sk, skb));
+DEFINE_SK_EVENT_FILTER_COND(tipc_sk_overlimit2, tipc_sk_overlimit2(sk, skb));
+
+/* Event class for link dumps.
+ * Records the link name, the caller's @header and the text produced by
+ * tipc_link_dump() (LINK_LMAX bytes when @dqueues is non-zero, else
+ * LINK_LMIN).
+ * NOTE(review): the name is copied with a fixed-length memcpy of
+ * TIPC_MAX_LINK_NAME bytes, which assumes tipc_link_name() returns a
+ * buffer at least that large - confirm against link.c.
+ */
+DECLARE_EVENT_CLASS(tipc_link_class,
+
+ TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header),
+
+ TP_ARGS(l, dqueues, header),
+
+ TP_STRUCT__entry(
+ __string(header, header)
+ __array(char, name, TIPC_MAX_LINK_NAME)
+ __dynamic_array(char, buf, (dqueues) ? LINK_LMAX : LINK_LMIN)
+ ),
+
+ TP_fast_assign(
+ __assign_str(header);
+ memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
+ tipc_link_dump(l, dqueues, __get_str(buf));
+ ),
+
+ TP_printk("<%s> %s\n%s", __entry->name, __get_str(header),
+ __get_str(buf))
+);
+
+/* Instantiate one unconditional tracepoint of tipc_link_class per name */
+#define DEFINE_LINK_EVENT(name) \
+DEFINE_EVENT(tipc_link_class, name, \
+ TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
+ TP_ARGS(l, dqueues, header))
+DEFINE_LINK_EVENT(tipc_link_dump);
+DEFINE_LINK_EVENT(tipc_link_conges);
+DEFINE_LINK_EVENT(tipc_link_timeout);
+DEFINE_LINK_EVENT(tipc_link_reset);
+
+/* Link tracepoint that fires only when @cond evaluates true */
+#define DEFINE_LINK_EVENT_COND(name, cond) \
+DEFINE_EVENT_CONDITION(tipc_link_class, name, \
+ TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
+ TP_ARGS(l, dqueues, header), \
+ TP_CONDITION(cond))
+DEFINE_LINK_EVENT_COND(tipc_link_too_silent, tipc_link_too_silent(l));
+
+/* Event class describing a link's transmit queue @tq together with two
+ * u16 values @f and @t supplied by the caller.
+ * Stores the link name, @f/@t, the queue length and the sequence numbers of
+ * the first and last queued messages (both 0 when the queue is empty).
+ * NOTE(review): the name is copied with a fixed-length memcpy of
+ * TIPC_MAX_LINK_NAME bytes - confirm tipc_link_name() returns a buffer at
+ * least that large.
+ */
+DECLARE_EVENT_CLASS(tipc_link_transmq_class,
+
+ TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+
+ TP_ARGS(r, f, t, tq),
+
+ TP_STRUCT__entry(
+ __array(char, name, TIPC_MAX_LINK_NAME)
+ __field(u16, from)
+ __field(u16, to)
+ __field(u32, len)
+ __field(u16, fseqno)
+ __field(u16, lseqno)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME);
+ __entry->from = f;
+ __entry->to = t;
+ __entry->len = skb_queue_len(tq);
+ __entry->fseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek(tq))) : 0;
+ __entry->lseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek_tail(tq))) : 0;
+ ),
+
+ TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
+ __entry->name, __entry->from, __entry->to,
+ __entry->len, __entry->fseqno, __entry->lseqno)
+);
+
+/* Retransmit request [f-t]; only recorded when less_eq(f, t) holds */
+DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans,
+ TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+ TP_ARGS(r, f, t, tq),
+ TP_CONDITION(less_eq(f, t))
+);
+
+/* Same record layout, but @f is printed as "acked" and @t as "gap" */
+DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
+ TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+ TP_ARGS(r, f, t, tq),
+ TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n",
+ __entry->name, __entry->from, __entry->to,
+ __entry->len, __entry->fseqno, __entry->lseqno)
+);
+
+/* Event class for node dumps.
+ * Records the node address (printed in hex), the caller's @header and the
+ * text produced by tipc_node_dump(); the dump buffer is NODE_LMAX bytes
+ * when @more is set, NODE_LMIN otherwise.
+ */
+DECLARE_EVENT_CLASS(tipc_node_class,
+
+ TP_PROTO(struct tipc_node *n, bool more, const char *header),
+
+ TP_ARGS(n, more, header),
+
+ TP_STRUCT__entry(
+ __string(header, header)
+ __field(u32, addr)
+ __dynamic_array(char, buf, (more) ? NODE_LMAX : NODE_LMIN)
+ ),
+
+ TP_fast_assign(
+ __assign_str(header);
+ __entry->addr = tipc_node_get_addr(n);
+ tipc_node_dump(n, more, __get_str(buf));
+ ),
+
+ TP_printk("<%x> %s\n%s", __entry->addr, __get_str(header),
+ __get_str(buf))
+);
+
+/* Instantiate one tracepoint of tipc_node_class per name */
+#define DEFINE_NODE_EVENT(name) \
+DEFINE_EVENT(tipc_node_class, name, \
+ TP_PROTO(struct tipc_node *n, bool more, const char *header), \
+ TP_ARGS(n, more, header))
+DEFINE_NODE_EVENT(tipc_node_dump);
+DEFINE_NODE_EVENT(tipc_node_create);
+DEFINE_NODE_EVENT(tipc_node_delete);
+DEFINE_NODE_EVENT(tipc_node_lost_contact);
+DEFINE_NODE_EVENT(tipc_node_timeout);
+DEFINE_NODE_EVENT(tipc_node_link_up);
+DEFINE_NODE_EVENT(tipc_node_link_down);
+DEFINE_NODE_EVENT(tipc_node_reset_links);
+DEFINE_NODE_EVENT(tipc_node_check_state);
+
+/* Event class for FSM transitions.
+ * Records an object @name, the old state @os, the new state @ns and the
+ * triggering event @evt (stored as u32). Printed as
+ * "<name> OLD--(EVENT)->NEW" using state_sym()/evt_sym() for the names.
+ */
+DECLARE_EVENT_CLASS(tipc_fsm_class,
+
+ TP_PROTO(const char *name, u32 os, u32 ns, int evt),
+
+ TP_ARGS(name, os, ns, evt),
+
+ TP_STRUCT__entry(
+ __string(name, name)
+ __field(u32, os)
+ __field(u32, ns)
+ __field(u32, evt)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ __entry->os = os;
+ __entry->ns = ns;
+ __entry->evt = evt;
+ ),
+
+ TP_printk("<%s> %s--(%s)->%s\n", __get_str(name),
+ state_sym(__entry->os), evt_sym(__entry->evt),
+ state_sym(__entry->ns))
+);
+
+/* Instantiate one tracepoint of tipc_fsm_class per state machine */
+#define DEFINE_FSM_EVENT(fsm_name) \
+DEFINE_EVENT(tipc_fsm_class, fsm_name, \
+ TP_PROTO(const char *name, u32 os, u32 ns, int evt), \
+ TP_ARGS(name, os, ns, evt))
+DEFINE_FSM_EVENT(tipc_link_fsm);
+DEFINE_FSM_EVENT(tipc_node_fsm);
+
+/* Tracepoint for a net device event @evt seen on bearer @b / device @dev.
+ * Records both names, the event value, bit 0 of b->up, and the device's
+ * carrier and operational state as sampled when the event is recorded.
+ * Both @dev and @b are dereferenced, so neither may be NULL.
+ */
+TRACE_EVENT(tipc_l2_device_event,
+
+ TP_PROTO(struct net_device *dev, struct tipc_bearer *b,
+ unsigned long evt),
+
+ TP_ARGS(dev, b, evt),
+
+ TP_STRUCT__entry(
+ __string(dev_name, dev->name)
+ __string(b_name, b->name)
+ __field(unsigned long, evt)
+ __field(u8, b_up)
+ __field(u8, carrier)
+ __field(u8, oper)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name);
+ __assign_str(b_name);
+ __entry->evt = evt;
+ __entry->b_up = test_bit(0, &b->up);
+ __entry->carrier = netif_carrier_ok(dev);
+ __entry->oper = netif_oper_up(dev);
+ ),
+
+ TP_printk("%s on: <%s>/<%s> oper: %s carrier: %s bearer: %s\n",
+ dev_evt_sym(__entry->evt), __get_str(dev_name),
+ __get_str(b_name), (__entry->oper) ? "up" : "down",
+ (__entry->carrier) ? "ok" : "notok",
+ (__entry->b_up) ? "up" : "down")
+);
+
+#endif /* _TIPC_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
new file mode 100644
index 000000000000..b85ab0fb3b8c
--- /dev/null
+++ b/net/tipc/udp_media.c
@@ -0,0 +1,863 @@
+/* net/tipc/udp_media.c: IP bearer support for TIPC
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/udp_tunnel.h>
+#include <net/ipv6_stubs.h>
+#include <linux/tipc_netlink.h>
+#include "core.h"
+#include "addr.h"
+#include "net.h"
+#include "bearer.h"
+#include "netlink.h"
+#include "msg.h"
+#include "udp_media.h"
+
+/* IANA assigned UDP port */
+#define UDP_PORT_DEFAULT 6118
+
+#define UDP_MIN_HEADROOM 48
+
+/**
+ * struct udp_media_addr - IP/UDP addressing information
+ *
+ * This is the bearer level originating address used in neighbor discovery
+ * messages, and all fields should be in network byte order
+ *
+ * @proto: Ethernet protocol in use (ETH_P_IP or ETH_P_IPV6); code in this
+ *         file checks it to decide which of @ipv4 / @ipv6 to read
+ * @port: port being used
+ * @ipv4: IPv4 address of neighbor
+ * @ipv6: IPv6 address of neighbor
+ *
+ * The whole structure is copied into and compared against raw media
+ * address bytes with memcpy()/memcmp().
+ */
+struct udp_media_addr {
+ __be16 proto;
+ __be16 port;
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ };
+};
+
+/* struct udp_replicast - container for UDP remote addresses
+ *
+ * @addr: remote IP/UDP address
+ * @dst_cache: route cache passed to tipc_udp_xmit() when sending to @addr
+ * @rcu: RCU head (its users are outside the code shown in this section)
+ * @list: linkage into a udp_bearer's rcast.list; entries are added with
+ *        list_add_rcu()
+ */
+struct udp_replicast {
+ struct udp_media_addr addr;
+ struct dst_cache dst_cache;
+ struct rcu_head rcu;
+ struct list_head list;
+};
+
+/**
+ * struct udp_bearer - ip/udp bearer data structure
+ * @bearer: associated generic tipc bearer
+ * @ubsock: bearer associated socket
+ * @ifindex: local address scope; used as the interface index for IPv6
+ *           route lookups and for multicast group joins
+ * @work: used to schedule deferred work on a bearer
+ * @rcast: associated udp_replicast container; its list head anchors the
+ *         replicast peers and its dst_cache is used for non-replicast sends
+ */
+struct udp_bearer {
+ struct tipc_bearer __rcu *bearer;
+ struct socket *ubsock;
+ u32 ifindex;
+ struct work_struct work;
+ struct udp_replicast rcast;
+};
+
+/* tipc_udp_is_mcast_addr - test whether an ip/udp address is multicast
+ *
+ * Anything that is not ETH_P_IP is treated as IPv6 when IPv6 support is
+ * built in, and as "not multicast" otherwise.
+ */
+static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
+{
+	if (addr->proto != htons(ETH_P_IP)) {
+#if IS_ENABLED(CONFIG_IPV6)
+		return ipv6_addr_is_multicast(&addr->ipv6);
+#else
+		return 0;
+#endif
+	}
+
+	return ipv4_is_multicast(addr->ipv4.s_addr);
+}
+
+/* tipc_udp_media_addr_set - convert an ip/udp address to a TIPC media address
+ *
+ * @addr is cleared first, so every field not set here ends up zero.
+ */
+static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
+				    struct udp_media_addr *ua)
+{
+	memset(addr, 0, sizeof(*addr));
+	memcpy(addr->value, ua, sizeof(*ua));
+	addr->media_id = TIPC_MEDIA_TYPE_UDP;
+
+	/* only multicast addresses are flagged as broadcast capable */
+	if (!tipc_udp_is_mcast_addr(ua))
+		return;
+
+	addr->broadcast = TIPC_BROADCAST_SUPPORT;
+}
+
+/* tipc_udp_addr2str - convert ip/udp address to string
+ *
+ * Writes "<address>:<port>" into @buf (at most @size bytes).
+ * Returns 0 on success, 1 if the address is neither IPv4 nor IPv6.
+ */
+static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
+{
+	struct udp_media_addr *ua = (struct udp_media_addr *)&a->value;
+
+	switch (ntohs(ua->proto)) {
+	case ETH_P_IP:
+		snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port));
+		return 0;
+	case ETH_P_IPV6:
+		snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port));
+		return 0;
+	default:
+		pr_err("Invalid UDP media address\n");
+		return 1;
+	}
+}
+
+/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message
+ *
+ * Returns -EINVAL when the message's media type byte is not UDP, else 0
+ * with @a filled in from the address found at TIPC_MEDIA_ADDR_OFFSET.
+ */
+static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a,
+			     char *msg)
+{
+	if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP)
+		return -EINVAL;
+
+	tipc_udp_media_addr_set(a, (struct udp_media_addr *)
+				   (msg + TIPC_MEDIA_ADDR_OFFSET));
+	return 0;
+}
+
+/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message
+ *
+ * Clears the TIPC_MEDIA_INFO_SIZE bytes of media info in @msg, then stores
+ * the UDP media type and the raw address. Always returns 0.
+ */
+static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
+{
+	char *type = &msg[TIPC_MEDIA_TYPE_OFFSET];
+	char *where = &msg[TIPC_MEDIA_ADDR_OFFSET];
+
+	memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
+	*type = TIPC_MEDIA_TYPE_UDP;
+	memcpy(where, a->value, sizeof(struct udp_media_addr));
+
+	return 0;
+}
+
+/* tipc_udp_xmit - send one skb to a single ip/udp destination
+ *
+ * Looks up the route in @cache, resolving and caching it on a miss, then
+ * hands @skb to the UDP tunnel transmit helper for the address family
+ * selected by dst->proto. The whole operation runs with bottom halves
+ * disabled.
+ *
+ * Returns 0 after the skb has been passed to the tunnel helper, or the
+ * route lookup error, in which case @skb is freed here.
+ *
+ * NOTE(review): when CONFIG_IPV6 is disabled and dst->proto is not
+ * ETH_P_IP, no branch runs: the function returns 0 without sending or
+ * freeing @skb. Presumably such a bearer cannot be configured - confirm.
+ */
+static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
+ struct udp_bearer *ub, struct udp_media_addr *src,
+ struct udp_media_addr *dst, struct dst_cache *cache)
+{
+ struct dst_entry *ndst;
+ int ttl, err;
+
+ local_bh_disable();
+ ndst = dst_cache_get(cache);
+ if (dst->proto == htons(ETH_P_IP)) {
+ struct rtable *rt = dst_rtable(ndst);
+
+ /* cache miss: resolve the IPv4 route and remember it */
+ if (!rt) {
+ struct flowi4 fl = {
+ .daddr = dst->ipv4.s_addr,
+ .saddr = src->ipv4.s_addr,
+ .flowi4_mark = skb->mark,
+ .flowi4_proto = IPPROTO_UDP
+ };
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto tx_error;
+ }
+ dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
+ }
+
+ ttl = ip4_dst_hoplimit(&rt->dst);
+ udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
+ dst->ipv4.s_addr, 0, ttl, 0, src->port,
+ dst->port, false, true, 0);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ /* cache miss: resolve the IPv6 route, scoped to ub->ifindex */
+ if (!ndst) {
+ struct flowi6 fl6 = {
+ .flowi6_oif = ub->ifindex,
+ .daddr = dst->ipv6,
+ .saddr = src->ipv6,
+ .flowi6_proto = IPPROTO_UDP
+ };
+ ndst = ipv6_stub->ipv6_dst_lookup_flow(net,
+ ub->ubsock->sk,
+ &fl6, NULL);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ goto tx_error;
+ }
+ dst_cache_set_ip6(cache, ndst, &fl6.saddr);
+ }
+ ttl = ip6_dst_hoplimit(ndst);
+ udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+ &src->ipv6, &dst->ipv6, 0, ttl, 0,
+ src->port, dst->port, false, 0);
+#endif
+ }
+ local_bh_enable();
+ return 0;
+
+tx_error:
+ local_bh_enable();
+ kfree_skb(skb);
+ return err;
+}
+
+/* tipc_udp_send_msg - send @skb over UDP bearer @b to media address @addr
+ *
+ * Ensures at least UDP_MIN_HEADROOM bytes of headroom, then either sends
+ * the skb itself to @addr, or - when @addr is flagged
+ * TIPC_REPLICAST_SUPPORT - sends one copy to every peer on the bearer's
+ * replicast list.
+ *
+ * Returns 0 on success or a negative errno. @skb is never handed back to
+ * the caller: it is either passed to tipc_udp_xmit() or freed here.
+ */
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *addr)
+{
+ struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+ struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value;
+ struct udp_replicast *rcast;
+ struct udp_bearer *ub;
+ int err = 0;
+
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+ err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (err)
+ goto out;
+ }
+
+ skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
+ ub = rcu_dereference(b->media_ptr);
+ if (!ub) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* Single destination: tipc_udp_xmit() takes over the skb (it frees it
+ * on error), so return directly instead of going through "out".
+ */
+ if (addr->broadcast != TIPC_REPLICAST_SUPPORT)
+ return tipc_udp_xmit(net, skb, ub, src, dst,
+ &ub->rcast.dst_cache);
+
+ /* Replicast, send an skb to each configured IP address */
+ list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
+ struct sk_buff *_skb;
+
+ _skb = pskb_copy(skb, GFP_ATOMIC);
+ if (!_skb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* the copy is owned by tipc_udp_xmit() from here on */
+ err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr,
+ &rcast->dst_cache);
+ if (err)
+ goto out;
+ }
+ err = 0;
+out:
+ /* only copies were transmitted above; the original is released here */
+ kfree_skb(skb);
+ return err;
+}
+
+/* tipc_udp_is_known_peer - check whether @addr is already on the bearer's
+ * replicast list
+ *
+ * Addresses are compared bytewise over the whole udp_media_addr. Returns
+ * false (after a rate-limited error message) if the bearer has no UDP
+ * instance attached.
+ */
+static bool tipc_udp_is_known_peer(struct tipc_bearer *b,
+				   struct udp_media_addr *addr)
+{
+	struct udp_bearer *ub = rcu_dereference_rtnl(b->media_ptr);
+	struct udp_replicast *peer, *next;
+
+	if (!ub) {
+		pr_err_ratelimited("UDP bearer instance not found\n");
+		return false;
+	}
+
+	list_for_each_entry_safe(peer, next, &ub->rcast.list, list) {
+		if (memcmp(&peer->addr, addr, sizeof(*addr)) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+/* tipc_udp_rcast_add - add @addr as a replicast peer of bearer @b
+ *
+ * Allocates a udp_replicast entry (atomically), initialises its dst cache,
+ * copies @addr into it, marks the bearer's broadcast address as
+ * TIPC_REPLICAST_SUPPORT and links the entry into the bearer's peer list.
+ *
+ * Returns 0 on success, -ENODEV if the bearer has no UDP instance, or
+ * -ENOMEM if either allocation fails.
+ */
+static int tipc_udp_rcast_add(struct tipc_bearer *b,
+ struct udp_media_addr *addr)
+{
+ struct udp_replicast *rcast;
+ struct udp_bearer *ub;
+
+ ub = rcu_dereference_rtnl(b->media_ptr);
+ if (!ub)
+ return -ENODEV;
+
+ rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
+ if (!rcast)
+ return -ENOMEM;
+
+ if (dst_cache_init(&rcast->dst_cache, GFP_ATOMIC)) {
+ kfree(rcast);
+ return -ENOMEM;
+ }
+
+ memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
+
+ if (ntohs(addr->proto) == ETH_P_IP)
+ pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (ntohs(addr->proto) == ETH_P_IPV6)
+ pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
+#endif
+ /* from now on sends to the bearer's broadcast address are replicated */
+ b->bcast_addr.broadcast = TIPC_REPLICAST_SUPPORT;
+ list_add_rcu(&rcast->list, &ub->rcast.list);
+ return 0;
+}
+
+/* tipc_udp_rcast_disc - learn a replicast peer from a received packet
+ *
+ * Does nothing (returns 0) when the bearer's broadcast address is a
+ * multicast address, when the packet itself was sent to a multicast
+ * destination, when the IP version is neither 4 nor 6 (6 only with
+ * CONFIG_IPV6), or when the sender is already a known peer. Otherwise the
+ * packet's source address/port is added via tipc_udp_rcast_add() and that
+ * function's result is returned.
+ */
+static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb)
+{
+ struct udp_media_addr src = {0};
+ struct udp_media_addr *dst;
+
+ dst = (struct udp_media_addr *)&b->bcast_addr.value;
+ if (tipc_udp_is_mcast_addr(dst))
+ return 0;
+
+ src.port = udp_hdr(skb)->source;
+
+ /* the version field is read through ip_hdr() for both families */
+ if (ip_hdr(skb)->version == 4) {
+ struct iphdr *iphdr = ip_hdr(skb);
+
+ src.proto = htons(ETH_P_IP);
+ src.ipv4.s_addr = iphdr->saddr;
+ if (ipv4_is_multicast(iphdr->daddr))
+ return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (ip_hdr(skb)->version == 6) {
+ struct ipv6hdr *iphdr = ipv6_hdr(skb);
+
+ src.proto = htons(ETH_P_IPV6);
+ src.ipv6 = iphdr->saddr;
+ if (ipv6_addr_is_multicast(&iphdr->daddr))
+ return 0;
+#endif
+ } else {
+ return 0;
+ }
+
+ if (likely(tipc_udp_is_known_peer(b, &src)))
+ return 0;
+
+ return tipc_udp_rcast_add(b, &src);
+}
+
+/* tipc_udp_recv - read data from bearer socket
+ *
+ * Strips the UDP header and, if the bearer attached to @sk is up, passes
+ * the skb to tipc_rcv(). Otherwise a LINK_CONFIG message is offered to
+ * tipc_udp_rcast_disc() and the skb is dropped.
+ *
+ * Always returns 0; the skb is either handed to tipc_rcv() or freed here.
+ */
+static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct udp_bearer *ub;
+	struct tipc_bearer *b;
+	struct tipc_msg *hdr;
+
+	ub = rcu_dereference_sk_user_data(sk);
+	if (!ub) {
+		pr_err_ratelimited("Failed to get UDP bearer reference\n");
+		goto out;
+	}
+	skb_pull(skb, sizeof(struct udphdr));
+	hdr = buf_msg(skb);
+
+	b = rcu_dereference(ub->bearer);
+	if (!b)
+		goto out;
+
+	if (test_bit(0, &b->up)) {
+		TIPC_SKB_CB(skb)->flags = 0;
+		tipc_rcv(sock_net(sk), skb, b);
+		return 0;
+	}
+
+	/* Failing to record a new peer is not fatal for the receive path:
+	 * the skb is dropped below either way, so the result is ignored.
+	 */
+	if (unlikely(msg_user(hdr) == LINK_CONFIG))
+		tipc_udp_rcast_disc(b, skb);
+
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+/* enable_mcast - join the multicast group given by @remote on the bearer
+ * socket
+ *
+ * For IPv4 the group is joined on interface ub->ifindex. Any other
+ * protocol is treated as IPv6 when CONFIG_IPV6 is enabled; without IPv6
+ * support nothing is done and 0 is returned.
+ *
+ * Returns the result of the underlying join call.
+ */
+static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
+{
+	int err = 0;
+	struct sock *sk = ub->ubsock->sk;
+
+	if (ntohs(remote->proto) == ETH_P_IP) {
+		/* Designated initializer zeroes imr_address, so no
+		 * indeterminate stack bytes are handed to
+		 * ip_mc_join_group().
+		 */
+		struct ip_mreqn mreqn = {
+			.imr_multiaddr = remote->ipv4,
+			.imr_ifindex = ub->ifindex,
+		};
+
+		err = ip_mc_join_group(sk, &mreqn);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		lock_sock(sk);
+		err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex,
+						   &remote->ipv6);
+		release_sock(sk);
+#endif
+	}
+	return err;
+}
+
+/* __tipc_nl_add_udp_addr - emit a udp media address as a socket address
+ * @skb: netlink message under construction
+ * @addr: udp media address to convert
+ * @nla_t: attribute type to store the socket address under
+ *
+ * IPv4 addresses are written as struct sockaddr_in and IPv6 addresses as
+ * struct sockaddr_in6; any other protocol adds nothing.
+ *
+ * Returns 0 on success or -EMSGSIZE if the attribute could not be added.
+ */
+static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
+				  struct udp_media_addr *addr, int nla_t)
+{
+	switch (ntohs(addr->proto)) {
+	case ETH_P_IP: {
+		struct sockaddr_in sin;
+
+		memset(&sin, 0, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_port = addr->port;
+		sin.sin_addr.s_addr = addr->ipv4.s_addr;
+		if (nla_put(skb, nla_t, sizeof(sin), &sin))
+			return -EMSGSIZE;
+		break;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	case ETH_P_IPV6: {
+		struct sockaddr_in6 sin6;
+
+		memset(&sin6, 0, sizeof(sin6));
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_port = addr->port;
+		sin6.sin6_addr = addr->ipv6;
+		if (nla_put(skb, nla_t, sizeof(sin6), &sin6))
+			return -EMSGSIZE;
+		break;
+	}
+#endif
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/* tipc_udp_nl_dump_remoteip - netlink dump of a UDP bearer's replicast peers
+ * @skb: message buffer to fill
+ * @cb: dump state; args[0] holds the bearer identity and args[1] the number
+ *	of list entries already passed over by earlier calls
+ *
+ * On the first call (both args zero) the bearer is looked up by the name in
+ * the request attributes; on later calls it is taken from the bearer list by
+ * identity. One message per replicast entry is emitted until the buffer is
+ * full.
+ *
+ * Returns skb->len, or a negative errno if the request is malformed or the
+ * bearer is missing or not a UDP bearer.
+ */
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	u32 bid = cb->args[0];
+	u32 skip_cnt = cb->args[1];
+	struct udp_replicast *rcast, *tmp;
+	struct tipc_bearer *b;
+	struct udp_bearer *ub;
+	void *hdr;
+	int i = 0;
+
+	if (bid || skip_cnt) {
+		/* Continuation: the bearer identity was saved last time */
+		struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+		rtnl_lock();
+		b = rtnl_dereference(tn->bearer_list[bid]);
+		if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP)
+			goto err_unlock;
+	} else {
+		/* First call: resolve the bearer from the request */
+		struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
+		struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
+		char *bname;
+		int err;
+
+		if (!attrs[TIPC_NLA_BEARER])
+			return -EINVAL;
+
+		err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX,
+						  attrs[TIPC_NLA_BEARER],
+						  tipc_nl_bearer_policy, NULL);
+		if (err)
+			return err;
+
+		if (!battrs[TIPC_NLA_BEARER_NAME])
+			return -EINVAL;
+
+		bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]);
+
+		rtnl_lock();
+		b = tipc_bearer_find(net, bname);
+		if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP)
+			goto err_unlock;
+		bid = b->identity;
+	}
+
+	ub = rtnl_dereference(b->media_ptr);
+	if (!ub)
+		goto err_unlock;
+
+	list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+		if (i >= skip_cnt) {
+			hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq,
+					  &tipc_genl_family, NLM_F_MULTI,
+					  TIPC_NL_BEARER_GET);
+			if (!hdr)
+				break;
+
+			if (__tipc_nl_add_udp_addr(skb, &rcast->addr,
+						   TIPC_NLA_UDP_REMOTE)) {
+				genlmsg_cancel(skb, hdr);
+				break;
+			}
+			genlmsg_end(skb, hdr);
+		}
+		i++;
+	}
+
+	rtnl_unlock();
+	cb->args[0] = bid;
+	cb->args[1] = i;
+
+	return skb->len;
+
+err_unlock:
+	rtnl_unlock();
+	return -EINVAL;
+}
+
+/* tipc_udp_nl_add_bearer_data - add the UDP options nest to a bearer message
+ * @msg: netlink message wrapper whose skb is being filled
+ * @b: bearer to describe
+ *
+ * The nest carries the local address, the remote (broadcast) address and,
+ * when the replicast list is not empty, the MULTI_REMOTEIP flag.
+ *
+ * Returns 0 on success, -ENODEV if the bearer has no UDP instance, or
+ * -EMSGSIZE if the message ran out of room (the nest is then cancelled).
+ */
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b)
+{
+	struct udp_media_addr *local = (struct udp_media_addr *)&b->addr.value;
+	struct udp_media_addr *remote;
+	struct udp_bearer *ub = rtnl_dereference(b->media_ptr);
+	struct nlattr *opts;
+
+	if (!ub)
+		return -ENODEV;
+
+	opts = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
+	if (!opts)
+		goto cancel;
+
+	remote = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (__tipc_nl_add_udp_addr(msg->skb, local, TIPC_NLA_UDP_LOCAL) ||
+	    __tipc_nl_add_udp_addr(msg->skb, remote, TIPC_NLA_UDP_REMOTE))
+		goto cancel;
+
+	if (!list_empty(&ub->rcast.list) &&
+	    nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP))
+		goto cancel;
+
+	nla_nest_end(msg->skb, opts);
+	return 0;
+
+cancel:
+	nla_nest_cancel(msg->skb, opts);
+	return -EMSGSIZE;
+}
+
+/**
+ * tipc_parse_udp_addr - build udp media address from netlink data
+ * @nla: netlink attribute carrying a socket address (AF_INET or AF_INET6)
+ * @addr: udp media address that receives protocol, port and IP address
+ * @scope_id: where to store the IPv6 scope id; NULL if the caller does not
+ *	      need one
+ *
+ * Return: 0 on success, -EINVAL if a scope id was requested for an IPv6
+ * address that needs one but none was supplied, -EADDRNOTAVAIL for an
+ * unsupported address family.
+ */
+static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr,
+			       u32 *scope_id)
+{
+	struct sockaddr_storage ss;
+
+	nla_memcpy(&ss, nla, sizeof(ss));
+
+	switch (ss.ss_family) {
+	case AF_INET: {
+		struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
+
+		addr->proto = htons(ETH_P_IP);
+		addr->port = sin->sin_port;
+		addr->ipv4.s_addr = sin->sin_addr.s_addr;
+		return 0;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6: {
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
+
+		addr->proto = htons(ETH_P_IPV6);
+		addr->port = sin6->sin6_port;
+		addr->ipv6 = sin6->sin6_addr;
+
+		/* Scope ID is only interesting for local addresses */
+		if (!scope_id)
+			return 0;
+
+		if (!sin6->sin6_scope_id &&
+		    __ipv6_addr_needs_scope_id(ipv6_addr_type(&sin6->sin6_addr)))
+			return -EINVAL;
+
+		*scope_id = sin6->sin6_scope_id;
+		return 0;
+	}
+#endif
+	default:
+		break;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+/* tipc_udp_nl_bearer_add - add a remote ip to a UDP bearer via netlink
+ * @b: bearer to extend
+ * @attr: nested UDP options; must contain TIPC_NLA_UDP_REMOTE
+ *
+ * Rejected when the bearer's broadcast address is multicast. A peer that is
+ * already known is accepted without being added again.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
+{
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	struct udp_media_addr peer = {0};
+	struct udp_media_addr *bcast;
+	int rc;
+
+	if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr,
+					tipc_nl_udp_policy, NULL) ||
+	    !opts[TIPC_NLA_UDP_REMOTE])
+		return -EINVAL;
+
+	rc = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &peer, NULL);
+	if (rc)
+		return rc;
+
+	bcast = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (tipc_udp_is_mcast_addr(bcast)) {
+		pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
+		return -EINVAL;
+	}
+
+	return tipc_udp_is_known_peer(b, &peer) ? 0 :
+						  tipc_udp_rcast_add(b, &peer);
+}
+
+/**
+ * tipc_udp_enable - callback to create a new udp bearer instance
+ * @net: network namespace
+ * @b: pointer to generic tipc_bearer
+ * @attrs: netlink bearer configuration
+ *
+ * validate the bearer parameters and initialize the udp bearer
+ * rtnl_lock should be held
+ *
+ * Return: 0 on success, otherwise a negative errno; on failure the udp
+ * bearer allocated here is freed again.
+ */
+static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
+			   struct nlattr *attrs[])
+{
+	int err = -EINVAL;
+	struct udp_bearer *ub;
+	struct udp_media_addr remote = {0};
+	struct udp_media_addr local = {0};
+	struct udp_port_cfg udp_conf = {0};
+	struct udp_tunnel_sock_cfg tuncfg = {NULL};
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	u8 node_id[NODE_ID_LEN] = {0,};
+	struct net_device *dev;
+	int rmcast = 0;
+
+	ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
+	if (!ub)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ub->rcast.list);
+
+	/* err is still -EINVAL for the configuration checks below */
+	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+		goto err;
+
+	if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX,
+					attrs[TIPC_NLA_BEARER_UDP_OPTS],
+					tipc_nl_udp_policy, NULL))
+		goto err;
+
+	if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
+		pr_err("Invalid UDP bearer configuration\n");
+		goto err;
+	}
+
+	/* For a local IPv6 address this also stores the scope id in
+	 * ub->ifindex.
+	 */
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local,
+				  &ub->ifindex);
+	if (err)
+		goto err;
+
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
+	if (err)
+		goto err;
+
+	/* Local and remote must use the same IP version */
+	if (remote.proto != local.proto) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	/* Checking remote ip address */
+	rmcast = tipc_udp_is_mcast_addr(&remote);
+
+	/* Autoconfigure own node identity if needed */
+	if (!tipc_own_id(net)) {
+		memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
+		tipc_net_init(net, node_id, 0);
+	}
+	if (!tipc_own_id(net)) {
+		pr_warn("Failed to set node id, please configure manually\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
+	b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
+	rcu_assign_pointer(b->media_ptr, ub);
+	rcu_assign_pointer(ub->bearer, b);
+	tipc_udp_media_addr_set(&b->addr, &local);
+	if (local.proto == htons(ETH_P_IP)) {
+		dev = __ip_dev_find(net, local.ipv4.s_addr, false);
+		if (!dev) {
+			err = -ENODEV;
+			goto err;
+		}
+		udp_conf.family = AF_INET;
+
+		/* Switch to use ANY to receive packets from group */
+		if (rmcast)
+			udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+		else
+			udp_conf.local_ip.s_addr = local.ipv4.s_addr;
+		udp_conf.use_udp_checksums = false;
+		ub->ifindex = dev->ifindex;
+		b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+		b->mtu = b->media->mtu;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (local.proto == htons(ETH_P_IPV6)) {
+		dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL;
+		dev = ipv6_dev_find(net, &local.ipv6, dev);
+		if (!dev) {
+			err = -ENODEV;
+			goto err;
+		}
+		udp_conf.family = AF_INET6;
+		udp_conf.use_udp6_tx_checksums = true;
+		udp_conf.use_udp6_rx_checksums = true;
+		if (rmcast)
+			udp_conf.local_ip6 = in6addr_any;
+		else
+			udp_conf.local_ip6 = local.ipv6;
+		ub->ifindex = dev->ifindex;
+		b->encap_hlen = sizeof(struct ipv6hdr) + sizeof(struct udphdr);
+		b->mtu = 1280;
+#endif
+	} else {
+		err = -EAFNOSUPPORT;
+		goto err;
+	}
+	udp_conf.local_udp_port = local.port;
+	err = udp_sock_create(net, &udp_conf, &ub->ubsock);
+	if (err)
+		goto err;
+	tuncfg.sk_user_data = ub;
+	tuncfg.encap_type = 1;
+	tuncfg.encap_rcv = tipc_udp_recv;
+	tuncfg.encap_destroy = NULL;
+	setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
+
+	err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC);
+	if (err)
+		goto free;
+
+	/*
+	 * The bcast media address port is used for all peers and the ip
+	 * is used if it's a multicast address.
+	 */
+	memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
+	if (rmcast)
+		err = enable_mcast(ub, &remote);
+	else
+		err = tipc_udp_rcast_add(b, &remote);
+	if (err)
+		goto free;
+
+	return 0;
+
+free:
+	dst_cache_destroy(&ub->rcast.dst_cache);
+	udp_tunnel_sock_release(ub->ubsock);
+err:
+	kfree(ub);
+	return err;
+}
+
+/* cleanup_bearer - break the socket/bearer association
+ * @work: work item embedded in the udp_bearer being torn down
+ *
+ * Work function queued by tipc_udp_disable(). Destroys every replicast
+ * entry, releases the bearer socket, waits in synchronize_net(), drops the
+ * wq_count reference taken when the work was queued and frees the
+ * udp_bearer.
+ */
+static void cleanup_bearer(struct work_struct *work)
+{
+ struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+ struct udp_replicast *rcast, *tmp;
+ struct tipc_net *tn;
+
+ list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+ dst_cache_destroy(&rcast->dst_cache);
+ list_del_rcu(&rcast->list);
+ kfree_rcu(rcast, rcu);
+ }
+
+ tn = tipc_net(sock_net(ub->ubsock->sk)); /* looked up through the socket before it is released below */
+
+ dst_cache_destroy(&ub->rcast.dst_cache);
+ udp_tunnel_sock_release(ub->ubsock);
+
+ /* Note: could use a call_rcu() to avoid another synchronize_net() */
+ synchronize_net();
+ atomic_dec(&tn->wq_count); /* pairs with the atomic_inc() in tipc_udp_disable() */
+ kfree(ub);
+}
+
+/* tipc_udp_disable - detach bearer from socket
+ * @b: bearer being disabled
+ *
+ * Marks the socket dead, clears the udp bearer's back-pointer to @b and
+ * queues cleanup_bearer() to release the socket and free the udp bearer.
+ */
+static void tipc_udp_disable(struct tipc_bearer *b)
+{
+	struct udp_bearer *ub = rtnl_dereference(b->media_ptr);
+	struct sock *sk;
+
+	if (!ub) {
+		pr_err("UDP bearer instance not found\n");
+		return;
+	}
+
+	sk = ub->ubsock->sk;
+	sock_set_flag(sk, SOCK_DEAD);
+	RCU_INIT_POINTER(ub->bearer, NULL);
+
+	/* sock_release need to be done outside of rtnl lock */
+	atomic_inc(&tipc_net(sock_net(sk))->wq_count);
+	INIT_WORK(&ub->work, cleanup_bearer);
+	schedule_work(&ub->work);
+}
+
+/* Media descriptor for the UDP bearer */
+struct tipc_media udp_media_info = {
+	/* identity */
+	.name		= "udp",
+	.type_id	= TIPC_MEDIA_TYPE_UDP,
+	.hwaddr_len	= 0,
+
+	/* callbacks */
+	.enable_media	= tipc_udp_enable,
+	.disable_media	= tipc_udp_disable,
+	.send_msg	= tipc_udp_send_msg,
+	.addr2str	= tipc_udp_addr2str,
+	.addr2msg	= tipc_udp_addr2msg,
+	.msg2addr	= tipc_udp_msg2addr,
+
+	/* link defaults */
+	.priority	= TIPC_DEF_LINK_PRI,
+	.tolerance	= TIPC_DEF_LINK_TOL,
+	.min_win	= TIPC_DEF_LINK_WIN,
+	.max_win	= TIPC_DEF_LINK_WIN,
+	.mtu		= TIPC_DEF_LINK_UDP_MTU,
+};
diff --git a/net/tipc/ref.h b/net/tipc/udp_media.h
index 5bc8e7ab84de..e7455cc73e16 100644
--- a/net/tipc/ref.h
+++ b/net/tipc/udp_media.h
@@ -1,8 +1,8 @@
/*
- * net/tipc/ref.h: Include file for TIPC object registry code
+ * net/tipc/udp_media.h: Include file for UDP bearer media
*
- * Copyright (c) 1991-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,16 +34,27 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _TIPC_REF_H
-#define _TIPC_REF_H
+#ifdef CONFIG_TIPC_MEDIA_UDP
+#ifndef _TIPC_UDP_MEDIA_H
+#define _TIPC_UDP_MEDIA_H
-int tipc_ref_table_init(u32 requested_size, u32 start);
-void tipc_ref_table_stop(void);
+#include <linux/ip.h>
+#include <linux/udp.h>
-u32 tipc_ref_acquire(void *object, spinlock_t **lock);
-void tipc_ref_discard(u32 ref);
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb);
-void *tipc_ref_lock(u32 ref);
-void *tipc_ref_deref(u32 ref);
+/* tipc_udp_mtu_bad - check if configured MTU is too low for tipc headers
+ *
+ * Returns true, after logging a warning, when @mtu is smaller than
+ * TIPC_MIN_BEARER_MTU plus the IPv4 and UDP header sizes; false otherwise.
+ */
+static inline bool tipc_udp_mtu_bad(u32 mtu)
+{
+	if (mtu < (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) +
+		   sizeof(struct udphdr))) {
+		pr_warn("MTU too low for tipc bearer\n");
+		return true;
+	}
+
+	return false;
+}
+
+#endif
#endif